raw/ioat: move idxd functions to separate file
[dpdk.git] / drivers / raw / ioat / rte_ioat_rawdev_fns.h
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2019-2020 Intel Corporation
 */
#ifndef _RTE_IOAT_RAWDEV_FNS_H_
#define _RTE_IOAT_RAWDEV_FNS_H_

/**
 * @file
 * This header file contains the implementation of the various ioat
 * rawdev functions for IOAT/CBDMA hardware. The API specification and key
 * public structures are defined in "rte_ioat_rawdev.h".
 *
 * This file should not be included directly; applications should instead
 * include "rte_ioat_rawdev.h", which in turn includes this file and the
 * IDXD/DSA equivalent header.
 */
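
/*
 * A minimal usage sketch of the public wrappers defined below, assuming the
 * rawdev identified by "dev_id" has already been configured and started via
 * the generic rte_rawdev APIs. BURST_SIZE, pkts[] and dst_bufs[] are
 * hypothetical application-side names used only for illustration.
 *
 *	uintptr_t src_hdls[BURST_SIZE], dst_hdls[BURST_SIZE];
 *	int i, n;
 *
 *	for (i = 0; i < BURST_SIZE; i++)
 *		if (rte_ioat_enqueue_copy(dev_id,
 *				rte_pktmbuf_iova(pkts[i]),
 *				rte_pktmbuf_iova(dst_bufs[i]),
 *				pkts[i]->data_len,
 *				(uintptr_t)pkts[i],
 *				(uintptr_t)dst_bufs[i]) != 1)
 *			break;
 *	rte_ioat_perform_ops(dev_id);
 *
 *	// ... later, retrieve the handles of the completed copies
 *	n = rte_ioat_completed_ops(dev_id, BURST_SIZE, src_hdls, dst_hdls);
 */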

#include <x86intrin.h>
#include <rte_rawdev.h>
#include <rte_memzone.h>
#include <rte_prefetch.h>

/**
 * @internal
 * Identify the data path to use.
 * Must be first field of rte_ioat_rawdev and rte_idxd_rawdev structs
 */
enum rte_ioat_dev_type {
        RTE_IOAT_DEV,
        RTE_IDXD_DEV,
};

/**
 * @internal
 * some statistics for tracking, if added/changed update xstats fns
 */
struct rte_ioat_xstats {
        uint64_t enqueue_failed;
        uint64_t enqueued;
        uint64_t started;
        uint64_t completed;
};

#include "rte_idxd_rawdev_fns.h"

/**
 * @internal
 * Structure representing a device descriptor
 */
struct rte_ioat_generic_hw_desc {
        uint32_t size;
        union {
                uint32_t control_raw;
                struct {
                        uint32_t int_enable: 1;
                        uint32_t src_snoop_disable: 1;
                        uint32_t dest_snoop_disable: 1;
                        uint32_t completion_update: 1;
                        uint32_t fence: 1;
                        uint32_t reserved2: 1;
                        uint32_t src_page_break: 1;
                        uint32_t dest_page_break: 1;
                        uint32_t bundle: 1;
                        uint32_t dest_dca: 1;
                        uint32_t hint: 1;
                        uint32_t reserved: 13;
                        uint32_t op: 8;
                } control;
        } u;
        uint64_t src_addr;
        uint64_t dest_addr;
        uint64_t next;
        uint64_t op_specific[4];
};

/**
 * @internal
 * Structure representing an IOAT device instance
 */
struct rte_ioat_rawdev {
        /* common fields at the top - match those in rte_idxd_rawdev */
        enum rte_ioat_dev_type type;
        struct rte_ioat_xstats xstats;

        struct rte_rawdev *rawdev;
        const struct rte_memzone *mz;
        const struct rte_memzone *desc_mz;

        volatile uint16_t *doorbell __rte_cache_aligned;
        phys_addr_t status_addr;
        phys_addr_t ring_addr;

        unsigned short ring_size;
        bool hdls_disable;
        struct rte_ioat_generic_hw_desc *desc_ring;
        __m128i *hdls; /* completion handles for returning to user */

        unsigned short next_read;
        unsigned short next_write;

        /* to report completions, the device will write status back here */
        volatile uint64_t status __rte_cache_aligned;

        /* pointer to the register bar */
        volatile struct rte_ioat_registers *regs;
};

#define RTE_IOAT_CHANSTS_IDLE                   0x1
#define RTE_IOAT_CHANSTS_SUSPENDED              0x2
#define RTE_IOAT_CHANSTS_HALTED                 0x3
#define RTE_IOAT_CHANSTS_ARMED                  0x4

static __rte_always_inline int
__ioat_write_desc(int dev_id, uint32_t op, uint64_t src, phys_addr_t dst,
                unsigned int length, uintptr_t src_hdl, uintptr_t dst_hdl)
{
        struct rte_ioat_rawdev *ioat =
                        (struct rte_ioat_rawdev *)rte_rawdevs[dev_id].dev_private;
        unsigned short read = ioat->next_read;
        unsigned short write = ioat->next_write;
        unsigned short mask = ioat->ring_size - 1;
        unsigned short space = mask + read - write;
        struct rte_ioat_generic_hw_desc *desc;

        if (space == 0) {
                ioat->xstats.enqueue_failed++;
                return 0;
        }

        ioat->next_write = write + 1;
        write &= mask;

        desc = &ioat->desc_ring[write];
        desc->size = length;
        /* set descriptor write-back every 16th descriptor */
        desc->u.control_raw = (uint32_t)((op << IOAT_CMD_OP_SHIFT) |
                        (!(write & 0xF) << IOAT_COMP_UPDATE_SHIFT));
        desc->src_addr = src;
        desc->dest_addr = dst;

        if (!ioat->hdls_disable)
                ioat->hdls[write] = _mm_set_epi64x((int64_t)dst_hdl,
                                        (int64_t)src_hdl);
        rte_prefetch0(&ioat->desc_ring[ioat->next_write & mask]);

        ioat->xstats.enqueued++;
        return 1;
}
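
/*
 * Note on the ring arithmetic above: next_read and next_write are free-running
 * 16-bit counters and are only masked with (ring_size - 1) when used to index
 * the descriptor ring, so "mask + read - write" yields the free space while
 * keeping one slot permanently unused. For example, with ring_size = 64,
 * read = 10 and write = 73 (63 descriptors outstanding), space is
 * 63 + 10 - 73 = 0 and the next enqueue is rejected.
 */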

static __rte_always_inline int
__ioat_enqueue_fill(int dev_id, uint64_t pattern, phys_addr_t dst,
                unsigned int length, uintptr_t dst_hdl)
{
        static const uintptr_t null_hdl;

        return __ioat_write_desc(dev_id, ioat_op_fill, pattern, dst, length,
                        null_hdl, dst_hdl);
}

/*
 * Enqueue a copy operation onto the ioat device
 */
static __rte_always_inline int
__ioat_enqueue_copy(int dev_id, phys_addr_t src, phys_addr_t dst,
                unsigned int length, uintptr_t src_hdl, uintptr_t dst_hdl)
{
        return __ioat_write_desc(dev_id, ioat_op_copy, src, dst, length,
                        src_hdl, dst_hdl);
}

/* add fence to last written descriptor */
static __rte_always_inline int
__ioat_fence(int dev_id)
{
        struct rte_ioat_rawdev *ioat =
                        (struct rte_ioat_rawdev *)rte_rawdevs[dev_id].dev_private;
        unsigned short write = ioat->next_write;
        unsigned short mask = ioat->ring_size - 1;
        struct rte_ioat_generic_hw_desc *desc;

        write = (write - 1) & mask;
        desc = &ioat->desc_ring[write];

        desc->u.control.fence = 1;
        return 0;
}

/*
 * Trigger hardware to begin performing enqueued operations
 */
static __rte_always_inline int
__ioat_perform_ops(int dev_id)
{
        struct rte_ioat_rawdev *ioat =
                        (struct rte_ioat_rawdev *)rte_rawdevs[dev_id].dev_private;
        ioat->desc_ring[(ioat->next_write - 1) & (ioat->ring_size - 1)].u
                        .control.completion_update = 1;
        rte_compiler_barrier();
        *ioat->doorbell = ioat->next_write;
        ioat->xstats.started = ioat->xstats.enqueued;

        return 0;
}
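
/*
 * In __ioat_perform_ops() above, the compiler barrier ensures the descriptor
 * ring writes are emitted before the doorbell store that hands the batch to
 * hardware, and setting completion_update on the final descriptor requests a
 * status write-back at the end of the batch even when its length is not a
 * multiple of 16.
 */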

/**
 * @internal
 * Returns the index of the last completed operation.
 */
static __rte_always_inline int
__ioat_get_last_completed(struct rte_ioat_rawdev *ioat, int *error)
{
        uint64_t status = ioat->status;

        /* lower 3 bits indicate "transfer status" : active, idle, halted.
         * We can ignore bit 0.
         */
        *error = status & (RTE_IOAT_CHANSTS_SUSPENDED | RTE_IOAT_CHANSTS_ARMED);
        return (status - ioat->ring_addr) >> 6;
}
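
/*
 * In __ioat_get_last_completed() above: the hardware writes the physical
 * address of the most recently completed descriptor into the status word
 * (with the channel state in the low bits), and each descriptor is 64 bytes,
 * so subtracting the ring base address and shifting right by 6 recovers the
 * ring index of that descriptor.
 */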

/*
 * Returns details of operations that have been completed
 */
static __rte_always_inline int
__ioat_completed_ops(int dev_id, uint8_t max_copies,
                uintptr_t *src_hdls, uintptr_t *dst_hdls)
{
        struct rte_ioat_rawdev *ioat =
                        (struct rte_ioat_rawdev *)rte_rawdevs[dev_id].dev_private;
        unsigned short mask = (ioat->ring_size - 1);
        unsigned short read = ioat->next_read;
        unsigned short end_read, count;
        int error;
        int i = 0;

        end_read = (__ioat_get_last_completed(ioat, &error) + 1) & mask;
        count = (end_read - (read & mask)) & mask;

        if (error) {
                rte_errno = EIO;
                return -1;
        }

        if (ioat->hdls_disable) {
                read += count;
                goto end;
        }

        if (count > max_copies)
                count = max_copies;

        for (; i < count - 1; i += 2, read += 2) {
                __m128i hdls0 = _mm_load_si128(&ioat->hdls[read & mask]);
                __m128i hdls1 = _mm_load_si128(&ioat->hdls[(read + 1) & mask]);

                _mm_storeu_si128((__m128i *)&src_hdls[i],
                                _mm_unpacklo_epi64(hdls0, hdls1));
                _mm_storeu_si128((__m128i *)&dst_hdls[i],
                                _mm_unpackhi_epi64(hdls0, hdls1));
        }
        for (; i < count; i++, read++) {
                uintptr_t *hdls = (uintptr_t *)&ioat->hdls[read & mask];
                src_hdls[i] = hdls[0];
                dst_hdls[i] = hdls[1];
        }

end:
        ioat->next_read = read;
        ioat->xstats.completed += count;
        return count;
}
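
/*
 * Each 128-bit entry in ioat->hdls holds the source handle in its low 64 bits
 * and the destination handle in its high 64 bits (as packed by
 * _mm_set_epi64x() in __ioat_write_desc()), so the unpacklo/unpackhi pair
 * above de-interleaves two completions at a time into the src_hdls and
 * dst_hdls output arrays; the scalar loop handles any remaining odd entry.
 */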

static inline int
rte_ioat_enqueue_fill(int dev_id, uint64_t pattern, phys_addr_t dst,
                unsigned int len, uintptr_t dst_hdl)
{
        enum rte_ioat_dev_type *type =
                        (enum rte_ioat_dev_type *)rte_rawdevs[dev_id].dev_private;
        if (*type == RTE_IDXD_DEV)
                return __idxd_enqueue_fill(dev_id, pattern, dst, len, dst_hdl);
        else
                return __ioat_enqueue_fill(dev_id, pattern, dst, len, dst_hdl);
}

static inline int
rte_ioat_enqueue_copy(int dev_id, phys_addr_t src, phys_addr_t dst,
                unsigned int length, uintptr_t src_hdl, uintptr_t dst_hdl)
{
        enum rte_ioat_dev_type *type =
                        (enum rte_ioat_dev_type *)rte_rawdevs[dev_id].dev_private;
        if (*type == RTE_IDXD_DEV)
                return __idxd_enqueue_copy(dev_id, src, dst, length,
                                src_hdl, dst_hdl);
        else
                return __ioat_enqueue_copy(dev_id, src, dst, length,
                                src_hdl, dst_hdl);
}

static inline int
rte_ioat_fence(int dev_id)
{
        enum rte_ioat_dev_type *type =
                        (enum rte_ioat_dev_type *)rte_rawdevs[dev_id].dev_private;
        if (*type == RTE_IDXD_DEV)
                return __idxd_fence(dev_id);
        else
                return __ioat_fence(dev_id);
}

static inline int
rte_ioat_perform_ops(int dev_id)
{
        enum rte_ioat_dev_type *type =
                        (enum rte_ioat_dev_type *)rte_rawdevs[dev_id].dev_private;
        if (*type == RTE_IDXD_DEV)
                return __idxd_perform_ops(dev_id);
        else
                return __ioat_perform_ops(dev_id);
}

static inline int
rte_ioat_completed_ops(int dev_id, uint8_t max_copies,
                uintptr_t *src_hdls, uintptr_t *dst_hdls)
{
        enum rte_ioat_dev_type *type =
                        (enum rte_ioat_dev_type *)rte_rawdevs[dev_id].dev_private;
        if (*type == RTE_IDXD_DEV)
                return __idxd_completed_ops(dev_id, max_copies,
                                src_hdls, dst_hdls);
        else
                return __ioat_completed_ops(dev_id, max_copies,
                                src_hdls, dst_hdls);
}

static inline void
__rte_deprecated_msg("use rte_ioat_perform_ops() instead")
rte_ioat_do_copies(int dev_id) { rte_ioat_perform_ops(dev_id); }

static inline int
__rte_deprecated_msg("use rte_ioat_completed_ops() instead")
rte_ioat_completed_copies(int dev_id, uint8_t max_copies,
                uintptr_t *src_hdls, uintptr_t *dst_hdls)
{
        return rte_ioat_completed_ops(dev_id, max_copies, src_hdls, dst_hdls);
}

#endif /* _RTE_IOAT_RAWDEV_FNS_H_ */