dpdk/drivers/net/virtio/virtio_user/vhost_vdpa.c
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2020 Red Hat Inc.
 */

#include <sys/ioctl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <errno.h>
#include <fcntl.h>
#include <inttypes.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>

#include <rte_memory.h>

#include "vhost.h"
#include "virtio_user_dev.h"

/* vhost kernel & vdpa ioctls */
#define VHOST_VIRTIO 0xAF
#define VHOST_GET_FEATURES _IOR(VHOST_VIRTIO, 0x00, __u64)
#define VHOST_SET_FEATURES _IOW(VHOST_VIRTIO, 0x00, __u64)
#define VHOST_SET_OWNER _IO(VHOST_VIRTIO, 0x01)
#define VHOST_RESET_OWNER _IO(VHOST_VIRTIO, 0x02)
#define VHOST_SET_LOG_BASE _IOW(VHOST_VIRTIO, 0x04, __u64)
#define VHOST_SET_LOG_FD _IOW(VHOST_VIRTIO, 0x07, int)
#define VHOST_SET_VRING_NUM _IOW(VHOST_VIRTIO, 0x10, struct vhost_vring_state)
#define VHOST_SET_VRING_ADDR _IOW(VHOST_VIRTIO, 0x11, struct vhost_vring_addr)
#define VHOST_SET_VRING_BASE _IOW(VHOST_VIRTIO, 0x12, struct vhost_vring_state)
#define VHOST_GET_VRING_BASE _IOWR(VHOST_VIRTIO, 0x12, struct vhost_vring_state)
#define VHOST_SET_VRING_KICK _IOW(VHOST_VIRTIO, 0x20, struct vhost_vring_file)
#define VHOST_SET_VRING_CALL _IOW(VHOST_VIRTIO, 0x21, struct vhost_vring_file)
#define VHOST_SET_VRING_ERR _IOW(VHOST_VIRTIO, 0x22, struct vhost_vring_file)
#define VHOST_SET_BACKEND_FEATURES _IOW(VHOST_VIRTIO, 0x25, __u64)
#define VHOST_GET_BACKEND_FEATURES _IOR(VHOST_VIRTIO, 0x26, __u64)
#define VHOST_NET_SET_BACKEND _IOW(VHOST_VIRTIO, 0x30, struct vhost_vring_file)
#define VHOST_VDPA_GET_DEVICE_ID _IOR(VHOST_VIRTIO, 0x70, __u32)
#define VHOST_VDPA_GET_STATUS _IOR(VHOST_VIRTIO, 0x71, __u8)
#define VHOST_VDPA_SET_STATUS _IOW(VHOST_VIRTIO, 0x72, __u8)
#define VHOST_VDPA_SET_VRING_ENABLE _IOW(VHOST_VIRTIO, 0x75, struct vhost_vring_state)

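/*
 * The request codes above are built with the kernel _IO/_IOR/_IOW/_IOWR
 * macros, which encode the transfer direction, the 0xAF vhost magic, a
 * command number and the argument size into the single value passed to
 * ioctl().
 */

/*
 * Translation table from vhost-user request types to vhost-vDPA ioctls.
 * Requests without an entry here default to 0, so only translated requests
 * may be passed to vhost_vdpa_send_request().
 */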
static uint64_t vhost_req_user_to_vdpa[] = {
        [VHOST_USER_RESET_OWNER] = VHOST_RESET_OWNER,
        [VHOST_USER_SET_VRING_ADDR] = VHOST_SET_VRING_ADDR,
        [VHOST_USER_SET_STATUS] = VHOST_VDPA_SET_STATUS,
        [VHOST_USER_GET_STATUS] = VHOST_VDPA_GET_STATUS,
};

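/*
 * In-band IOTLB message written to the vhost-vDPA file descriptor.
 * The layout matches struct vhost_iotlb_msg in the Linux uAPI
 * (<linux/vhost_types.h>).
 */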
/* no alignment requirement */
struct vhost_iotlb_msg {
        uint64_t iova;
        uint64_t size;
        uint64_t uaddr;
#define VHOST_ACCESS_RO      0x1
#define VHOST_ACCESS_WO      0x2
#define VHOST_ACCESS_RW      0x3
        uint8_t perm;
#define VHOST_IOTLB_MISS           1
#define VHOST_IOTLB_UPDATE         2
#define VHOST_IOTLB_INVALIDATE     3
#define VHOST_IOTLB_ACCESS_FAIL    4
#define VHOST_IOTLB_BATCH_BEGIN    5
#define VHOST_IOTLB_BATCH_END      6
        uint8_t type;
};

#define VHOST_IOTLB_MSG_V2 0x2

struct vhost_msg {
        uint32_t type;
        uint32_t reserved;
        union {
                struct vhost_iotlb_msg iotlb;
                uint8_t padding[64];
        };
};

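/* Thin wrapper around ioctl() with unified error logging. */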
static int
vhost_vdpa_ioctl(int fd, uint64_t request, void *arg)
{
        int ret;

        ret = ioctl(fd, request, arg);
        if (ret) {
                PMD_DRV_LOG(ERR, "Vhost-vDPA ioctl %"PRIu64" failed (%s)",
                                request, strerror(errno));
                return -1;
        }

        return 0;
}

static int
vhost_vdpa_set_owner(struct virtio_user_dev *dev)
{
        return vhost_vdpa_ioctl(dev->vhostfd, VHOST_SET_OWNER, NULL);
}

static int
vhost_vdpa_get_backend_features(struct virtio_user_dev *dev, uint64_t *features)
{
        return vhost_vdpa_ioctl(dev->vhostfd, VHOST_GET_BACKEND_FEATURES, features);
}

static int
vhost_vdpa_set_backend_features(struct virtio_user_dev *dev, uint64_t features)
{
        return vhost_vdpa_ioctl(dev->vhostfd, VHOST_SET_BACKEND_FEATURES, &features);
}

static int
vhost_vdpa_get_features(struct virtio_user_dev *dev, uint64_t *features)
{
        int ret;

        ret = vhost_vdpa_ioctl(dev->vhostfd, VHOST_GET_FEATURES, features);
        if (ret) {
                PMD_DRV_LOG(ERR, "Failed to get features");
                return -1;
        }

        /* Multiqueue not supported for now */
        *features &= ~(1ULL << VIRTIO_NET_F_MQ);

        return 0;
}

static int
vhost_vdpa_set_features(struct virtio_user_dev *dev, uint64_t features)
{
        /* WORKAROUND: vhost-vDPA expects VIRTIO_F_IOMMU_PLATFORM to be
         * negotiated so that all device DMA goes through the IOTLB
         * mappings set up by this driver.
         */
        features |= 1ULL << VIRTIO_F_IOMMU_PLATFORM;

        return vhost_vdpa_ioctl(dev->vhostfd, VHOST_SET_FEATURES, &features);
}

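/*
 * When the backend advertises VHOST_BACKEND_F_IOTLB_BATCH, a
 * BATCH_BEGIN/BATCH_END pair lets it commit a series of IOTLB updates in
 * one transaction instead of applying each message individually. Batching
 * relies on the v2 IOTLB message format.
 */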
static int
vhost_vdpa_iotlb_batch_begin(struct virtio_user_dev *dev)
{
        struct vhost_msg msg = {};

        if (!(dev->protocol_features & (1ULL << VHOST_BACKEND_F_IOTLB_BATCH)))
                return 0;

        if (!(dev->protocol_features & (1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2))) {
                PMD_DRV_LOG(ERR, "IOTLB_MSG_V2 not supported by the backend.");
                return -1;
        }

        msg.type = VHOST_IOTLB_MSG_V2;
        msg.iotlb.type = VHOST_IOTLB_BATCH_BEGIN;

        if (write(dev->vhostfd, &msg, sizeof(msg)) != sizeof(msg)) {
                PMD_DRV_LOG(ERR, "Failed to send IOTLB batch begin (%s)",
                                strerror(errno));
                return -1;
        }

        return 0;
}

static int
vhost_vdpa_iotlb_batch_end(struct virtio_user_dev *dev)
{
        struct vhost_msg msg = {};

        if (!(dev->protocol_features & (1ULL << VHOST_BACKEND_F_IOTLB_BATCH)))
                return 0;

        if (!(dev->protocol_features & (1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2))) {
                PMD_DRV_LOG(ERR, "IOTLB_MSG_V2 not supported by the backend.");
                return -1;
        }

        msg.type = VHOST_IOTLB_MSG_V2;
        msg.iotlb.type = VHOST_IOTLB_BATCH_END;

        if (write(dev->vhostfd, &msg, sizeof(msg)) != sizeof(msg)) {
                PMD_DRV_LOG(ERR, "Failed to send IOTLB batch end (%s)",
                                strerror(errno));
                return -1;
        }

        return 0;
}

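/*
 * DMA (un)mapping is requested by writing IOTLB messages to the
 * vhost-vDPA file descriptor rather than through a dedicated ioctl.
 */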
static int
vhost_vdpa_dma_map(struct virtio_user_dev *dev, void *addr,
                                  uint64_t iova, size_t len)
{
        struct vhost_msg msg = {};

        if (!(dev->protocol_features & (1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2))) {
                PMD_DRV_LOG(ERR, "IOTLB_MSG_V2 not supported by the backend.");
                return -1;
        }

        msg.type = VHOST_IOTLB_MSG_V2;
        msg.iotlb.type = VHOST_IOTLB_UPDATE;
        msg.iotlb.iova = iova;
        msg.iotlb.uaddr = (uint64_t)(uintptr_t)addr;
        msg.iotlb.size = len;
        msg.iotlb.perm = VHOST_ACCESS_RW;

        PMD_DRV_LOG(DEBUG, "%s: iova: 0x%" PRIx64 ", addr: %p, len: 0x%zx",
                        __func__, iova, addr, len);

        if (write(dev->vhostfd, &msg, sizeof(msg)) != sizeof(msg)) {
                PMD_DRV_LOG(ERR, "Failed to send IOTLB update (%s)",
                                strerror(errno));
                return -1;
        }

        return 0;
}

static int
vhost_vdpa_dma_unmap(struct virtio_user_dev *dev, __rte_unused void *addr,
                                  uint64_t iova, size_t len)
{
        struct vhost_msg msg = {};

        if (!(dev->protocol_features & (1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2))) {
                PMD_DRV_LOG(ERR, "IOTLB_MSG_V2 not supported by the backend.");
                return -1;
        }

        msg.type = VHOST_IOTLB_MSG_V2;
        msg.iotlb.type = VHOST_IOTLB_INVALIDATE;
        msg.iotlb.iova = iova;
        msg.iotlb.size = len;

        PMD_DRV_LOG(DEBUG, "%s: iova: 0x%" PRIx64 ", len: 0x%zx",
                        __func__, iova, len);

        if (write(dev->vhostfd, &msg, sizeof(msg)) != sizeof(msg)) {
                PMD_DRV_LOG(ERR, "Failed to send IOTLB invalidate (%s)",
                                strerror(errno));
                return -1;
        }

        return 0;
}

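/* Batched variants: wrap a single (un)map in a BATCH_BEGIN/BATCH_END pair. */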
static int
vhost_vdpa_dma_map_batch(struct virtio_user_dev *dev, void *addr,
                                  uint64_t iova, size_t len)
{
        int ret;

        if (vhost_vdpa_iotlb_batch_begin(dev) < 0)
                return -1;

        ret = vhost_vdpa_dma_map(dev, addr, iova, len);

        if (vhost_vdpa_iotlb_batch_end(dev) < 0)
                return -1;

        return ret;
}

static int
vhost_vdpa_dma_unmap_batch(struct virtio_user_dev *dev, void *addr,
                                  uint64_t iova, size_t len)
{
        int ret;

        if (vhost_vdpa_iotlb_batch_begin(dev) < 0)
                return -1;

        ret = vhost_vdpa_dma_unmap(dev, addr, iova, len);

        if (vhost_vdpa_iotlb_batch_end(dev) < 0)
                return -1;

        return ret;
}

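/* Memseg walk callbacks used to populate the device IOTLB. */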
static int
vhost_vdpa_map_contig(const struct rte_memseg_list *msl,
                const struct rte_memseg *ms, size_t len, void *arg)
{
        struct virtio_user_dev *dev = arg;

        if (msl->external)
                return 0;

        return vhost_vdpa_dma_map(dev, ms->addr, ms->iova, len);
}

static int
vhost_vdpa_map(const struct rte_memseg_list *msl, const struct rte_memseg *ms,
                void *arg)
{
        struct virtio_user_dev *dev = arg;

        /* skip external memory that isn't a heap */
        if (msl->external && !msl->heap)
                return 0;

        /* skip any segments with invalid IOVA addresses */
        if (ms->iova == RTE_BAD_IOVA)
                return 0;

        /* if IOVA mode is VA, we've already mapped the internal segments */
        if (!msl->external && rte_eal_iova_mode() == RTE_IOVA_VA)
                return 0;

        return vhost_vdpa_dma_map(dev, ms->addr, ms->iova, ms->len);
}

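/*
 * Rebuild the whole device IOTLB: invalidate every existing mapping, then
 * walk the EAL memory segments and map them again, under a single batch
 * when the backend supports it.
 */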
static int
vhost_vdpa_set_memory_table(struct virtio_user_dev *dev)
{
        int ret;

        if (vhost_vdpa_iotlb_batch_begin(dev) < 0)
                return -1;

        vhost_vdpa_dma_unmap(dev, NULL, 0, SIZE_MAX);

        if (rte_eal_iova_mode() == RTE_IOVA_VA) {
                /* with IOVA as VA mode, we can get away with mapping contiguous
                 * chunks rather than going page-by-page.
                 */
                ret = rte_memseg_contig_walk_thread_unsafe(
                                vhost_vdpa_map_contig, dev);
                if (ret)
                        goto batch_end;
                /* we have to continue the walk because we've skipped the
                 * external segments during the contiguous walk.
                 */
        }
        ret = rte_memseg_walk_thread_unsafe(vhost_vdpa_map, dev);

batch_end:
        if (vhost_vdpa_iotlb_batch_end(dev) < 0)
                return -1;

        return ret;
}

static int
vhost_vdpa_set_vring_enable(struct virtio_user_dev *dev, struct vhost_vring_state *state)
{
        return vhost_vdpa_ioctl(dev->vhostfd, VHOST_VDPA_SET_VRING_ENABLE, state);
}

static int
vhost_vdpa_set_vring_num(struct virtio_user_dev *dev, struct vhost_vring_state *state)
{
        return vhost_vdpa_ioctl(dev->vhostfd, VHOST_SET_VRING_NUM, state);
}

static int
vhost_vdpa_set_vring_base(struct virtio_user_dev *dev, struct vhost_vring_state *state)
{
        return vhost_vdpa_ioctl(dev->vhostfd, VHOST_SET_VRING_BASE, state);
}

static int
vhost_vdpa_get_vring_base(struct virtio_user_dev *dev, struct vhost_vring_state *state)
{
        return vhost_vdpa_ioctl(dev->vhostfd, VHOST_GET_VRING_BASE, state);
}

static int
vhost_vdpa_set_vring_call(struct virtio_user_dev *dev, struct vhost_vring_file *file)
{
        return vhost_vdpa_ioctl(dev->vhostfd, VHOST_SET_VRING_CALL, file);
}

static int
vhost_vdpa_set_vring_kick(struct virtio_user_dev *dev, struct vhost_vring_file *file)
{
        return vhost_vdpa_ioctl(dev->vhostfd, VHOST_SET_VRING_KICK, file);
}

/* With the features below negotiated, vhost-vDPA does not need to handle
 * checksum and TSO itself: this information is passed to virtio_user
 * through the virtio net header.
 */
#define VHOST_VDPA_GUEST_OFFLOADS_MASK  \
        ((1ULL << VIRTIO_NET_F_GUEST_CSUM) |    \
         (1ULL << VIRTIO_NET_F_GUEST_TSO4) |    \
         (1ULL << VIRTIO_NET_F_GUEST_TSO6) |    \
         (1ULL << VIRTIO_NET_F_GUEST_ECN)  |    \
         (1ULL << VIRTIO_NET_F_GUEST_UFO))

#define VHOST_VDPA_HOST_OFFLOADS_MASK           \
        ((1ULL << VIRTIO_NET_F_HOST_TSO4) |     \
         (1ULL << VIRTIO_NET_F_HOST_TSO6) |     \
         (1ULL << VIRTIO_NET_F_CSUM))

static int
vhost_vdpa_send_request(struct virtio_user_dev *dev,
                   enum vhost_user_request req,
                   void *arg)
{
        int ret = -1;
        uint64_t req_vdpa;

        PMD_DRV_LOG(INFO, "%s", vhost_msg_strings[req]);

        req_vdpa = vhost_req_user_to_vdpa[req];

        switch (req_vdpa) {
        case VHOST_SET_VRING_ADDR:
                PMD_DRV_LOG(DEBUG, "vhostfd=%d, index=%u",
                            dev->vhostfd, *(unsigned int *)arg);
                break;
        default:
                break;
        }

        ret = ioctl(dev->vhostfd, req_vdpa, arg);
        if (ret < 0)
                PMD_DRV_LOG(ERR, "%s failed: %s",
                            vhost_msg_strings[req], strerror(errno));

        return ret;
}

/**
 * Set up the environment to talk to a vhost-vDPA backend.
 *
 * @return
 *   - (-1) if setup fails;
 *   - (0) if successful.
 */
static int
vhost_vdpa_setup(struct virtio_user_dev *dev)
{
        uint32_t did = (uint32_t)-1;

        dev->vhostfd = open(dev->path, O_RDWR);
        if (dev->vhostfd < 0) {
                PMD_DRV_LOG(ERR, "Failed to open %s: %s",
                                dev->path, strerror(errno));
                return -1;
        }

        if (ioctl(dev->vhostfd, VHOST_VDPA_GET_DEVICE_ID, &did) < 0 ||
                        did != VIRTIO_ID_NETWORK) {
                PMD_DRV_LOG(ERR, "Invalid vdpa device ID: %u", did);
                close(dev->vhostfd);
                dev->vhostfd = -1;
                return -1;
        }

        return 0;
}

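/*
 * A queue pair maps to two vrings: RX at index 2 * pair_idx and TX at
 * index 2 * pair_idx + 1. Both are enabled or disabled together.
 */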
static int
vhost_vdpa_enable_queue_pair(struct virtio_user_dev *dev,
                               uint16_t pair_idx,
                               int enable)
{
        int i;

        if (dev->qp_enabled[pair_idx] == enable)
                return 0;

        for (i = 0; i < 2; ++i) {
                struct vhost_vring_state state = {
                        .index = pair_idx * 2 + i,
                        .num   = enable,
                };

                if (vhost_vdpa_set_vring_enable(dev, &state))
                        return -1;
        }

        dev->qp_enabled[pair_idx] = enable;

        return 0;
}

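/* Backend ops plugged into virtio_user for the vhost-vDPA path. */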
struct virtio_user_backend_ops virtio_ops_vdpa = {
        .setup = vhost_vdpa_setup,
        .set_owner = vhost_vdpa_set_owner,
        .get_features = vhost_vdpa_get_features,
        .set_features = vhost_vdpa_set_features,
        .get_protocol_features = vhost_vdpa_get_backend_features,
        .set_protocol_features = vhost_vdpa_set_backend_features,
        .set_memory_table = vhost_vdpa_set_memory_table,
        .set_vring_num = vhost_vdpa_set_vring_num,
        .set_vring_base = vhost_vdpa_set_vring_base,
        .get_vring_base = vhost_vdpa_get_vring_base,
        .set_vring_call = vhost_vdpa_set_vring_call,
        .set_vring_kick = vhost_vdpa_set_vring_kick,
        .send_request = vhost_vdpa_send_request,
        .enable_qp = vhost_vdpa_enable_queue_pair,
        .dma_map = vhost_vdpa_dma_map_batch,
        .dma_unmap = vhost_vdpa_dma_unmap_batch,
};