vhost: add shadow used ring support for packed rings
[dpdk.git] / lib / librte_vhost / vhost.c
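Background note: the change tracked here gives packed virtqueues their own shadow used ring (the shadow_used_packed member freed in free_vq() below). What follows is a minimal, self-contained sketch of the idea, not the DPDK implementation; every type and function name in it is hypothetical. Used-descriptor updates are first staged in a host-private array and only written back to the guest-visible packed descriptor ring, with flags derived from the device's used wrap counter, once the burst completes.

/* Hypothetical, simplified types -- not DPDK's. */
#include <stdint.h>

#define DEMO_DESC_F_AVAIL (1 << 7)
#define DEMO_DESC_F_USED  (1 << 15)

struct demo_packed_desc {
        uint64_t addr;
        uint32_t len;
        uint16_t id;
        uint16_t flags;
};

struct demo_used_elem {
        uint16_t id;
        uint32_t len;
};

struct demo_packed_vq {
        struct demo_packed_desc *desc;  /* guest-shared descriptor ring */
        struct demo_used_elem *shadow;  /* host-private shadow used ring, sized to the ring */
        uint16_t shadow_count;
        uint16_t used_idx;
        uint16_t size;
        uint8_t used_wrap_counter;      /* starts at 1, as in alloc_vring_queue() below */
};

/* Stage one used descriptor without touching guest memory yet. */
void
demo_shadow_used_add(struct demo_packed_vq *vq, uint16_t id, uint32_t len)
{
        vq->shadow[vq->shadow_count].id = id;
        vq->shadow[vq->shadow_count].len = len;
        vq->shadow_count++;
}

/* Write all staged entries back to the packed ring in one pass. */
void
demo_shadow_used_flush(struct demo_packed_vq *vq)
{
        uint16_t i;

        for (i = 0; i < vq->shadow_count; i++) {
                uint16_t flags = vq->used_wrap_counter ?
                        (DEMO_DESC_F_AVAIL | DEMO_DESC_F_USED) : 0;

                vq->desc[vq->used_idx].id = vq->shadow[i].id;
                vq->desc[vq->used_idx].len = vq->shadow[i].len;
                /* A write barrier belongs here; flags must become visible last. */
                vq->desc[vq->used_idx].flags = flags;

                if (++vq->used_idx >= vq->size) {
                        vq->used_idx -= vq->size;
                        vq->used_wrap_counter ^= 1;
                }
        }
        vq->shadow_count = 0;
}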
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2017 Intel Corporation
3  */
4
5 #include <linux/vhost.h>
6 #include <linux/virtio_net.h>
7 #include <stddef.h>
8 #include <stdint.h>
9 #include <stdlib.h>
10 #ifdef RTE_LIBRTE_VHOST_NUMA
11 #include <numaif.h>
12 #endif
13
14 #include <rte_errno.h>
15 #include <rte_ethdev.h>
16 #include <rte_log.h>
17 #include <rte_string_fns.h>
18 #include <rte_memory.h>
19 #include <rte_malloc.h>
20 #include <rte_vhost.h>
21 #include <rte_rwlock.h>
22
23 #include "iotlb.h"
24 #include "vhost.h"
25 #include "vhost_user.h"
26
27 struct virtio_net *vhost_devices[MAX_VHOST_DEVICE];
28
29 /* Called with iotlb_lock read-locked */
30 uint64_t
31 __vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq,
32                     uint64_t iova, uint64_t *size, uint8_t perm)
33 {
34         uint64_t vva, tmp_size;
35
36         if (unlikely(!*size))
37                 return 0;
38
39         tmp_size = *size;
40
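        /*
         * Look up the IOTLB cache first; on return, tmp_size holds how many
         * contiguous bytes could actually be translated.
         */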
41         vva = vhost_user_iotlb_cache_find(vq, iova, &tmp_size, perm);
42         if (tmp_size == *size)
43                 return vva;
44
45         iova += tmp_size;
46
47         if (!vhost_user_iotlb_pending_miss(vq, iova, perm)) {
48                 /*
49                  * iotlb_lock is read-locked for a full burst,
50                  * but it only protects the iotlb cache.
51                  * In case of IOTLB miss, we might block on the socket,
52                  * which could cause a deadlock with QEMU if an IOTLB update
53                  * is being handled. We can safely unlock here to avoid it.
54                  */
55                 vhost_user_iotlb_rd_unlock(vq);
56
57                 vhost_user_iotlb_pending_insert(vq, iova, perm);
58                 if (vhost_user_iotlb_miss(dev, iova, perm)) {
59                         RTE_LOG(ERR, VHOST_CONFIG,
60                                 "IOTLB miss req failed for IOVA 0x%" PRIx64 "\n",
61                                 iova);
62                         vhost_user_iotlb_pending_remove(vq, iova, 1, perm);
63                 }
64
65                 vhost_user_iotlb_rd_lock(vq);
66         }
67
68         return 0;
69 }
70
71 void
72 cleanup_vq(struct vhost_virtqueue *vq, int destroy)
73 {
74         if ((vq->callfd >= 0) && (destroy != 0))
75                 close(vq->callfd);
76         if (vq->kickfd >= 0)
77                 close(vq->kickfd);
78 }
79
80 /*
81  * Unmap any memory, close any file descriptors and
82  * free any memory owned by a device.
83  */
84 void
85 cleanup_device(struct virtio_net *dev, int destroy)
86 {
87         uint32_t i;
88
89         vhost_backend_cleanup(dev);
90
91         for (i = 0; i < dev->nr_vring; i++)
92                 cleanup_vq(dev->virtqueue[i], destroy);
93 }
94
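/*
 * Free a virtqueue's host-private resources. The shadow used ring is a
 * host-local staging buffer for used-descriptor updates; the packed and
 * split ring layouts keep it in separate members, so free whichever
 * matches the negotiated layout.
 */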
95 void
96 free_vq(struct virtio_net *dev, struct vhost_virtqueue *vq)
97 {
98         if (vq_is_packed(dev))
99                 rte_free(vq->shadow_used_packed);
100         else
101                 rte_free(vq->shadow_used_split);
102         rte_free(vq->batch_copy_elems);
103         rte_mempool_free(vq->iotlb_pool);
104         rte_free(vq);
105 }
106
107 /*
108  * Release virtqueues and device memory.
109  */
110 static void
111 free_device(struct virtio_net *dev)
112 {
113         uint32_t i;
114
115         for (i = 0; i < dev->nr_vring; i++)
116                 free_vq(dev, dev->virtqueue[i]);
117
118         rte_free(dev);
119 }
120
121 static int
122 vring_translate_split(struct virtio_net *dev, struct vhost_virtqueue *vq)
123 {
124         uint64_t req_size, size;
125
126         req_size = sizeof(struct vring_desc) * vq->size;
127         size = req_size;
128         vq->desc = (struct vring_desc *)(uintptr_t)vhost_iova_to_vva(dev, vq,
129                                                 vq->ring_addrs.desc_user_addr,
130                                                 &size, VHOST_ACCESS_RW);
131         if (!vq->desc || size != req_size)
132                 return -1;
133
134         req_size = sizeof(struct vring_avail);
135         req_size += sizeof(uint16_t) * vq->size;
136         if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))
137                 req_size += sizeof(uint16_t);
138         size = req_size;
139         vq->avail = (struct vring_avail *)(uintptr_t)vhost_iova_to_vva(dev, vq,
140                                                 vq->ring_addrs.avail_user_addr,
141                                                 &size, VHOST_ACCESS_RW);
142         if (!vq->avail || size != req_size)
143                 return -1;
144
145         req_size = sizeof(struct vring_used);
146         req_size += sizeof(struct vring_used_elem) * vq->size;
147         if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))
148                 req_size += sizeof(uint16_t);
149         size = req_size;
150         vq->used = (struct vring_used *)(uintptr_t)vhost_iova_to_vva(dev, vq,
151                                                 vq->ring_addrs.used_user_addr,
152                                                 &size, VHOST_ACCESS_RW);
153         if (!vq->used || size != req_size)
154                 return -1;
155
156         return 0;
157 }
158
159 static int
160 vring_translate_packed(struct virtio_net *dev, struct vhost_virtqueue *vq)
161 {
162         uint64_t req_size, size;
163
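        /*
         * The packed layout has a single descriptor ring rather than
         * separate desc/avail/used rings, so only the descriptor area
         * needs translation here.
         */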
164         req_size = sizeof(struct vring_packed_desc) * vq->size;
165         size = req_size;
166         vq->desc_packed =
167                 (struct vring_packed_desc *)(uintptr_t)vhost_iova_to_vva(dev,
168                                         vq, vq->ring_addrs.desc_user_addr,
169                                         &size, VHOST_ACCESS_RW);
170         if (!vq->desc_packed || size != req_size)
171                 return -1;
172
173         return 0;
174 }
175
176 int
177 vring_translate(struct virtio_net *dev, struct vhost_virtqueue *vq)
178 {
179
180         if (!(dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)))
181                 goto out;
182
183         if (vq_is_packed(dev)) {
184                 if (vring_translate_packed(dev, vq) < 0)
185                         return -1;
186         } else {
187                 if (vring_translate_split(dev, vq) < 0)
188                         return -1;
189         }
190 out:
191         vq->access_ok = 1;
192
193         return 0;
194 }
195
196 void
197 vring_invalidate(struct virtio_net *dev, struct vhost_virtqueue *vq)
198 {
199         if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
200                 vhost_user_iotlb_wr_lock(vq);
201
202         vq->access_ok = 0;
203         vq->desc = NULL;
204         vq->avail = NULL;
205         vq->used = NULL;
206
207         if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
208                 vhost_user_iotlb_wr_unlock(vq);
209 }
210
211 static void
212 init_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
213 {
214         struct vhost_virtqueue *vq;
215
216         if (vring_idx >= VHOST_MAX_VRING) {
217                 RTE_LOG(ERR, VHOST_CONFIG,
218                                 "Failed to init vring, out of bounds (%d)\n",
219                                 vring_idx);
220                 return;
221         }
222
223         vq = dev->virtqueue[vring_idx];
224
225         memset(vq, 0, sizeof(struct vhost_virtqueue));
226
227         vq->kickfd = VIRTIO_UNINITIALIZED_EVENTFD;
228         vq->callfd = VIRTIO_UNINITIALIZED_EVENTFD;
229
230         vhost_user_iotlb_init(dev, vring_idx);
231         /* Backends are set to -1 indicating an inactive device. */
232         vq->backend = -1;
233
234         TAILQ_INIT(&vq->zmbuf_list);
235 }
236
237 static void
238 reset_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
239 {
240         struct vhost_virtqueue *vq;
241         int callfd;
242
243         if (vring_idx >= VHOST_MAX_VRING) {
244                 RTE_LOG(ERR, VHOST_CONFIG,
245                                 "Failed to init vring, out of bounds (%d)\n",
246                                 vring_idx);
247                 return;
248         }
249
250         vq = dev->virtqueue[vring_idx];
251         callfd = vq->callfd;
252         init_vring_queue(dev, vring_idx);
253         vq->callfd = callfd;
254 }
255
256 int
257 alloc_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
258 {
259         struct vhost_virtqueue *vq;
260
261         vq = rte_malloc(NULL, sizeof(struct vhost_virtqueue), 0);
262         if (vq == NULL) {
263                 RTE_LOG(ERR, VHOST_CONFIG,
264                         "Failed to allocate memory for vring:%u.\n", vring_idx);
265                 return -1;
266         }
267
268         dev->virtqueue[vring_idx] = vq;
269         init_vring_queue(dev, vring_idx);
270         rte_spinlock_init(&vq->access_lock);
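        /* Per the packed virtqueue specification, both wrap counters start at 1. */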
271         vq->avail_wrap_counter = 1;
272         vq->used_wrap_counter = 1;
273
274         dev->nr_vring += 1;
275
276         return 0;
277 }
278
279 /*
280  * Reset some variables in the device structure, while keeping a few
281  * others untouched, such as vid, ifname and nr_vring: they
282  * should remain the same unless the device is removed.
283  */
284 void
285 reset_device(struct virtio_net *dev)
286 {
287         uint32_t i;
288
289         dev->features = 0;
290         dev->protocol_features = 0;
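        /* Keep only the built-in virtio-net flag; clear all other runtime flags. */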
291         dev->flags &= VIRTIO_DEV_BUILTIN_VIRTIO_NET;
292
293         for (i = 0; i < dev->nr_vring; i++)
294                 reset_vring_queue(dev, i);
295 }
296
297 /*
298  * Invoked when a new vhost-user connection is established (i.e. when
299  * a new virtio device is being attached).
300  */
301 int
302 vhost_new_device(void)
303 {
304         struct virtio_net *dev;
305         int i;
306
307         for (i = 0; i < MAX_VHOST_DEVICE; i++) {
308                 if (vhost_devices[i] == NULL)
309                         break;
310         }
311
312         if (i == MAX_VHOST_DEVICE) {
313                 RTE_LOG(ERR, VHOST_CONFIG,
314                         "Failed to find a free slot for new device.\n");
315                 return -1;
316         }
317
318         dev = rte_zmalloc(NULL, sizeof(struct virtio_net), 0);
319         if (dev == NULL) {
320                 RTE_LOG(ERR, VHOST_CONFIG,
321                         "Failed to allocate memory for new dev.\n");
322                 return -1;
323         }
324
325         vhost_devices[i] = dev;
326         dev->vid = i;
327         dev->flags = VIRTIO_DEV_BUILTIN_VIRTIO_NET;
328         dev->slave_req_fd = -1;
329         dev->vdpa_dev_id = -1;
330         rte_spinlock_init(&dev->slave_req_lock);
331
332         return i;
333 }
334
335 void
336 vhost_destroy_device_notify(struct virtio_net *dev)
337 {
338         struct rte_vdpa_device *vdpa_dev;
339         int did;
340
341         if (dev->flags & VIRTIO_DEV_RUNNING) {
342                 did = dev->vdpa_dev_id;
343                 vdpa_dev = rte_vdpa_get_device(did);
344                 if (vdpa_dev && vdpa_dev->ops->dev_close)
345                         vdpa_dev->ops->dev_close(dev->vid);
346                 dev->flags &= ~VIRTIO_DEV_RUNNING;
347                 dev->notify_ops->destroy_device(dev->vid);
348         }
349 }
350
351 /*
352  * Invoked when the vhost-user connection is broken (i.e. when
353  * the virtio device is being detached).
354  */
355 void
356 vhost_destroy_device(int vid)
357 {
358         struct virtio_net *dev = get_device(vid);
359
360         if (dev == NULL)
361                 return;
362
363         vhost_destroy_device_notify(dev);
364
365         cleanup_device(dev, 1);
366         free_device(dev);
367
368         vhost_devices[vid] = NULL;
369 }
370
371 void
372 vhost_attach_vdpa_device(int vid, int did)
373 {
374         struct virtio_net *dev = get_device(vid);
375
376         if (dev == NULL)
377                 return;
378
379         if (rte_vdpa_get_device(did) == NULL)
380                 return;
381
382         dev->vdpa_dev_id = did;
383 }
384
385 void
386 vhost_detach_vdpa_device(int vid)
387 {
388         struct virtio_net *dev = get_device(vid);
389
390         if (dev == NULL)
391                 return;
392
393         vhost_user_host_notifier_ctrl(vid, false);
394
395         dev->vdpa_dev_id = -1;
396 }
397
398 void
399 vhost_set_ifname(int vid, const char *if_name, unsigned int if_len)
400 {
401         struct virtio_net *dev;
402         unsigned int len;
403
404         dev = get_device(vid);
405         if (dev == NULL)
406                 return;
407
408         len = if_len > sizeof(dev->ifname) ?
409                 sizeof(dev->ifname) : if_len;
410
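        /* strncpy() does not guarantee NUL termination; terminate explicitly below. */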
411         strncpy(dev->ifname, if_name, len);
412         dev->ifname[sizeof(dev->ifname) - 1] = '\0';
413 }
414
415 void
416 vhost_enable_dequeue_zero_copy(int vid)
417 {
418         struct virtio_net *dev = get_device(vid);
419
420         if (dev == NULL)
421                 return;
422
423         dev->dequeue_zero_copy = 1;
424 }
425
426 void
427 vhost_set_builtin_virtio_net(int vid, bool enable)
428 {
429         struct virtio_net *dev = get_device(vid);
430
431         if (dev == NULL)
432                 return;
433
434         if (enable)
435                 dev->flags |= VIRTIO_DEV_BUILTIN_VIRTIO_NET;
436         else
437                 dev->flags &= ~VIRTIO_DEV_BUILTIN_VIRTIO_NET;
438 }
439
440 int
441 rte_vhost_get_mtu(int vid, uint16_t *mtu)
442 {
443         struct virtio_net *dev = get_device(vid);
444
445         if (!dev)
446                 return -ENODEV;
447
448         if (!(dev->flags & VIRTIO_DEV_READY))
449                 return -EAGAIN;
450
451         if (!(dev->features & (1ULL << VIRTIO_NET_F_MTU)))
452                 return -ENOTSUP;
453
454         *mtu = dev->mtu;
455
456         return 0;
457 }
458
459 int
460 rte_vhost_get_numa_node(int vid)
461 {
462 #ifdef RTE_LIBRTE_VHOST_NUMA
463         struct virtio_net *dev = get_device(vid);
464         int numa_node;
465         int ret;
466
467         if (dev == NULL)
468                 return -1;
469
470         ret = get_mempolicy(&numa_node, NULL, 0, dev,
471                             MPOL_F_NODE | MPOL_F_ADDR);
472         if (ret < 0) {
473                 RTE_LOG(ERR, VHOST_CONFIG,
474                         "(%d) failed to query numa node: %s\n",
475                         vid, rte_strerror(errno));
476                 return -1;
477         }
478
479         return numa_node;
480 #else
481         RTE_SET_USED(vid);
482         return -1;
483 #endif
484 }
485
486 uint32_t
487 rte_vhost_get_queue_num(int vid)
488 {
489         struct virtio_net *dev = get_device(vid);
490
491         if (dev == NULL)
492                 return 0;
493
494         return dev->nr_vring / 2;
495 }
496
497 uint16_t
498 rte_vhost_get_vring_num(int vid)
499 {
500         struct virtio_net *dev = get_device(vid);
501
502         if (dev == NULL)
503                 return 0;
504
505         return dev->nr_vring;
506 }
507
508 int
509 rte_vhost_get_ifname(int vid, char *buf, size_t len)
510 {
511         struct virtio_net *dev = get_device(vid);
512
513         if (dev == NULL)
514                 return -1;
515
516         len = RTE_MIN(len, sizeof(dev->ifname));
517
518         strncpy(buf, dev->ifname, len);
519         buf[len - 1] = '\0';
520
521         return 0;
522 }
523
524 int
525 rte_vhost_get_negotiated_features(int vid, uint64_t *features)
526 {
527         struct virtio_net *dev;
528
529         dev = get_device(vid);
530         if (!dev)
531                 return -1;
532
533         *features = dev->features;
534         return 0;
535 }
536
537 int
538 rte_vhost_get_mem_table(int vid, struct rte_vhost_memory **mem)
539 {
540         struct virtio_net *dev;
541         struct rte_vhost_memory *m;
542         size_t size;
543
544         dev = get_device(vid);
545         if (!dev)
546                 return -1;
547
548         size = dev->mem->nregions * sizeof(struct rte_vhost_mem_region);
549         m = malloc(sizeof(struct rte_vhost_memory) + size);
550         if (!m)
551                 return -1;
552
553         m->nregions = dev->mem->nregions;
554         memcpy(m->regions, dev->mem->regions, size);
555         *mem = m;
556
557         return 0;
558 }
559
560 int
561 rte_vhost_get_vhost_vring(int vid, uint16_t vring_idx,
562                           struct rte_vhost_vring *vring)
563 {
564         struct virtio_net *dev;
565         struct vhost_virtqueue *vq;
566
567         dev = get_device(vid);
568         if (!dev)
569                 return -1;
570
571         if (vring_idx >= VHOST_MAX_VRING)
572                 return -1;
573
574         vq = dev->virtqueue[vring_idx];
575         if (!vq)
576                 return -1;
577
578         vring->desc  = vq->desc;
579         vring->avail = vq->avail;
580         vring->used  = vq->used;
581         vring->log_guest_addr  = vq->log_guest_addr;
582
583         vring->callfd  = vq->callfd;
584         vring->kickfd  = vq->kickfd;
585         vring->size    = vq->size;
586
587         return 0;
588 }
589
590 int
591 rte_vhost_vring_call(int vid, uint16_t vring_idx)
592 {
593         struct virtio_net *dev;
594         struct vhost_virtqueue *vq;
595
596         dev = get_device(vid);
597         if (!dev)
598                 return -1;
599
600         if (vring_idx >= VHOST_MAX_VRING)
601                 return -1;
602
603         vq = dev->virtqueue[vring_idx];
604         if (!vq)
605                 return -1;
606
607         vhost_vring_call(dev, vq);
608         return 0;
609 }
610
611 uint16_t
612 rte_vhost_avail_entries(int vid, uint16_t queue_id)
613 {
614         struct virtio_net *dev;
615         struct vhost_virtqueue *vq;
616
617         dev = get_device(vid);
618         if (!dev)
619                 return 0;
620
621         vq = dev->virtqueue[queue_id];
622         if (!vq->enabled)
623                 return 0;
624
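        /* The volatile read forces a fresh load of avail->idx from shared memory. */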
625         return *(volatile uint16_t *)&vq->avail->idx - vq->last_used_idx;
626 }
627
628 int
629 rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable)
630 {
631         struct virtio_net *dev = get_device(vid);
632
633         if (!dev)
634                 return -1;
635
636         if (enable)
637                 dev->virtqueue[queue_id]->used->flags &=
638                         ~VRING_USED_F_NO_NOTIFY;
639         else
640                 dev->virtqueue[queue_id]->used->flags |= VRING_USED_F_NO_NOTIFY;
641         return 0;
642 }
643
644 void
645 rte_vhost_log_write(int vid, uint64_t addr, uint64_t len)
646 {
647         struct virtio_net *dev = get_device(vid);
648
649         if (dev == NULL)
650                 return;
651
652         vhost_log_write(dev, addr, len);
653 }
654
655 void
656 rte_vhost_log_used_vring(int vid, uint16_t vring_idx,
657                          uint64_t offset, uint64_t len)
658 {
659         struct virtio_net *dev;
660         struct vhost_virtqueue *vq;
661
662         dev = get_device(vid);
663         if (dev == NULL)
664                 return;
665
666         if (vring_idx >= VHOST_MAX_VRING)
667                 return;
668         vq = dev->virtqueue[vring_idx];
669         if (!vq)
670                 return;
671
672         vhost_log_used_vring(dev, vq, offset, len);
673 }
674
675 uint32_t
676 rte_vhost_rx_queue_count(int vid, uint16_t qid)
677 {
678         struct virtio_net *dev;
679         struct vhost_virtqueue *vq;
680
681         dev = get_device(vid);
682         if (dev == NULL)
683                 return 0;
684
685         if (unlikely(qid >= dev->nr_vring || (qid & 1) == 0)) {
686                 RTE_LOG(ERR, VHOST_DATA, "(%d) %s: invalid virtqueue idx %d.\n",
687                         dev->vid, __func__, qid);
688                 return 0;
689         }
690
691         vq = dev->virtqueue[qid];
692         if (vq == NULL)
693                 return 0;
694
695         if (unlikely(vq->enabled == 0 || vq->avail == NULL))
696                 return 0;
697
698         return *((volatile uint16_t *)&vq->avail->idx) - vq->last_avail_idx;
699 }
700
701 int rte_vhost_get_vdpa_device_id(int vid)
702 {
703         struct virtio_net *dev = get_device(vid);
704
705         if (dev == NULL)
706                 return -1;
707
708         return dev->vdpa_dev_id;
709 }
710
711 int rte_vhost_get_log_base(int vid, uint64_t *log_base,
712                 uint64_t *log_size)
713 {
714         struct virtio_net *dev = get_device(vid);
715
716         if (!dev)
717                 return -1;
718
719         if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) {
720                 RTE_LOG(ERR, VHOST_DATA,
721                         "(%d) %s: built-in vhost net backend is disabled.\n",
722                         dev->vid, __func__);
723                 return -1;
724         }
725
726         *log_base = dev->log_base;
727         *log_size = dev->log_size;
728
729         return 0;
730 }
731
732 int rte_vhost_get_vring_base(int vid, uint16_t queue_id,
733                 uint16_t *last_avail_idx, uint16_t *last_used_idx)
734 {
735         struct virtio_net *dev = get_device(vid);
736
737         if (!dev)
738                 return -1;
739
740         if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) {
741                 RTE_LOG(ERR, VHOST_DATA,
742                         "(%d) %s: built-in vhost net backend is disabled.\n",
743                         dev->vid, __func__);
744                 return -1;
745         }
746
747         *last_avail_idx = dev->virtqueue[queue_id]->last_avail_idx;
748         *last_used_idx = dev->virtqueue[queue_id]->last_used_idx;
749
750         return 0;
751 }
752
753 int rte_vhost_set_vring_base(int vid, uint16_t queue_id,
754                 uint16_t last_avail_idx, uint16_t last_used_idx)
755 {
756         struct virtio_net *dev = get_device(vid);
757
758         if (!dev)
759                 return -1;
760
761         if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) {
762                 RTE_LOG(ERR, VHOST_DATA,
763                         "(%d) %s: built-in vhost net backend is disabled.\n",
764                         dev->vid, __func__);
765                 return -1;
766         }
767
768         dev->virtqueue[queue_id]->last_avail_idx = last_avail_idx;
769         dev->virtqueue[queue_id]->last_used_idx = last_used_idx;
770
771         return 0;
772 }