1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2010-2017 Intel Corporation
5 #include <linux/vhost.h>
6 #include <linux/virtio_net.h>
10 #ifdef RTE_LIBRTE_VHOST_NUMA
15 #include <rte_errno.h>
16 #include <rte_ethdev.h>
18 #include <rte_string_fns.h>
19 #include <rte_memory.h>
20 #include <rte_malloc.h>
21 #include <rte_vhost.h>
22 #include <rte_rwlock.h>
26 #include "vhost_user.h"
28 struct virtio_net *vhost_devices[MAX_VHOST_DEVICE];
29 pthread_mutex_t vhost_dev_lock = PTHREAD_MUTEX_INITIALIZER;
31 /* Called with iotlb_lock read-locked */
33 __vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq,
34 uint64_t iova, uint64_t *size, uint8_t perm)
36 uint64_t vva, tmp_size;
43 vva = vhost_user_iotlb_cache_find(vq, iova, &tmp_size, perm);
44 if (tmp_size == *size)
49 if (!vhost_user_iotlb_pending_miss(vq, iova, perm)) {
51 * iotlb_lock is read-locked for a full burst,
52 * but it only protects the iotlb cache.
53 * In case of IOTLB miss, we might block on the socket,
54 * which could cause a deadlock with QEMU if an IOTLB update
55 * is being handled. We can safely unlock here to avoid it.
57 vhost_user_iotlb_rd_unlock(vq);
59 vhost_user_iotlb_pending_insert(vq, iova, perm);
60 if (vhost_user_iotlb_miss(dev, iova, perm)) {
62 "IOTLB miss req failed for IOVA 0x%" PRIx64 "\n",
64 vhost_user_iotlb_pending_remove(vq, iova, 1, perm);
67 vhost_user_iotlb_rd_lock(vq);
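/*
 * Live-migration dirty logging: the log is a bitmap shared with the
 * frontend in which each bit covers one VHOST_LOG_PAGE (4 KiB) page of
 * guest physical memory. For example, a write to GPA 0x12345 dirties
 * page 0x12345 / 4096 = 18, i.e. bit 2 of byte 2 in the bitmap.
 */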
73 #define VHOST_LOG_PAGE 4096
76 * Atomically set a bit in memory.
78 static __rte_always_inline void
79 vhost_set_bit(unsigned int nr, volatile uint8_t *addr)
81 #if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100)
83 * __sync_ builtins are deprecated, but __atomic_ ones
84 * are poorly optimized in older GCC versions.
86 __sync_fetch_and_or_1(addr, (1U << nr));
88 __atomic_fetch_or(addr, (1U << nr), __ATOMIC_RELAXED);
92 static __rte_always_inline void
93 vhost_log_page(uint8_t *log_base, uint64_t page)
95 vhost_set_bit(page % 8, &log_base[page / 8]);
99 __vhost_log_write(struct virtio_net *dev, uint64_t addr, uint64_t len)
103 if (unlikely(!dev->log_base || !len))
106 if (unlikely(dev->log_size <= ((addr + len - 1) / VHOST_LOG_PAGE / 8)))
109 /* To make sure guest memory updates are committed before logging */
110 rte_atomic_thread_fence(__ATOMIC_RELEASE);
112 page = addr / VHOST_LOG_PAGE;
113 while (page * VHOST_LOG_PAGE < addr + len) {
114 vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page);
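/*
 * IOVA variant: when an IOMMU is in use the datapath only knows the IOVA,
 * so the address is first translated to a host virtual address through the
 * IOTLB and then back to a guest physical address, since the dirty log is
 * indexed by GPA.
 */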
120 __vhost_log_write_iova(struct virtio_net *dev, struct vhost_virtqueue *vq,
121 uint64_t iova, uint64_t len)
123 uint64_t hva, gpa, map_len;
126 hva = __vhost_iova_to_vva(dev, vq, iova, &map_len, VHOST_ACCESS_RW);
127 if (map_len != len) {
129 "Failed to write log for IOVA 0x%" PRIx64 ". No IOTLB entry found\n",
134 gpa = hva_to_gpa(dev, hva, len);
136 __vhost_log_write(dev, gpa, len);
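/*
 * To avoid one atomic update of the shared bitmap per dirtied page, dirty
 * bits can first be accumulated in a small per-virtqueue cache
 * (vq->log_cache) and merged into the shared log with atomic ORs when the
 * cache is synced below.
 */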
140 __vhost_log_cache_sync(struct virtio_net *dev, struct vhost_virtqueue *vq)
142 unsigned long *log_base;
145 if (unlikely(!dev->log_base))
148 /* No cache, nothing to sync */
149 if (unlikely(!vq->log_cache))
152 rte_atomic_thread_fence(__ATOMIC_RELEASE);
154 log_base = (unsigned long *)(uintptr_t)dev->log_base;
156 for (i = 0; i < vq->log_cache_nb_elem; i++) {
157 struct log_cache_entry *elem = vq->log_cache + i;
159 #if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100)
161 * '__sync' builtins are deprecated, but '__atomic' ones
162 * are poorly optimized in older GCC versions.
164 __sync_fetch_and_or(log_base + elem->offset, elem->val);
166 __atomic_fetch_or(log_base + elem->offset, elem->val,
171 rte_atomic_thread_fence(__ATOMIC_RELEASE);
173 vq->log_cache_nb_elem = 0;
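/*
 * Cache one dirty page: the page number is split into an unsigned-long
 * offset and a bit index within that word. An existing cache entry for the
 * same offset is updated in place; otherwise a new entry is appended, and
 * if the cache is full the bit is written to the shared log directly.
 */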
176 static __rte_always_inline void
177 vhost_log_cache_page(struct virtio_net *dev, struct vhost_virtqueue *vq,
180 uint32_t bit_nr = page % (sizeof(unsigned long) << 3);
181 uint32_t offset = page / (sizeof(unsigned long) << 3);
184 if (unlikely(!vq->log_cache)) {
185 /* No logging cache allocated, write dirty log map directly */
186 rte_atomic_thread_fence(__ATOMIC_RELEASE);
187 vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page);
192 for (i = 0; i < vq->log_cache_nb_elem; i++) {
193 struct log_cache_entry *elem = vq->log_cache + i;
195 if (elem->offset == offset) {
196 elem->val |= (1UL << bit_nr);
201 if (unlikely(i >= VHOST_LOG_CACHE_NR)) {
203 * No more room for a new log cache entry,
204 * so write the dirty log map directly.
206 rte_atomic_thread_fence(__ATOMIC_RELEASE);
207 vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page);
212 vq->log_cache[i].offset = offset;
213 vq->log_cache[i].val = (1UL << bit_nr);
214 vq->log_cache_nb_elem++;
218 __vhost_log_cache_write(struct virtio_net *dev, struct vhost_virtqueue *vq,
219 uint64_t addr, uint64_t len)
223 if (unlikely(!dev->log_base || !len))
226 if (unlikely(dev->log_size <= ((addr + len - 1) / VHOST_LOG_PAGE / 8)))
229 page = addr / VHOST_LOG_PAGE;
230 while (page * VHOST_LOG_PAGE < addr + len) {
231 vhost_log_cache_page(dev, vq, page);
237 __vhost_log_cache_write_iova(struct virtio_net *dev, struct vhost_virtqueue *vq,
238 uint64_t iova, uint64_t len)
240 uint64_t hva, gpa, map_len;
243 hva = __vhost_iova_to_vva(dev, vq, iova, &map_len, VHOST_ACCESS_RW);
244 if (map_len != len) {
246 "Failed to write log for IOVA 0x%" PRIx64 ". No IOTLB entry found\n",
251 gpa = hva_to_gpa(dev, hva, len);
253 __vhost_log_cache_write(dev, vq, gpa, len);
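/*
 * Allocate a local copy of a guest indirect descriptor table. The table is
 * copied chunk by chunk through the IOTLB so it can be parsed safely even
 * when it is not contiguous in host virtual memory.
 */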
257 vhost_alloc_copy_ind_table(struct virtio_net *dev, struct vhost_virtqueue *vq,
258 uint64_t desc_addr, uint64_t desc_len)
262 uint64_t len, remain = desc_len;
264 idesc = rte_malloc_socket(__func__, desc_len, 0, vq->numa_node);
265 if (unlikely(!idesc))
268 dst = (uint64_t)(uintptr_t)idesc;
272 src = vhost_iova_to_vva(dev, vq, desc_addr, &len,
274 if (unlikely(!src || !len)) {
279 rte_memcpy((void *)(uintptr_t)dst, (void *)(uintptr_t)src, len);
290 cleanup_vq(struct vhost_virtqueue *vq, int destroy)
292 if ((vq->callfd >= 0) && (destroy != 0))
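/*
 * The inflight_split/inflight_packed areas live in shared memory negotiated
 * via VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD, so only the pointers and the
 * locally allocated resubmit bookkeeping are released here; the mapping
 * itself is presumably torn down by the backend cleanup path.
 */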
299 cleanup_vq_inflight(struct virtio_net *dev, struct vhost_virtqueue *vq)
301 if (!(dev->protocol_features &
302 (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)))
305 if (vq_is_packed(dev)) {
306 if (vq->inflight_packed)
307 vq->inflight_packed = NULL;
309 if (vq->inflight_split)
310 vq->inflight_split = NULL;
313 if (vq->resubmit_inflight) {
314 if (vq->resubmit_inflight->resubmit_list) {
315 rte_free(vq->resubmit_inflight->resubmit_list);
316 vq->resubmit_inflight->resubmit_list = NULL;
318 rte_free(vq->resubmit_inflight);
319 vq->resubmit_inflight = NULL;
324 * Unmap any memory, close any file descriptors and
325 * free any memory owned by a device.
328 cleanup_device(struct virtio_net *dev, int destroy)
332 vhost_backend_cleanup(dev);
334 for (i = 0; i < dev->nr_vring; i++) {
335 cleanup_vq(dev->virtqueue[i], destroy);
336 cleanup_vq_inflight(dev, dev->virtqueue[i]);
341 vhost_free_async_mem(struct vhost_virtqueue *vq)
346 rte_free(vq->async->pkts_info);
348 rte_free(vq->async->buffers_packed);
349 vq->async->buffers_packed = NULL;
350 rte_free(vq->async->descs_split);
351 vq->async->descs_split = NULL;
358 free_vq(struct virtio_net *dev, struct vhost_virtqueue *vq)
360 if (vq_is_packed(dev))
361 rte_free(vq->shadow_used_packed);
363 rte_free(vq->shadow_used_split);
365 vhost_free_async_mem(vq);
366 rte_free(vq->batch_copy_elems);
367 rte_mempool_free(vq->iotlb_pool);
368 rte_free(vq->log_cache);
373 * Release virtqueues and device memory.
376 free_device(struct virtio_net *dev)
380 for (i = 0; i < dev->nr_vring; i++)
381 free_vq(dev, dev->virtqueue[i]);
386 static __rte_always_inline int
387 log_translate(struct virtio_net *dev, struct vhost_virtqueue *vq)
389 if (likely(!(vq->ring_addrs.flags & (1 << VHOST_VRING_F_LOG))))
392 vq->log_guest_addr = translate_log_addr(dev, vq,
393 vq->ring_addrs.log_guest_addr);
394 if (vq->log_guest_addr == 0)
401 * Convert the vring log address to a GPA.
402 * If the IOMMU is enabled, the log address is an IOVA.
403 * If the IOMMU is not enabled, the log address is already a GPA.
405 * Caller should have iotlb_lock read-locked
408 translate_log_addr(struct virtio_net *dev, struct vhost_virtqueue *vq,
411 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) {
412 const uint64_t exp_size = sizeof(uint64_t);
414 uint64_t size = exp_size;
416 hva = vhost_iova_to_vva(dev, vq, log_addr,
417 &size, VHOST_ACCESS_RW);
419 if (size != exp_size)
422 gpa = hva_to_gpa(dev, hva, exp_size);
424 VHOST_LOG_CONFIG(ERR,
425 "VQ: Failed to find GPA for log_addr: 0x%"
426 PRIx64 " hva: 0x%" PRIx64 "\n",
436 /* Caller should have iotlb_lock read-locked */
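/*
 * Translate the split ring addresses provided by the frontend into host
 * virtual addresses. Each of the descriptor, available and used rings must
 * be mapped contiguously for its full size (including the extra event-index
 * field when VIRTIO_RING_F_EVENT_IDX is negotiated), otherwise the
 * translation fails.
 */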
438 vring_translate_split(struct virtio_net *dev, struct vhost_virtqueue *vq)
440 uint64_t req_size, size;
442 req_size = sizeof(struct vring_desc) * vq->size;
444 vq->desc = (struct vring_desc *)(uintptr_t)vhost_iova_to_vva(dev, vq,
445 vq->ring_addrs.desc_user_addr,
446 &size, VHOST_ACCESS_RW);
447 if (!vq->desc || size != req_size)
450 req_size = sizeof(struct vring_avail);
451 req_size += sizeof(uint16_t) * vq->size;
452 if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))
453 req_size += sizeof(uint16_t);
455 vq->avail = (struct vring_avail *)(uintptr_t)vhost_iova_to_vva(dev, vq,
456 vq->ring_addrs.avail_user_addr,
457 &size, VHOST_ACCESS_RW);
458 if (!vq->avail || size != req_size)
461 req_size = sizeof(struct vring_used);
462 req_size += sizeof(struct vring_used_elem) * vq->size;
463 if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))
464 req_size += sizeof(uint16_t);
466 vq->used = (struct vring_used *)(uintptr_t)vhost_iova_to_vva(dev, vq,
467 vq->ring_addrs.used_user_addr,
468 &size, VHOST_ACCESS_RW);
469 if (!vq->used || size != req_size)
475 /* Caller should have iotlb_lock read-locked */
477 vring_translate_packed(struct virtio_net *dev, struct vhost_virtqueue *vq)
479 uint64_t req_size, size;
481 req_size = sizeof(struct vring_packed_desc) * vq->size;
483 vq->desc_packed = (struct vring_packed_desc *)(uintptr_t)
484 vhost_iova_to_vva(dev, vq, vq->ring_addrs.desc_user_addr,
485 &size, VHOST_ACCESS_RW);
486 if (!vq->desc_packed || size != req_size)
489 req_size = sizeof(struct vring_packed_desc_event);
491 vq->driver_event = (struct vring_packed_desc_event *)(uintptr_t)
492 vhost_iova_to_vva(dev, vq, vq->ring_addrs.avail_user_addr,
493 &size, VHOST_ACCESS_RW);
494 if (!vq->driver_event || size != req_size)
497 req_size = sizeof(struct vring_packed_desc_event);
499 vq->device_event = (struct vring_packed_desc_event *)(uintptr_t)
500 vhost_iova_to_vva(dev, vq, vq->ring_addrs.used_user_addr,
501 &size, VHOST_ACCESS_RW);
502 if (!vq->device_event || size != req_size)
509 vring_translate(struct virtio_net *dev, struct vhost_virtqueue *vq)
512 if (!(dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)))
515 if (vq_is_packed(dev)) {
516 if (vring_translate_packed(dev, vq) < 0)
519 if (vring_translate_split(dev, vq) < 0)
523 if (log_translate(dev, vq) < 0)
526 vq->access_ok = true;
532 vring_invalidate(struct virtio_net *dev, struct vhost_virtqueue *vq)
534 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
535 vhost_user_iotlb_wr_lock(vq);
537 vq->access_ok = false;
541 vq->log_guest_addr = 0;
543 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
544 vhost_user_iotlb_wr_unlock(vq);
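/*
 * (Re)initialize a virtqueue to its default state: eventfds are marked
 * uninitialized and, when RTE_LIBRTE_VHOST_NUMA is enabled, the NUMA node
 * backing the virtqueue structure is recorded so that later per-queue
 * allocations can be placed on the same socket.
 */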
548 init_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
550 struct vhost_virtqueue *vq;
551 int numa_node = SOCKET_ID_ANY;
553 if (vring_idx >= VHOST_MAX_VRING) {
554 VHOST_LOG_CONFIG(ERR,
555 "Failed not init vring, out of bound (%d)\n",
560 vq = dev->virtqueue[vring_idx];
562 VHOST_LOG_CONFIG(ERR, "Virtqueue not allocated (%d)\n",
567 memset(vq, 0, sizeof(struct vhost_virtqueue));
569 vq->kickfd = VIRTIO_UNINITIALIZED_EVENTFD;
570 vq->callfd = VIRTIO_UNINITIALIZED_EVENTFD;
571 vq->notif_enable = VIRTIO_UNINITIALIZED_NOTIF;
573 #ifdef RTE_LIBRTE_VHOST_NUMA
574 if (get_mempolicy(&numa_node, NULL, 0, vq, MPOL_F_NODE | MPOL_F_ADDR)) {
575 VHOST_LOG_CONFIG(ERR, "(%d) failed to query numa node: %s\n",
576 dev->vid, rte_strerror(errno));
577 numa_node = SOCKET_ID_ANY;
580 vq->numa_node = numa_node;
582 vhost_user_iotlb_init(dev, vring_idx);
586 reset_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
588 struct vhost_virtqueue *vq;
591 if (vring_idx >= VHOST_MAX_VRING) {
592 VHOST_LOG_CONFIG(ERR,
593 "Failed not init vring, out of bound (%d)\n",
598 vq = dev->virtqueue[vring_idx];
600 VHOST_LOG_CONFIG(ERR, "Virtqueue not allocated (%d)\n",
606 init_vring_queue(dev, vring_idx);
611 alloc_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
613 struct vhost_virtqueue *vq;
616 /* Also allocate holes, if any, up to the requested vring index. */
617 for (i = 0; i <= vring_idx; i++) {
618 if (dev->virtqueue[i])
621 vq = rte_zmalloc(NULL, sizeof(struct vhost_virtqueue), 0);
623 VHOST_LOG_CONFIG(ERR,
624 "Failed to allocate memory for vring:%u.\n", i);
628 dev->virtqueue[i] = vq;
629 init_vring_queue(dev, i);
630 rte_spinlock_init(&vq->access_lock);
631 vq->avail_wrap_counter = 1;
632 vq->used_wrap_counter = 1;
633 vq->signalled_used_valid = false;
636 dev->nr_vring = RTE_MAX(dev->nr_vring, vring_idx + 1);
642 * Reset some variables in the device structure, while keeping a few
643 * others untouched, such as vid, ifname and nr_vring: they
644 * should remain the same unless the device is removed.
647 reset_device(struct virtio_net *dev)
652 dev->protocol_features = 0;
653 dev->flags &= VIRTIO_DEV_BUILTIN_VIRTIO_NET;
655 for (i = 0; i < dev->nr_vring; i++)
656 reset_vring_queue(dev, i);
660 * Invoked when a new vhost-user connection is established (i.e. when
661 * a new virtio device is being attached).
664 vhost_new_device(void)
666 struct virtio_net *dev;
669 pthread_mutex_lock(&vhost_dev_lock);
670 for (i = 0; i < MAX_VHOST_DEVICE; i++) {
671 if (vhost_devices[i] == NULL)
675 if (i == MAX_VHOST_DEVICE) {
676 VHOST_LOG_CONFIG(ERR,
677 "Failed to find a free slot for new device.\n");
678 pthread_mutex_unlock(&vhost_dev_lock);
682 dev = rte_zmalloc(NULL, sizeof(struct virtio_net), 0);
684 VHOST_LOG_CONFIG(ERR,
685 "Failed to allocate memory for new dev.\n");
686 pthread_mutex_unlock(&vhost_dev_lock);
690 vhost_devices[i] = dev;
691 pthread_mutex_unlock(&vhost_dev_lock);
694 dev->flags = VIRTIO_DEV_BUILTIN_VIRTIO_NET;
695 dev->slave_req_fd = -1;
696 dev->postcopy_ufd = -1;
697 rte_spinlock_init(&dev->slave_req_lock);
703 vhost_destroy_device_notify(struct virtio_net *dev)
705 struct rte_vdpa_device *vdpa_dev;
707 if (dev->flags & VIRTIO_DEV_RUNNING) {
708 vdpa_dev = dev->vdpa_dev;
710 vdpa_dev->ops->dev_close(dev->vid);
711 dev->flags &= ~VIRTIO_DEV_RUNNING;
712 dev->notify_ops->destroy_device(dev->vid);
717 * Invoked when the vhost-user connection is broken (i.e. when
718 * the virtio device is being detached).
721 vhost_destroy_device(int vid)
723 struct virtio_net *dev = get_device(vid);
728 vhost_destroy_device_notify(dev);
730 cleanup_device(dev, 1);
733 vhost_devices[vid] = NULL;
737 vhost_attach_vdpa_device(int vid, struct rte_vdpa_device *vdpa_dev)
739 struct virtio_net *dev = get_device(vid);
744 dev->vdpa_dev = vdpa_dev;
748 vhost_set_ifname(int vid, const char *if_name, unsigned int if_len)
750 struct virtio_net *dev;
753 dev = get_device(vid);
757 len = if_len > sizeof(dev->ifname) ?
758 sizeof(dev->ifname) : if_len;
760 strncpy(dev->ifname, if_name, len);
761 dev->ifname[sizeof(dev->ifname) - 1] = '\0';
765 vhost_setup_virtio_net(int vid, bool enable, bool compliant_ol_flags)
767 struct virtio_net *dev = get_device(vid);
773 dev->flags |= VIRTIO_DEV_BUILTIN_VIRTIO_NET;
775 dev->flags &= ~VIRTIO_DEV_BUILTIN_VIRTIO_NET;
776 if (!compliant_ol_flags)
777 dev->flags |= VIRTIO_DEV_LEGACY_OL_FLAGS;
779 dev->flags &= ~VIRTIO_DEV_LEGACY_OL_FLAGS;
783 vhost_enable_extbuf(int vid)
785 struct virtio_net *dev = get_device(vid);
794 vhost_enable_linearbuf(int vid)
796 struct virtio_net *dev = get_device(vid);
805 rte_vhost_get_mtu(int vid, uint16_t *mtu)
807 struct virtio_net *dev = get_device(vid);
809 if (dev == NULL || mtu == NULL)
812 if (!(dev->flags & VIRTIO_DEV_READY))
815 if (!(dev->features & (1ULL << VIRTIO_NET_F_MTU)))
824 rte_vhost_get_numa_node(int vid)
826 #ifdef RTE_LIBRTE_VHOST_NUMA
827 struct virtio_net *dev = get_device(vid);
831 if (dev == NULL || numa_available() != 0)
834 ret = get_mempolicy(&numa_node, NULL, 0, dev,
835 MPOL_F_NODE | MPOL_F_ADDR);
837 VHOST_LOG_CONFIG(ERR,
838 "(%d) failed to query numa node: %s\n",
839 vid, rte_strerror(errno));
851 rte_vhost_get_queue_num(int vid)
853 struct virtio_net *dev = get_device(vid);
858 return dev->nr_vring / 2;
862 rte_vhost_get_vring_num(int vid)
864 struct virtio_net *dev = get_device(vid);
869 return dev->nr_vring;
873 rte_vhost_get_ifname(int vid, char *buf, size_t len)
875 struct virtio_net *dev = get_device(vid);
877 if (dev == NULL || buf == NULL)
880 len = RTE_MIN(len, sizeof(dev->ifname));
882 strncpy(buf, dev->ifname, len);
889 rte_vhost_get_negotiated_features(int vid, uint64_t *features)
891 struct virtio_net *dev;
893 dev = get_device(vid);
894 if (dev == NULL || features == NULL)
897 *features = dev->features;
902 rte_vhost_get_negotiated_protocol_features(int vid,
903 uint64_t *protocol_features)
905 struct virtio_net *dev;
907 dev = get_device(vid);
908 if (dev == NULL || protocol_features == NULL)
911 *protocol_features = dev->protocol_features;
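/*
 * Note: the table returned below is a heap copy of the device memory map;
 * the caller is expected to release it with free() when done.
 */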
916 rte_vhost_get_mem_table(int vid, struct rte_vhost_memory **mem)
918 struct virtio_net *dev;
919 struct rte_vhost_memory *m;
922 dev = get_device(vid);
923 if (dev == NULL || mem == NULL)
926 size = dev->mem->nregions * sizeof(struct rte_vhost_mem_region);
927 m = malloc(sizeof(struct rte_vhost_memory) + size);
931 m->nregions = dev->mem->nregions;
932 memcpy(m->regions, dev->mem->regions, size);
939 rte_vhost_get_vhost_vring(int vid, uint16_t vring_idx,
940 struct rte_vhost_vring *vring)
942 struct virtio_net *dev;
943 struct vhost_virtqueue *vq;
945 dev = get_device(vid);
946 if (dev == NULL || vring == NULL)
949 if (vring_idx >= VHOST_MAX_VRING)
952 vq = dev->virtqueue[vring_idx];
956 if (vq_is_packed(dev)) {
957 vring->desc_packed = vq->desc_packed;
958 vring->driver_event = vq->driver_event;
959 vring->device_event = vq->device_event;
961 vring->desc = vq->desc;
962 vring->avail = vq->avail;
963 vring->used = vq->used;
965 vring->log_guest_addr = vq->log_guest_addr;
967 vring->callfd = vq->callfd;
968 vring->kickfd = vq->kickfd;
969 vring->size = vq->size;
975 rte_vhost_get_vhost_ring_inflight(int vid, uint16_t vring_idx,
976 struct rte_vhost_ring_inflight *vring)
978 struct virtio_net *dev;
979 struct vhost_virtqueue *vq;
981 dev = get_device(vid);
985 if (vring_idx >= VHOST_MAX_VRING)
988 vq = dev->virtqueue[vring_idx];
992 if (vq_is_packed(dev)) {
993 if (unlikely(!vq->inflight_packed))
996 vring->inflight_packed = vq->inflight_packed;
998 if (unlikely(!vq->inflight_split))
1001 vring->inflight_split = vq->inflight_split;
1004 vring->resubmit_inflight = vq->resubmit_inflight;
1010 rte_vhost_set_inflight_desc_split(int vid, uint16_t vring_idx,
1013 struct vhost_virtqueue *vq;
1014 struct virtio_net *dev;
1016 dev = get_device(vid);
1020 if (unlikely(!(dev->protocol_features &
1021 (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1024 if (unlikely(vq_is_packed(dev)))
1027 if (unlikely(vring_idx >= VHOST_MAX_VRING))
1030 vq = dev->virtqueue[vring_idx];
1034 if (unlikely(!vq->inflight_split))
1037 if (unlikely(idx >= vq->size))
1040 vq->inflight_split->desc[idx].counter = vq->global_counter++;
1041 vq->inflight_split->desc[idx].inflight = 1;
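/*
 * Packed-ring variant: the whole descriptor chain [head .. last] is copied
 * into the inflight region's free list, so that descriptors that were in
 * flight can be found and resubmitted after a backend crash/reconnect.
 */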
1046 rte_vhost_set_inflight_desc_packed(int vid, uint16_t vring_idx,
1047 uint16_t head, uint16_t last,
1048 uint16_t *inflight_entry)
1050 struct rte_vhost_inflight_info_packed *inflight_info;
1051 struct virtio_net *dev;
1052 struct vhost_virtqueue *vq;
1053 struct vring_packed_desc *desc;
1054 uint16_t old_free_head, free_head;
1056 dev = get_device(vid);
1060 if (unlikely(!(dev->protocol_features &
1061 (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1064 if (unlikely(!vq_is_packed(dev)))
1067 if (unlikely(vring_idx >= VHOST_MAX_VRING))
1070 vq = dev->virtqueue[vring_idx];
1074 inflight_info = vq->inflight_packed;
1075 if (unlikely(!inflight_info))
1078 if (unlikely(head >= vq->size))
1081 desc = vq->desc_packed;
1082 old_free_head = inflight_info->old_free_head;
1083 if (unlikely(old_free_head >= vq->size))
1086 free_head = old_free_head;
1088 /* init header descriptor */
1089 inflight_info->desc[old_free_head].num = 0;
1090 inflight_info->desc[old_free_head].counter = vq->global_counter++;
1091 inflight_info->desc[old_free_head].inflight = 1;
1093 /* save the descriptor chain into the inflight entries */
1094 while (head != ((last + 1) % vq->size)) {
1095 inflight_info->desc[old_free_head].num++;
1096 inflight_info->desc[free_head].addr = desc[head].addr;
1097 inflight_info->desc[free_head].len = desc[head].len;
1098 inflight_info->desc[free_head].flags = desc[head].flags;
1099 inflight_info->desc[free_head].id = desc[head].id;
1101 inflight_info->desc[old_free_head].last = free_head;
1102 free_head = inflight_info->desc[free_head].next;
1103 inflight_info->free_head = free_head;
1104 head = (head + 1) % vq->size;
1107 inflight_info->old_free_head = free_head;
1108 *inflight_entry = old_free_head;
1114 rte_vhost_clr_inflight_desc_split(int vid, uint16_t vring_idx,
1115 uint16_t last_used_idx, uint16_t idx)
1117 struct virtio_net *dev;
1118 struct vhost_virtqueue *vq;
1120 dev = get_device(vid);
1124 if (unlikely(!(dev->protocol_features &
1125 (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1128 if (unlikely(vq_is_packed(dev)))
1131 if (unlikely(vring_idx >= VHOST_MAX_VRING))
1134 vq = dev->virtqueue[vring_idx];
1138 if (unlikely(!vq->inflight_split))
1141 if (unlikely(idx >= vq->size))
1144 rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
1146 vq->inflight_split->desc[idx].inflight = 0;
1148 rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
1150 vq->inflight_split->used_idx = last_used_idx;
1155 rte_vhost_clr_inflight_desc_packed(int vid, uint16_t vring_idx,
1158 struct rte_vhost_inflight_info_packed *inflight_info;
1159 struct virtio_net *dev;
1160 struct vhost_virtqueue *vq;
1162 dev = get_device(vid);
1166 if (unlikely(!(dev->protocol_features &
1167 (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1170 if (unlikely(!vq_is_packed(dev)))
1173 if (unlikely(vring_idx >= VHOST_MAX_VRING))
1176 vq = dev->virtqueue[vring_idx];
1180 inflight_info = vq->inflight_packed;
1181 if (unlikely(!inflight_info))
1184 if (unlikely(head >= vq->size))
1187 rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
1189 inflight_info->desc[head].inflight = 0;
1191 rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
1193 inflight_info->old_free_head = inflight_info->free_head;
1194 inflight_info->old_used_idx = inflight_info->used_idx;
1195 inflight_info->old_used_wrap_counter = inflight_info->used_wrap_counter;
1201 rte_vhost_set_last_inflight_io_split(int vid, uint16_t vring_idx,
1204 struct virtio_net *dev;
1205 struct vhost_virtqueue *vq;
1207 dev = get_device(vid);
1211 if (unlikely(!(dev->protocol_features &
1212 (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1215 if (unlikely(vq_is_packed(dev)))
1218 if (unlikely(vring_idx >= VHOST_MAX_VRING))
1221 vq = dev->virtqueue[vring_idx];
1225 if (unlikely(!vq->inflight_split))
1228 if (unlikely(idx >= vq->size))
1231 vq->inflight_split->last_inflight_io = idx;
1236 rte_vhost_set_last_inflight_io_packed(int vid, uint16_t vring_idx,
1239 struct rte_vhost_inflight_info_packed *inflight_info;
1240 struct virtio_net *dev;
1241 struct vhost_virtqueue *vq;
1244 dev = get_device(vid);
1248 if (unlikely(!(dev->protocol_features &
1249 (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1252 if (unlikely(!vq_is_packed(dev)))
1255 if (unlikely(vring_idx >= VHOST_MAX_VRING))
1258 vq = dev->virtqueue[vring_idx];
1262 inflight_info = vq->inflight_packed;
1263 if (unlikely(!inflight_info))
1266 if (unlikely(head >= vq->size))
1269 last = inflight_info->desc[head].last;
1270 if (unlikely(last >= vq->size))
1273 inflight_info->desc[last].next = inflight_info->free_head;
1274 inflight_info->free_head = head;
1275 inflight_info->used_idx += inflight_info->desc[head].num;
1276 if (inflight_info->used_idx >= inflight_info->desc_num) {
1277 inflight_info->used_idx -= inflight_info->desc_num;
1278 inflight_info->used_wrap_counter =
1279 !inflight_info->used_wrap_counter;
1286 rte_vhost_vring_call(int vid, uint16_t vring_idx)
1288 struct virtio_net *dev;
1289 struct vhost_virtqueue *vq;
1291 dev = get_device(vid);
1295 if (vring_idx >= VHOST_MAX_VRING)
1298 vq = dev->virtqueue[vring_idx];
1302 if (vq_is_packed(dev))
1303 vhost_vring_call_packed(dev, vq);
1305 vhost_vring_call_split(dev, vq);
1311 rte_vhost_avail_entries(int vid, uint16_t queue_id)
1313 struct virtio_net *dev;
1314 struct vhost_virtqueue *vq;
1317 dev = get_device(vid);
1321 if (queue_id >= VHOST_MAX_VRING)
1324 vq = dev->virtqueue[queue_id];
1328 rte_spinlock_lock(&vq->access_lock);
1330 if (unlikely(!vq->enabled || vq->avail == NULL))
1333 ret = *(volatile uint16_t *)&vq->avail->idx - vq->last_used_idx;
1336 rte_spinlock_unlock(&vq->access_lock);
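/*
 * Guest notification control for split rings: without
 * VIRTIO_RING_F_EVENT_IDX the VRING_USED_F_NO_NOTIFY flag is toggled in the
 * used ring; with it, notifications are (re)enabled by updating the avail
 * event index to the current last_avail_idx.
 */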
1341 vhost_enable_notify_split(struct virtio_net *dev,
1342 struct vhost_virtqueue *vq, int enable)
1344 if (vq->used == NULL)
1347 if (!(dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))) {
1349 vq->used->flags &= ~VRING_USED_F_NO_NOTIFY;
1351 vq->used->flags |= VRING_USED_F_NO_NOTIFY;
1354 vhost_avail_event(vq) = vq->last_avail_idx;
1360 vhost_enable_notify_packed(struct virtio_net *dev,
1361 struct vhost_virtqueue *vq, int enable)
1365 if (vq->device_event == NULL)
1369 vq->device_event->flags = VRING_EVENT_F_DISABLE;
1373 flags = VRING_EVENT_F_ENABLE;
1374 if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX)) {
1375 flags = VRING_EVENT_F_DESC;
1376 vq->device_event->off_wrap = vq->last_avail_idx |
1377 vq->avail_wrap_counter << 15;
1380 rte_atomic_thread_fence(__ATOMIC_RELEASE);
1382 vq->device_event->flags = flags;
1387 vhost_enable_guest_notification(struct virtio_net *dev,
1388 struct vhost_virtqueue *vq, int enable)
1391 * If the virtqueue is not ready yet, the setting will be applied
1392 * when it becomes ready.
1397 if (vq_is_packed(dev))
1398 return vhost_enable_notify_packed(dev, vq, enable);
1400 return vhost_enable_notify_split(dev, vq, enable);
1404 rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable)
1406 struct virtio_net *dev = get_device(vid);
1407 struct vhost_virtqueue *vq;
1413 if (queue_id >= VHOST_MAX_VRING)
1416 vq = dev->virtqueue[queue_id];
1420 rte_spinlock_lock(&vq->access_lock);
1422 vq->notif_enable = enable;
1423 ret = vhost_enable_guest_notification(dev, vq, enable);
1425 rte_spinlock_unlock(&vq->access_lock);
1431 rte_vhost_log_write(int vid, uint64_t addr, uint64_t len)
1433 struct virtio_net *dev = get_device(vid);
1438 vhost_log_write(dev, addr, len);
1442 rte_vhost_log_used_vring(int vid, uint16_t vring_idx,
1443 uint64_t offset, uint64_t len)
1445 struct virtio_net *dev;
1446 struct vhost_virtqueue *vq;
1448 dev = get_device(vid);
1452 if (vring_idx >= VHOST_MAX_VRING)
1454 vq = dev->virtqueue[vring_idx];
1458 vhost_log_used_vring(dev, vq, offset, len);
1462 rte_vhost_rx_queue_count(int vid, uint16_t qid)
1464 struct virtio_net *dev;
1465 struct vhost_virtqueue *vq;
1468 dev = get_device(vid);
1472 if (unlikely(qid >= dev->nr_vring || (qid & 1) == 0)) {
1473 VHOST_LOG_DATA(ERR, "(%d) %s: invalid virtqueue idx %d.\n",
1474 dev->vid, __func__, qid);
1478 vq = dev->virtqueue[qid];
1482 rte_spinlock_lock(&vq->access_lock);
1484 if (unlikely(!vq->enabled || vq->avail == NULL))
1487 ret = *((volatile uint16_t *)&vq->avail->idx) - vq->last_avail_idx;
1490 rte_spinlock_unlock(&vq->access_lock);
1494 struct rte_vdpa_device *
1495 rte_vhost_get_vdpa_device(int vid)
1497 struct virtio_net *dev = get_device(vid);
1502 return dev->vdpa_dev;
1506 rte_vhost_get_log_base(int vid, uint64_t *log_base,
1509 struct virtio_net *dev = get_device(vid);
1511 if (dev == NULL || log_base == NULL || log_size == NULL)
1514 *log_base = dev->log_base;
1515 *log_size = dev->log_size;
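/*
 * For packed rings the indexes reported to and taken from the application
 * encode the wrap counter in bit 15: idx = last_idx | (wrap_counter << 15).
 */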
1521 rte_vhost_get_vring_base(int vid, uint16_t queue_id,
1522 uint16_t *last_avail_idx, uint16_t *last_used_idx)
1524 struct vhost_virtqueue *vq;
1525 struct virtio_net *dev = get_device(vid);
1527 if (dev == NULL || last_avail_idx == NULL || last_used_idx == NULL)
1530 if (queue_id >= VHOST_MAX_VRING)
1533 vq = dev->virtqueue[queue_id];
1537 if (vq_is_packed(dev)) {
1538 *last_avail_idx = (vq->avail_wrap_counter << 15) |
1540 *last_used_idx = (vq->used_wrap_counter << 15) |
1543 *last_avail_idx = vq->last_avail_idx;
1544 *last_used_idx = vq->last_used_idx;
1551 rte_vhost_set_vring_base(int vid, uint16_t queue_id,
1552 uint16_t last_avail_idx, uint16_t last_used_idx)
1554 struct vhost_virtqueue *vq;
1555 struct virtio_net *dev = get_device(vid);
1560 if (queue_id >= VHOST_MAX_VRING)
1563 vq = dev->virtqueue[queue_id];
1567 if (vq_is_packed(dev)) {
1568 vq->last_avail_idx = last_avail_idx & 0x7fff;
1569 vq->avail_wrap_counter = !!(last_avail_idx & (1 << 15));
1570 vq->last_used_idx = last_used_idx & 0x7fff;
1571 vq->used_wrap_counter = !!(last_used_idx & (1 << 15));
1573 vq->last_avail_idx = last_avail_idx;
1574 vq->last_used_idx = last_used_idx;
1581 rte_vhost_get_vring_base_from_inflight(int vid,
1583 uint16_t *last_avail_idx,
1584 uint16_t *last_used_idx)
1586 struct rte_vhost_inflight_info_packed *inflight_info;
1587 struct vhost_virtqueue *vq;
1588 struct virtio_net *dev = get_device(vid);
1590 if (dev == NULL || last_avail_idx == NULL || last_used_idx == NULL)
1593 if (queue_id >= VHOST_MAX_VRING)
1596 vq = dev->virtqueue[queue_id];
1600 if (!vq_is_packed(dev))
1603 inflight_info = vq->inflight_packed;
1607 *last_avail_idx = (inflight_info->old_used_wrap_counter << 15) |
1608 inflight_info->old_used_idx;
1609 *last_used_idx = *last_avail_idx;
1615 rte_vhost_extern_callback_register(int vid,
1616 struct rte_vhost_user_extern_ops const * const ops, void *ctx)
1618 struct virtio_net *dev = get_device(vid);
1620 if (dev == NULL || ops == NULL)
1623 dev->extern_ops = *ops;
1624 dev->extern_data = ctx;
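/*
 * Allocate the per-virtqueue async-copy context on the queue's NUMA node:
 * a vhost_async structure, a pkts_info array sized to the ring, and either
 * a buffers_packed or a descs_split shadow array depending on the ring
 * layout. Any partially allocated memory is released on failure.
 */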
1628 static __rte_always_inline int
1629 async_channel_register(int vid, uint16_t queue_id,
1630 struct rte_vhost_async_channel_ops *ops)
1632 struct virtio_net *dev = get_device(vid);
1633 struct vhost_virtqueue *vq = dev->virtqueue[queue_id];
1634 struct vhost_async *async;
1635 int node = vq->numa_node;
1637 if (unlikely(vq->async)) {
1638 VHOST_LOG_CONFIG(ERR,
1639 "async register failed: already registered (vid %d, qid: %d)\n",
1644 async = rte_zmalloc_socket(NULL, sizeof(struct vhost_async), 0, node);
1646 VHOST_LOG_CONFIG(ERR, "failed to allocate async metadata (vid %d, qid: %d)\n",
1651 async->pkts_info = rte_malloc_socket(NULL, vq->size * sizeof(struct async_inflight_info),
1652 RTE_CACHE_LINE_SIZE, node);
1653 if (!async->pkts_info) {
1654 VHOST_LOG_CONFIG(ERR, "failed to allocate async_pkts_info (vid %d, qid: %d)\n",
1656 goto out_free_async;
1659 if (vq_is_packed(dev)) {
1660 async->buffers_packed = rte_malloc_socket(NULL,
1661 vq->size * sizeof(struct vring_used_elem_packed),
1662 RTE_CACHE_LINE_SIZE, node);
1663 if (!async->buffers_packed) {
1664 VHOST_LOG_CONFIG(ERR, "failed to allocate async buffers (vid %d, qid: %d)\n",
1666 goto out_free_inflight;
1669 async->descs_split = rte_malloc_socket(NULL,
1670 vq->size * sizeof(struct vring_used_elem),
1671 RTE_CACHE_LINE_SIZE, node);
1672 if (!async->descs_split) {
1673 VHOST_LOG_CONFIG(ERR, "failed to allocate async descs (vid %d, qid: %d)\n",
1675 goto out_free_inflight;
1679 async->ops.check_completed_copies = ops->check_completed_copies;
1680 async->ops.transfer_data = ops->transfer_data;
1686 rte_free(async->pkts_info);
1694 rte_vhost_async_channel_register(int vid, uint16_t queue_id,
1695 struct rte_vhost_async_config config,
1696 struct rte_vhost_async_channel_ops *ops)
1698 struct vhost_virtqueue *vq;
1699 struct virtio_net *dev = get_device(vid);
1702 if (dev == NULL || ops == NULL)
1705 if (queue_id >= VHOST_MAX_VRING)
1708 vq = dev->virtqueue[queue_id];
1710 if (unlikely(vq == NULL || !dev->async_copy))
1713 if (unlikely(!(config.features & RTE_VHOST_ASYNC_INORDER))) {
1714 VHOST_LOG_CONFIG(ERR,
1715 "async copy is not supported on non-inorder mode "
1716 "(vid %d, qid: %d)\n", vid, queue_id);
1720 if (unlikely(ops->check_completed_copies == NULL ||
1721 ops->transfer_data == NULL))
1724 rte_spinlock_lock(&vq->access_lock);
1725 ret = async_channel_register(vid, queue_id, ops);
1726 rte_spinlock_unlock(&vq->access_lock);
1732 rte_vhost_async_channel_register_thread_unsafe(int vid, uint16_t queue_id,
1733 struct rte_vhost_async_config config,
1734 struct rte_vhost_async_channel_ops *ops)
1736 struct vhost_virtqueue *vq;
1737 struct virtio_net *dev = get_device(vid);
1739 if (dev == NULL || ops == NULL)
1742 if (queue_id >= VHOST_MAX_VRING)
1745 vq = dev->virtqueue[queue_id];
1747 if (unlikely(vq == NULL || !dev->async_copy))
1750 if (unlikely(!(config.features & RTE_VHOST_ASYNC_INORDER))) {
1751 VHOST_LOG_CONFIG(ERR,
1752 "async copy is not supported on non-inorder mode "
1753 "(vid %d, qid: %d)\n", vid, queue_id);
1757 if (unlikely(ops->check_completed_copies == NULL ||
1758 ops->transfer_data == NULL))
1761 return async_channel_register(vid, queue_id, ops);
1765 rte_vhost_async_channel_unregister(int vid, uint16_t queue_id)
1767 struct vhost_virtqueue *vq;
1768 struct virtio_net *dev = get_device(vid);
1774 if (queue_id >= VHOST_MAX_VRING)
1777 vq = dev->virtqueue[queue_id];
1787 if (!rte_spinlock_trylock(&vq->access_lock)) {
1788 VHOST_LOG_CONFIG(ERR, "Failed to unregister async channel. "
1789 "virt queue busy.\n");
1793 if (vq->async->pkts_inflight_n) {
1794 VHOST_LOG_CONFIG(ERR, "Failed to unregister async channel. "
1795 "async inflight packets must be completed before unregistration.\n");
1800 vhost_free_async_mem(vq);
1802 rte_spinlock_unlock(&vq->access_lock);
1808 rte_vhost_async_channel_unregister_thread_unsafe(int vid, uint16_t queue_id)
1810 struct vhost_virtqueue *vq;
1811 struct virtio_net *dev = get_device(vid);
1816 if (queue_id >= VHOST_MAX_VRING)
1819 vq = dev->virtqueue[queue_id];
1827 if (vq->async->pkts_inflight_n) {
1828 VHOST_LOG_CONFIG(ERR, "Failed to unregister async channel. "
1829 "async inflight packets must be completed before unregistration.\n");
1833 vhost_free_async_mem(vq);
1839 rte_vhost_async_get_inflight(int vid, uint16_t queue_id)
1841 struct vhost_virtqueue *vq;
1842 struct virtio_net *dev = get_device(vid);
1848 if (queue_id >= VHOST_MAX_VRING)
1851 vq = dev->virtqueue[queue_id];
1859 if (!rte_spinlock_trylock(&vq->access_lock)) {
1860 VHOST_LOG_CONFIG(DEBUG, "Failed to check in-flight packets. "
1861 "virt queue busy.\n");
1865 ret = vq->async->pkts_inflight_n;
1866 rte_spinlock_unlock(&vq->access_lock);
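/*
 * Fill a power-monitor condition so a core can sleep (e.g. with UMWAIT)
 * until the guest posts new work: for packed rings the flags of the next
 * descriptor are watched for the expected AVAIL/USED combination, for split
 * rings the avail->idx field is watched for a change past last_avail_idx.
 */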
1872 rte_vhost_get_monitor_addr(int vid, uint16_t queue_id,
1873 struct rte_vhost_power_monitor_cond *pmc)
1875 struct virtio_net *dev = get_device(vid);
1876 struct vhost_virtqueue *vq;
1880 if (queue_id >= VHOST_MAX_VRING)
1883 vq = dev->virtqueue[queue_id];
1887 if (vq_is_packed(dev)) {
1888 struct vring_packed_desc *desc;
1889 desc = vq->desc_packed;
1890 pmc->addr = &desc[vq->last_avail_idx].flags;
1891 if (vq->avail_wrap_counter)
1892 pmc->val = VRING_DESC_F_AVAIL;
1894 pmc->val = VRING_DESC_F_USED;
1895 pmc->mask = VRING_DESC_F_AVAIL | VRING_DESC_F_USED;
1896 pmc->size = sizeof(desc[vq->last_avail_idx].flags);
1899 pmc->addr = &vq->avail->idx;
1900 pmc->val = vq->last_avail_idx & (vq->size - 1);
1901 pmc->mask = vq->size - 1;
1902 pmc->size = sizeof(vq->avail->idx);
1909 RTE_LOG_REGISTER_SUFFIX(vhost_config_log_level, config, INFO);
1910 RTE_LOG_REGISTER_SUFFIX(vhost_data_log_level, data, WARNING);