diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
index 0cf6172..52ab93d 100644
@@ -26,6 +26,7 @@
 #include "vhost_user.h"
 
 struct virtio_net *vhost_devices[MAX_VHOST_DEVICE];
+pthread_mutex_t vhost_dev_lock = PTHREAD_MUTEX_INITIALIZER;
 
 /* Called with iotlb_lock read-locked */
 uint64_t
@@ -106,7 +107,7 @@ __vhost_log_write(struct virtio_net *dev, uint64_t addr, uint64_t len)
                return;
 
        /* To make sure guest memory updates are committed before logging */
-       rte_smp_wmb();
+       rte_atomic_thread_fence(__ATOMIC_RELEASE);
 
        page = addr / VHOST_LOG_PAGE;
        while (page * VHOST_LOG_PAGE < addr + len) {
@@ -144,7 +145,7 @@ __vhost_log_cache_sync(struct virtio_net *dev, struct vhost_virtqueue *vq)
        if (unlikely(!dev->log_base))
                return;
 
-       rte_smp_wmb();
+       rte_atomic_thread_fence(__ATOMIC_RELEASE);
 
        log_base = (unsigned long *)(uintptr_t)dev->log_base;
 
@@ -163,7 +164,7 @@ __vhost_log_cache_sync(struct virtio_net *dev, struct vhost_virtqueue *vq)
 #endif
        }
 
-       rte_smp_wmb();
+       rte_atomic_thread_fence(__ATOMIC_RELEASE);
 
        vq->log_cache_nb_elem = 0;
 }
@@ -190,7 +191,7 @@ vhost_log_cache_page(struct virtio_net *dev, struct vhost_virtqueue *vq,
                 * No more room for a new log cache entry,
                 * so write the dirty log map directly.
                 */
-               rte_smp_wmb();
+               rte_atomic_thread_fence(__ATOMIC_RELEASE);
                vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page);
 
                return;
@@ -324,13 +325,33 @@ cleanup_device(struct virtio_net *dev, int destroy)
        }
 }
 
+static void
+vhost_free_async_mem(struct vhost_virtqueue *vq)
+{
+       if (vq->async_pkts_info)
+               rte_free(vq->async_pkts_info);
+       if (vq->async_descs_split)
+               rte_free(vq->async_descs_split);
+       if (vq->it_pool)
+               rte_free(vq->it_pool);
+       if (vq->vec_pool)
+               rte_free(vq->vec_pool);
+
+       vq->async_pkts_info = NULL;
+       vq->async_descs_split = NULL;
+       vq->it_pool = NULL;
+       vq->vec_pool = NULL;
+}
+
 void
 free_vq(struct virtio_net *dev, struct vhost_virtqueue *vq)
 {
        if (vq_is_packed(dev))
                rte_free(vq->shadow_used_packed);
-       else
+       else {
                rte_free(vq->shadow_used_split);
+               vhost_free_async_mem(vq);
+       }
        rte_free(vq->batch_copy_elems);
        rte_mempool_free(vq->iotlb_pool);
        rte_free(vq);
@@ -524,17 +545,21 @@ init_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
        }
 
        vq = dev->virtqueue[vring_idx];
+       if (!vq) {
+               VHOST_LOG_CONFIG(ERR, "Virtqueue not allocated (%d)\n",
+                               vring_idx);
+               return;
+       }
 
        memset(vq, 0, sizeof(struct vhost_virtqueue));
 
        vq->kickfd = VIRTIO_UNINITIALIZED_EVENTFD;
        vq->callfd = VIRTIO_UNINITIALIZED_EVENTFD;
+       vq->notif_enable = VIRTIO_UNINITIALIZED_NOTIF;
 
        vhost_user_iotlb_init(dev, vring_idx);
        /* Backends are set to -1 indicating an inactive device. */
        vq->backend = -1;
-
-       TAILQ_INIT(&vq->zmbuf_list);
 }
 
 static void
@@ -551,6 +576,12 @@ reset_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
        }
 
        vq = dev->virtqueue[vring_idx];
+       if (!vq) {
+               VHOST_LOG_CONFIG(ERR, "Virtqueue not allocated (%d)\n",
+                               vring_idx);
+               return;
+       }
+
        callfd = vq->callfd;
        init_vring_queue(dev, vring_idx);
        vq->callfd = callfd;
@@ -560,22 +591,29 @@ int
 alloc_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
 {
        struct vhost_virtqueue *vq;
+       uint32_t i;
 
-       vq = rte_malloc(NULL, sizeof(struct vhost_virtqueue), 0);
-       if (vq == NULL) {
-               VHOST_LOG_CONFIG(ERR,
-                       "Failed to allocate memory for vring:%u.\n", vring_idx);
-               return -1;
-       }
+       /* Also allocate holes, if any, up to requested vring index. */
+       for (i = 0; i <= vring_idx; i++) {
+               if (dev->virtqueue[i])
+                       continue;
 
-       dev->virtqueue[vring_idx] = vq;
-       init_vring_queue(dev, vring_idx);
-       rte_spinlock_init(&vq->access_lock);
-       vq->avail_wrap_counter = 1;
-       vq->used_wrap_counter = 1;
-       vq->signalled_used_valid = false;
+               vq = rte_malloc(NULL, sizeof(struct vhost_virtqueue), 0);
+               if (vq == NULL) {
+                       VHOST_LOG_CONFIG(ERR,
+                               "Failed to allocate memory for vring:%u.\n", i);
+                       return -1;
+               }
 
-       dev->nr_vring += 1;
+               dev->virtqueue[i] = vq;
+               init_vring_queue(dev, i);
+               rte_spinlock_init(&vq->access_lock);
+               vq->avail_wrap_counter = 1;
+               vq->used_wrap_counter = 1;
+               vq->signalled_used_valid = false;
+       }
+
+       dev->nr_vring = RTE_MAX(dev->nr_vring, vring_idx + 1);
 
        return 0;
 }
@@ -608,6 +646,7 @@ vhost_new_device(void)
        struct virtio_net *dev;
        int i;
 
+       pthread_mutex_lock(&vhost_dev_lock);
        for (i = 0; i < MAX_VHOST_DEVICE; i++) {
                if (vhost_devices[i] == NULL)
                        break;
@@ -616,6 +655,7 @@ vhost_new_device(void)
        if (i == MAX_VHOST_DEVICE) {
                VHOST_LOG_CONFIG(ERR,
                        "Failed to find a free slot for new device.\n");
+               pthread_mutex_unlock(&vhost_dev_lock);
                return -1;
        }
 
@@ -623,10 +663,13 @@ vhost_new_device(void)
        if (dev == NULL) {
                VHOST_LOG_CONFIG(ERR,
                        "Failed to allocate memory for new dev.\n");
+               pthread_mutex_unlock(&vhost_dev_lock);
                return -1;
        }
 
        vhost_devices[i] = dev;
+       pthread_mutex_unlock(&vhost_dev_lock);
+
        dev->vid = i;
        dev->flags = VIRTIO_DEV_BUILTIN_VIRTIO_NET;
        dev->slave_req_fd = -1;
@@ -643,7 +686,7 @@ vhost_destroy_device_notify(struct virtio_net *dev)
 
        if (dev->flags & VIRTIO_DEV_RUNNING) {
                vdpa_dev = dev->vdpa_dev;
-               if (vdpa_dev && vdpa_dev->ops->dev_close)
+               if (vdpa_dev)
                        vdpa_dev->ops->dev_close(dev->vid);
                dev->flags &= ~VIRTIO_DEV_RUNNING;
                dev->notify_ops->destroy_device(dev->vid);
@@ -698,17 +741,6 @@ vhost_set_ifname(int vid, const char *if_name, unsigned int if_len)
        dev->ifname[sizeof(dev->ifname) - 1] = '\0';
 }
 
-void
-vhost_enable_dequeue_zero_copy(int vid)
-{
-       struct virtio_net *dev = get_device(vid);
-
-       if (dev == NULL)
-               return;
-
-       dev->dequeue_zero_copy = 1;
-}
-
 void
 vhost_set_builtin_virtio_net(int vid, bool enable)
 {
@@ -1071,11 +1103,11 @@ rte_vhost_clr_inflight_desc_split(int vid, uint16_t vring_idx,
        if (unlikely(idx >= vq->size))
                return -1;
 
-       rte_smp_mb();
+       rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
 
        vq->inflight_split->desc[idx].inflight = 0;
 
-       rte_smp_mb();
+       rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
 
        vq->inflight_split->used_idx = last_used_idx;
        return 0;
@@ -1114,11 +1146,11 @@ rte_vhost_clr_inflight_desc_packed(int vid, uint16_t vring_idx,
        if (unlikely(head >= vq->size))
                return -1;
 
-       rte_smp_mb();
+       rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
 
        inflight_info->desc[head].inflight = 0;
 
-       rte_smp_mb();
+       rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
 
        inflight_info->old_free_head = inflight_info->free_head;
        inflight_info->old_used_idx = inflight_info->used_idx;
@@ -1245,7 +1277,12 @@ rte_vhost_avail_entries(int vid, uint16_t queue_id)
        if (!dev)
                return 0;
 
+       if (queue_id >= VHOST_MAX_VRING)
+               return 0;
+
        vq = dev->virtqueue[queue_id];
+       if (!vq)
+               return 0;
 
        rte_spinlock_lock(&vq->access_lock);
 
@@ -1299,12 +1336,29 @@ vhost_enable_notify_packed(struct virtio_net *dev,
                        vq->avail_wrap_counter << 15;
        }
 
-       rte_smp_wmb();
+       rte_atomic_thread_fence(__ATOMIC_RELEASE);
 
        vq->device_event->flags = flags;
        return 0;
 }
 
+int
+vhost_enable_guest_notification(struct virtio_net *dev,
+               struct vhost_virtqueue *vq, int enable)
+{
+       /*
+        * If the virtqueue is not ready yet, the notification request
+        * will be applied once it becomes ready.
+        */
+       if (!vq->ready)
+               return 0;
+
+       if (vq_is_packed(dev))
+               return vhost_enable_notify_packed(dev, vq, enable);
+       else
+               return vhost_enable_notify_split(dev, vq, enable);
+}
+
 int
 rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable)
 {
@@ -1315,14 +1369,17 @@ rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable)
        if (!dev)
                return -1;
 
+       if (queue_id >= VHOST_MAX_VRING)
+               return -1;
+
        vq = dev->virtqueue[queue_id];
+       if (!vq)
+               return -1;
 
        rte_spinlock_lock(&vq->access_lock);
 
-       if (vq_is_packed(dev))
-               ret = vhost_enable_notify_packed(dev, vq, enable);
-       else
-               ret = vhost_enable_notify_split(dev, vq, enable);
+       vq->notif_enable = enable;
+       ret = vhost_enable_guest_notification(dev, vq, enable);
 
        rte_spinlock_unlock(&vq->access_lock);
 
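
The hunk above decouples the application's request from the ring update: rte_vhost_enable_guest_notification() now caches the requested state in vq->notif_enable, and vhost_enable_guest_notification() only touches the ring once the virtqueue is ready. A minimal caller-side sketch follows, assuming a polling datapath that wants notifications suppressed; disable_guest_notifications() is a hypothetical application helper, only the rte_vhost call comes from the library:

#include <stdint.h>
#include <rte_vhost.h>

static int
disable_guest_notifications(int vid, uint16_t nr_vrings)
{
        uint16_t qid;

        for (qid = 0; qid < nr_vrings; qid++) {
                /*
                 * The request is cached in vq->notif_enable and applied once
                 * the virtqueue becomes ready; the call still fails if qid is
                 * out of range or the virtqueue has not been allocated.
                 */
                if (rte_vhost_enable_guest_notification(vid, qid, 0) < 0)
                        return -1;
        }

        return 0;
}
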
@@ -1427,6 +1484,9 @@ int rte_vhost_get_vring_base(int vid, uint16_t queue_id,
        if (dev == NULL || last_avail_idx == NULL || last_used_idx == NULL)
                return -1;
 
+       if (queue_id >= VHOST_MAX_VRING)
+               return -1;
+
        vq = dev->virtqueue[queue_id];
        if (!vq)
                return -1;
@@ -1453,6 +1513,9 @@ int rte_vhost_set_vring_base(int vid, uint16_t queue_id,
        if (!dev)
                return -1;
 
+       if (queue_id >= VHOST_MAX_VRING)
+               return -1;
+
        vq = dev->virtqueue[queue_id];
        if (!vq)
                return -1;
@@ -1477,15 +1540,23 @@ rte_vhost_get_vring_base_from_inflight(int vid,
                                       uint16_t *last_used_idx)
 {
        struct rte_vhost_inflight_info_packed *inflight_info;
+       struct vhost_virtqueue *vq;
        struct virtio_net *dev = get_device(vid);
 
        if (dev == NULL || last_avail_idx == NULL || last_used_idx == NULL)
                return -1;
 
+       if (queue_id >= VHOST_MAX_VRING)
+               return -1;
+
+       vq = dev->virtqueue[queue_id];
+       if (!vq)
+               return -1;
+
        if (!vq_is_packed(dev))
                return -1;
 
-       inflight_info = dev->virtqueue[queue_id]->inflight_packed;
+       inflight_info = vq->inflight_packed;
        if (!inflight_info)
                return -1;
 
@@ -1509,5 +1580,141 @@ int rte_vhost_extern_callback_register(int vid,
        return 0;
 }
 
+int rte_vhost_async_channel_register(int vid, uint16_t queue_id,
+                                       uint32_t features,
+                                       struct rte_vhost_async_channel_ops *ops)
+{
+       struct vhost_virtqueue *vq;
+       struct virtio_net *dev = get_device(vid);
+       struct rte_vhost_async_features f;
+       int node;
+
+       if (dev == NULL || ops == NULL)
+               return -1;
+
+       f.intval = features;
+
+       if (queue_id >= VHOST_MAX_VRING)
+               return -1;
+
+       vq = dev->virtqueue[queue_id];
+
+       if (unlikely(vq == NULL || !dev->async_copy))
+               return -1;
+
+       /* packed queue is not supported */
+       if (unlikely(vq_is_packed(dev) || !f.async_inorder)) {
+               VHOST_LOG_CONFIG(ERR,
+                       "async copy is not supported on packed queue or non-inorder mode "
+                       "(vid %d, qid: %d)\n", vid, queue_id);
+               return -1;
+       }
+
+       if (unlikely(ops->check_completed_copies == NULL ||
+               ops->transfer_data == NULL))
+               return -1;
+
+       rte_spinlock_lock(&vq->access_lock);
+
+       if (unlikely(vq->async_registered)) {
+               VHOST_LOG_CONFIG(ERR,
+                       "async register failed: channel already registered "
+                       "(vid %d, qid: %d)\n", vid, queue_id);
+               goto reg_out;
+       }
+
+#ifdef RTE_LIBRTE_VHOST_NUMA
+       if (get_mempolicy(&node, NULL, 0, vq, MPOL_F_NODE | MPOL_F_ADDR)) {
+               VHOST_LOG_CONFIG(ERR,
+                       "unable to get numa information in async register. "
+                       "allocating async buffer memory on the caller thread node\n");
+               node = SOCKET_ID_ANY;
+       }
+#else
+       node = SOCKET_ID_ANY;
+#endif
+
+       vq->async_pkts_info = rte_malloc_socket(NULL,
+                       vq->size * sizeof(struct async_inflight_info),
+                       RTE_CACHE_LINE_SIZE, node);
+       vq->it_pool = rte_malloc_socket(NULL,
+                       VHOST_MAX_ASYNC_IT * sizeof(struct rte_vhost_iov_iter),
+                       RTE_CACHE_LINE_SIZE, node);
+       vq->vec_pool = rte_malloc_socket(NULL,
+                       VHOST_MAX_ASYNC_VEC * sizeof(struct iovec),
+                       RTE_CACHE_LINE_SIZE, node);
+       vq->async_descs_split = rte_malloc_socket(NULL,
+                       vq->size * sizeof(struct vring_used_elem),
+                       RTE_CACHE_LINE_SIZE, node);
+       if (!vq->async_descs_split || !vq->async_pkts_info ||
+               !vq->it_pool || !vq->vec_pool) {
+               vhost_free_async_mem(vq);
+               VHOST_LOG_CONFIG(ERR,
+                               "async register failed: cannot allocate memory for vq data "
+                               "(vid %d, qid: %d)\n", vid, queue_id);
+               goto reg_out;
+       }
+
+       vq->async_ops.check_completed_copies = ops->check_completed_copies;
+       vq->async_ops.transfer_data = ops->transfer_data;
+
+       vq->async_inorder = f.async_inorder;
+       vq->async_threshold = f.async_threshold;
+
+       vq->async_registered = true;
+
+reg_out:
+       rte_spinlock_unlock(&vq->access_lock);
+
+       return 0;
+}
+
+int rte_vhost_async_channel_unregister(int vid, uint16_t queue_id)
+{
+       struct vhost_virtqueue *vq;
+       struct virtio_net *dev = get_device(vid);
+       int ret = -1;
+
+       if (dev == NULL)
+               return ret;
+
+       if (queue_id >= VHOST_MAX_VRING)
+               return ret;
+
+       vq = dev->virtqueue[queue_id];
+
+       if (vq == NULL)
+               return ret;
+
+       ret = 0;
+
+       if (!vq->async_registered)
+               return ret;
+
+       if (!rte_spinlock_trylock(&vq->access_lock)) {
+               VHOST_LOG_CONFIG(ERR, "Failed to unregister async channel. "
+                       "virt queue busy.\n");
+               return -1;
+       }
+
+       if (vq->async_pkts_inflight_n) {
+               VHOST_LOG_CONFIG(ERR, "Failed to unregister async channel. "
+                       "async inflight packets must be completed before unregistration.\n");
+               ret = -1;
+               goto out;
+       }
+
+       vhost_free_async_mem(vq);
+
+       vq->async_ops.transfer_data = NULL;
+       vq->async_ops.check_completed_copies = NULL;
+       vq->async_registered = false;
+
+out:
+       rte_spinlock_unlock(&vq->access_lock);
+
+       return ret;
+}
+
 RTE_LOG_REGISTER(vhost_config_log_level, lib.vhost.config, INFO);
 RTE_LOG_REGISTER(vhost_data_log_level, lib.vhost.data, WARNING);
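
Usage sketch for the async channel API added above; this is not part of the patch. Only rte_vhost_async_channel_register()/rte_vhost_async_channel_unregister() and the ops/feature field names appear in the diff itself; the callback prototypes and the threshold semantics below are assumptions to be checked against rte_vhost_async.h of the same release.

#include <stdint.h>
#include <rte_vhost_async.h>

/*
 * Hypothetical DMA-backed callbacks. The prototypes follow
 * struct rte_vhost_async_channel_ops as assumed here; see
 * rte_vhost_async.h for the authoritative definitions.
 */
static uint32_t
dma_transfer_data(int vid, uint16_t queue_id,
                struct rte_vhost_async_desc *descs,
                struct rte_vhost_async_status *opaque_data, uint16_t count)
{
        /* A real callback would hand the jobs to a DMA engine; this stub accepts them all. */
        return count;
}

static uint32_t
dma_check_completed_copies(int vid, uint16_t queue_id,
                struct rte_vhost_async_status *opaque_data, uint16_t max_packets)
{
        /* A real callback would poll the DMA engine; this stub reports no completions. */
        return 0;
}

static int
setup_async_channel(int vid, uint16_t queue_id)
{
        struct rte_vhost_async_channel_ops ops = {
                .transfer_data = dma_transfer_data,
                .check_completed_copies = dma_check_completed_copies,
        };
        struct rte_vhost_async_features f;

        f.intval = 0;
        /* Registration is refused unless in-order completion is advertised. */
        f.async_inorder = 1;
        /* Assumed meaning: packets below this size are copied by the CPU. */
        f.async_threshold = 256;

        /*
         * Only split rings are accepted, and the vhost socket must have been
         * created with async copy enabled so that dev->async_copy is set.
         */
        return rte_vhost_async_channel_register(vid, queue_id, f.intval, &ops);
}

Unregistration with rte_vhost_async_channel_unregister() succeeds only when the virtqueue lock can be taken and no async packets are still in flight, mirroring the checks in the hunk above.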