vhost: protect active rings from async ring changes
Author:     Victor Kaplansky <victork@redhat.com>
AuthorDate: Wed, 17 Jan 2018 13:49:25 +0000 (15:49 +0200)
Commit:     Ferruh Yigit <ferruh.yigit@intel.com>
CommitDate: Sun, 21 Jan 2018 14:51:52 +0000 (15:51 +0100)
When performing live migration or memory hot-plugging,
the changes to the device and vrings made by the message
handler are done independently of vring usage by the PMD
threads.

This causes, for example, segfaults during live migration
with MQ enabled, but in general virtually any request
sent by QEMU that changes the state of the device can
cause problems.
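
For illustration only, here is a minimal standalone sketch of this
kind of race, using plain pthreads rather than the actual DPDK code
(all names below are made up for the example):

    /* race_sketch.c - deliberately broken illustration, not DPDK code.
     * Build: cc race_sketch.c -o race_sketch -lpthread
     * A "PMD" thread polls ring memory while a "message handler"
     * thread frees and replaces it, as SET_MEM_TABLE would.
     */
    #include <pthread.h>
    #include <stdlib.h>
    #include <unistd.h>

    struct ring { int *desc; };

    /* volatile only forces the reload inside the loop; it provides
     * no synchronization at all - that is exactly the bug. */
    static struct ring *volatile ring;

    static void *pmd_poll(void *arg)
    {
        (void)arg;
        for (;;) {
            struct ring *r = ring;
            if (r != NULL)
                (void)r->desc[0];  /* may be freed by now: use-after-free */
        }
        return NULL;
    }

    int main(void)
    {
        pthread_t t;

        ring = calloc(1, sizeof(*ring));
        ring->desc = calloc(8, sizeof(int));
        pthread_create(&t, NULL, pmd_poll, NULL);

        sleep(1);                  /* let the "PMD" start polling */

        /* "Message handler": replace the ring under the poller. */
        struct ring *old = ring;
        ring = NULL;
        free(old->desc);
        free(old);
        return 0;                  /* demo only: exit tears down the thread */
    }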

This patch fixes all of the above issues by adding a spinlock
to every vring and requiring the message handler to start an
operation only after ensuring that all PMD threads related to
the device are out of the critical sections accessing the
vring data.

Each vring has its own lock in order to avoid contention
between PMD threads of different vrings and to prevent
performance degradation as the number of queue pairs scales.
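
To make the protocol concrete, here is a minimal standalone sketch
of the same idea, with POSIX spinlocks standing in for
rte_spinlock_t; dev_ctx, poll_vring and handle_message are
illustrative names for this sketch, not DPDK API:

    #include <pthread.h>

    #define MAX_VRINGS 8

    struct vring_ctx {
        pthread_spinlock_t access_lock;  /* one lock per vring */
        int enabled;
    };

    struct dev_ctx {
        struct vring_ctx vring[MAX_VRINGS];
        unsigned int nr_vring;
    };

    static void dev_init(struct dev_ctx *dev, unsigned int n)
    {
        unsigned int i;

        dev->nr_vring = n;
        for (i = 0; i < n; i++) {
            /* mirrors rte_spinlock_init() in alloc_vring_queue() */
            pthread_spin_init(&dev->vring[i].access_lock,
                              PTHREAD_PROCESS_PRIVATE);
            dev->vring[i].enabled = 1;
        }
    }

    /* Data path: a PMD thread serializes only against changes to its
     * own vring, so queues do not contend with each other. */
    static int poll_vring(struct vring_ctx *vq)
    {
        int work = 0;

        pthread_spin_lock(&vq->access_lock);
        if (vq->enabled) {
            /* ... process descriptors ... */
            work = 1;
        }
        pthread_spin_unlock(&vq->access_lock);
        return work;
    }

    /* Control path: once every vring's lock is held, no PMD thread
     * is inside a critical section, so device state can be changed. */
    static void handle_message(struct dev_ctx *dev)
    {
        unsigned int i;

        for (i = 0; i < dev->nr_vring; i++)
            pthread_spin_lock(&dev->vring[i].access_lock);
        /* ... apply ring / memory-table changes ... */
        for (i = 0; i < dev->nr_vring; i++)
            pthread_spin_unlock(&dev->vring[i].access_lock);
    }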

See https://bugzilla.redhat.com/show_bug.cgi?id=1450680

Cc: stable@dpdk.org
Signed-off-by: Victor Kaplansky <victork@redhat.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
Acked-by: Yuanhan Liu <yliu@fridaylinux.org>
lib/librte_vhost/vhost.c
lib/librte_vhost/vhost.h
lib/librte_vhost/vhost_user.c
lib/librte_vhost/virtio_net.c

diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
index 6789ccc..1dd9adb 100644
--- a/lib/librte_vhost/vhost.c
+++ b/lib/librte_vhost/vhost.c
@@ -232,6 +232,7 @@ alloc_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
 
        dev->virtqueue[vring_idx] = vq;
        init_vring_queue(dev, vring_idx);
+       rte_spinlock_init(&vq->access_lock);
 
        dev->nr_vring += 1;
 
diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index b2bf0e8..e52a9b6 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -81,12 +81,14 @@ struct vhost_virtqueue {
 
        /* Backend value to determine if device should started/stopped */
        int                     backend;
+       int                     enabled;
+       int                     access_ok;
+       rte_spinlock_t          access_lock;
+
        /* Used to notify the guest (trigger interrupt) */
        int                     callfd;
        /* Currently unused as polling mode is enabled */
        int                     kickfd;
-       int                     enabled;
-       int                     access_ok;
 
        /* Physical address of used ring, for logging */
        uint64_t                log_guest_addr;
diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
index 0ea28ec..1dd1a61 100644
--- a/lib/librte_vhost/vhost_user.c
+++ b/lib/librte_vhost/vhost_user.c
@@ -1260,12 +1260,47 @@ vhost_user_check_and_alloc_queue_pair(struct virtio_net *dev, VhostUserMsg *msg)
        return alloc_vring_queue(dev, vring_idx);
 }
 
+static void
+vhost_user_lock_all_queue_pairs(struct virtio_net *dev)
+{
+       unsigned int i = 0;
+       unsigned int vq_num = 0;
+
+       while (vq_num < dev->nr_vring) {
+               struct vhost_virtqueue *vq = dev->virtqueue[i];
+
+               if (vq) {
+                       rte_spinlock_lock(&vq->access_lock);
+                       vq_num++;
+               }
+               i++;
+       }
+}
+
+static void
+vhost_user_unlock_all_queue_pairs(struct virtio_net *dev)
+{
+       unsigned int i = 0;
+       unsigned int vq_num = 0;
+
+       while (vq_num < dev->nr_vring) {
+               struct vhost_virtqueue *vq = dev->virtqueue[i];
+
+               if (vq) {
+                       rte_spinlock_unlock(&vq->access_lock);
+                       vq_num++;
+               }
+               i++;
+       }
+}
+
 int
 vhost_user_msg_handler(int vid, int fd)
 {
        struct virtio_net *dev;
        struct VhostUserMsg msg;
        int ret;
+       int unlock_required = 0;
 
        dev = get_device(vid);
        if (dev == NULL)
@@ -1311,6 +1346,38 @@ vhost_user_msg_handler(int vid, int fd)
                return -1;
        }
 
+       /*
+        * Note: we don't lock all queues on VHOST_USER_GET_VRING_BASE,
+        * since it is sent when virtio stops and device is destroyed.
+        * destroy_device waits for queues to be inactive, so it is safe.
+        * Otherwise taking the access_lock would cause a deadlock.
+        */
+       switch (msg.request.master) {
+       case VHOST_USER_SET_FEATURES:
+       case VHOST_USER_SET_PROTOCOL_FEATURES:
+       case VHOST_USER_SET_OWNER:
+       case VHOST_USER_RESET_OWNER:
+       case VHOST_USER_SET_MEM_TABLE:
+       case VHOST_USER_SET_LOG_BASE:
+       case VHOST_USER_SET_LOG_FD:
+       case VHOST_USER_SET_VRING_NUM:
+       case VHOST_USER_SET_VRING_ADDR:
+       case VHOST_USER_SET_VRING_BASE:
+       case VHOST_USER_SET_VRING_KICK:
+       case VHOST_USER_SET_VRING_CALL:
+       case VHOST_USER_SET_VRING_ERR:
+       case VHOST_USER_SET_VRING_ENABLE:
+       case VHOST_USER_SEND_RARP:
+       case VHOST_USER_NET_SET_MTU:
+       case VHOST_USER_SET_SLAVE_REQ_FD:
+               vhost_user_lock_all_queue_pairs(dev);
+               unlock_required = 1;
+               break;
+       default:
+               break;
+
+       }
+
        switch (msg.request.master) {
        case VHOST_USER_GET_FEATURES:
                msg.payload.u64 = vhost_user_get_features(dev);
@@ -1414,6 +1481,9 @@ vhost_user_msg_handler(int vid, int fd)
 
        }
 
+       if (unlock_required)
+               vhost_user_unlock_all_queue_pairs(dev);
+
        if (msg.flags & VHOST_USER_NEED_REPLY) {
                msg.payload.u64 = !!ret;
                msg.size = sizeof(msg.payload.u64);
diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index 8ef71cc..edfab3b 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -15,6 +15,7 @@
 #include <rte_udp.h>
 #include <rte_sctp.h>
 #include <rte_arp.h>
+#include <rte_spinlock.h>
 
 #include "iotlb.h"
 #include "vhost.h"
@@ -302,8 +303,11 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
        }
 
        vq = dev->virtqueue[queue_id];
+
+       rte_spinlock_lock(&vq->access_lock);
+
        if (unlikely(vq->enabled == 0))
-               return 0;
+               goto out_access_unlock;
 
        if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
                vhost_user_iotlb_rd_lock(vq);
@@ -389,6 +393,9 @@ out:
        if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
                vhost_user_iotlb_rd_unlock(vq);
 
+out_access_unlock:
+       rte_spinlock_unlock(&vq->access_lock);
+
        return count;
 }
 
@@ -621,8 +628,11 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
        }
 
        vq = dev->virtqueue[queue_id];
+
+       rte_spinlock_lock(&vq->access_lock);
+
        if (unlikely(vq->enabled == 0))
-               return 0;
+               goto out_access_unlock;
 
        if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
                vhost_user_iotlb_rd_lock(vq);
@@ -678,6 +688,9 @@ out:
        if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
                vhost_user_iotlb_rd_unlock(vq);
 
+out_access_unlock:
+       rte_spinlock_unlock(&vq->access_lock);
+
        return pkt_idx;
 }
 
@@ -1122,9 +1135,13 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
        }
 
        vq = dev->virtqueue[queue_id];
-       if (unlikely(vq->enabled == 0))
+
+       if (unlikely(rte_spinlock_trylock(&vq->access_lock) == 0))
                return 0;
 
+       if (unlikely(vq->enabled == 0))
+               goto out_access_unlock;
+
        vq->batch_copy_nb_elems = 0;
 
        if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
@@ -1293,6 +1310,9 @@ out:
        if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
                vhost_user_iotlb_rd_unlock(vq);
 
+out_access_unlock:
+       rte_spinlock_unlock(&vq->access_lock);
+
        if (unlikely(rarp_mbuf != NULL)) {
                /*
                 * Inject it to the head of "pkts" array, so that switch's mac
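
One observable design point in the last hunk above: unlike the Rx
paths, the dequeue path takes the lock with rte_spinlock_trylock(),
so a polling core that finds the lock held by the message handler
returns 0 packets at once instead of spinning, and simply retries on
its next poll. Continuing the illustrative pthread sketch from the
commit message above (again, not DPDK API):

    /* Dequeue-side pattern: do not block the polling core while the
     * message handler owns the vring; report no packets this round. */
    static int dequeue_burst(struct vring_ctx *vq)
    {
        int n = 0;

        /* pthread_spin_trylock() returns nonzero (EBUSY) on failure */
        if (pthread_spin_trylock(&vq->access_lock) != 0)
            return 0;                /* handler active; retry next poll */

        if (vq->enabled) {
            /* ... copy available descriptors, counting them in n ... */
        }

        pthread_spin_unlock(&vq->access_lock);
        return n;
    }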