vhost: provide helpers for virtio ring relay
author     Xiao Wang <xiao.w.wang@intel.com>
           Tue, 18 Dec 2018 08:02:00 +0000 (16:02 +0800)
committer  Ferruh Yigit <ferruh.yigit@intel.com>
           Fri, 21 Dec 2018 15:22:40 +0000 (16:22 +0100)
This patch provides two helpers for vdpa device drivers to perform a
relay between the guest virtio ring and a mediated virtio ring.

The available ring relay synchronizes the available entries and helps
to check descriptor validity.

The used ring relay synchronizes the used entries from the mediated
ring to the guest ring and helps to log dirty pages for live migration.

A later patch will leverage these two helpers.

Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
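
For context, a vDPA driver would typically call these helpers from its ring
relay logic: the available-ring relay when the guest kicks the queue, and the
used-ring relay when the device completes buffers. A minimal sketch, assuming
a driver-owned mediated ring and kick/interrupt plumbing that are not part of
this patch (relay_kick, relay_used and mediated_vring are hypothetical names):

    #include <stdint.h>
    #include <linux/virtio_ring.h>  /* struct vring */
    #include <rte_vdpa.h>

    /* Hypothetical driver state: the mediated split ring the device
     * actually DMAs to/from; its allocation is the driver's business. */
    static struct vring mediated_vring;

    /* Guest kicked the queue: pull new avail entries into the
     * mediated ring, then notify the hardware queue. */
    static void
    relay_kick(int vid, uint16_t qid)
    {
            int n = rte_vdpa_relay_vring_avail(vid, qid, &mediated_vring);

            if (n < 0)
                    return; /* invalid descriptor or failed translation */
            /* ... ring the hardware doorbell for the n new entries ... */
    }

    /* Device completed buffers: push used entries back to the guest
     * ring, then interrupt the guest (e.g. via the queue's callfd). */
    static void
    relay_used(int vid, uint16_t qid)
    {
            if (rte_vdpa_relay_vring_used(vid, qid, &mediated_vring) < 0)
                    return;
            /* ... eventfd_write(callfd, (eventfd_t)1) ... */
    }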
lib/librte_vhost/rte_vdpa.h
lib/librte_vhost/rte_vhost_version.map
lib/librte_vhost/vdpa.c
lib/librte_vhost/vhost.h
lib/librte_vhost/virtio_net.c

diff --git a/lib/librte_vhost/rte_vdpa.h b/lib/librte_vhost/rte_vdpa.h
index af40679496d1871739c2c92be53d828158610ec8..c3c14816bb4d29c7f8ecde044dd016248ebb57c5 100644
--- a/lib/librte_vhost/rte_vdpa.h
+++ b/lib/librte_vhost/rte_vdpa.h
@@ -173,4 +173,43 @@ rte_vdpa_get_device_num(void);
  */
 int __rte_experimental
 rte_vhost_host_notifier_ctrl(int vid, bool enable);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Synchronize the available ring from the guest to the mediated ring,
+ * and check descriptor validity to protect against a malicious guest
+ * driver.
+ *
+ * @param vid
+ *  vhost device id
+ * @param qid
+ *  vhost queue id
+ * @param vring_m
+ *  mediated virtio ring pointer
+ * @return
+ *  number of synced available entries on success, -1 on failure
+ */
+int __rte_experimental
+rte_vdpa_relay_vring_avail(int vid, uint16_t qid, void *vring_m);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Synchronize the used ring from the mediated ring to the guest, and
+ * log a dirty page for each writable buffer. The caller should handle
+ * used ring logging before device stop.
+ *
+ * @param vid
+ *  vhost device id
+ * @param qid
+ *  vhost queue id
+ * @param vring_m
+ *  mediated virtio ring pointer
+ * @return
+ *  number of synced used entries on success, -1 on failure
+ */
+int __rte_experimental
+rte_vdpa_relay_vring_used(int vid, uint16_t qid, void *vring_m);
 #endif /* _RTE_VDPA_H_ */
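
Both helpers take the mediated ring as an opaque void * and cast it back to a
split-ring struct vring; packed virtqueues are rejected, as the vdpa.c hunk
below shows. A sketch of how a driver might lay that ring out in its own
device-visible memory, assuming dma_mem is suitably sized and aligned
(vring_init() and vring_size() come from the Linux uapi header, not from
this patch):

    #include <linux/virtio_ring.h>  /* struct vring, vring_init, vring_size */

    /* Place desc table, avail ring and used ring back to back in
     * driver-owned memory using the standard split-ring layout;
     * 'dma_mem' must hold at least vring_size(num, 4096) bytes. */
    static void
    setup_mediated_ring(struct vring *vr, void *dma_mem, unsigned int num)
    {
            vring_init(vr, num, dma_mem, 4096);
    }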
diff --git a/lib/librte_vhost/rte_vhost_version.map b/lib/librte_vhost/rte_vhost_version.map
index 22302e97299356534851183a56ab9cc0be498a09..dd3b4c1cbf45c34d4b5337ef3ba4868cce0b6c17 100644
--- a/lib/librte_vhost/rte_vhost_version.map
+++ b/lib/librte_vhost/rte_vhost_version.map
@@ -84,4 +84,6 @@ EXPERIMENTAL {
        rte_vhost_crypto_set_zero_copy;
        rte_vhost_va_from_guest_pa;
        rte_vhost_host_notifier_ctrl;
+       rte_vdpa_relay_vring_avail;
+       rte_vdpa_relay_vring_used;
 };
diff --git a/lib/librte_vhost/vdpa.c b/lib/librte_vhost/vdpa.c
index e7d849ee0f4f1f91661cdf0dcebbbb7365e3b4ba..240a1fe3a18a0c8c78096f01c668f99a2cd0033c 100644
--- a/lib/librte_vhost/vdpa.c
+++ b/lib/librte_vhost/vdpa.c
@@ -122,3 +122,197 @@ rte_vdpa_get_device_num(void)
 {
        return vdpa_device_num;
 }
+
+static bool
+invalid_desc_check(struct virtio_net *dev, struct vhost_virtqueue *vq,
+               uint64_t desc_iova, uint64_t desc_len, uint8_t perm)
+{
+       uint64_t desc_addr, desc_chunck_len;
+
+       while (desc_len) {
+               desc_chunck_len = desc_len;
+               desc_addr = vhost_iova_to_vva(dev, vq,
+                               desc_iova,
+                               &desc_chunck_len,
+                               perm);
+
+               if (!desc_addr)
+                       return true;
+
+               desc_len -= desc_chunck_len;
+               desc_iova += desc_chunck_len;
+       }
+
+       return false;
+}
+
+int __rte_experimental
+rte_vdpa_relay_vring_avail(int vid, uint16_t qid, void *vring_m)
+{
+       struct virtio_net *dev = get_device(vid);
+       uint16_t idx, idx_m, desc_id;
+       struct vring_desc desc;
+       struct vhost_virtqueue *vq;
+       struct vring_desc *desc_ring;
+       struct vring_desc *idesc = NULL;
+       struct vring *s_vring;
+       uint64_t dlen;
+       int ret;
+       uint8_t perm;
+
+       if (!dev || !vring_m)
+               return -1;
+
+       if (qid >= dev->nr_vring)
+               return -1;
+
+       if (vq_is_packed(dev))
+               return -1;
+
+       s_vring = (struct vring *)vring_m;
+       vq = dev->virtqueue[qid];
+       idx = vq->avail->idx;
+       idx_m = s_vring->avail->idx;
+       ret = (uint16_t)(idx - idx_m);
+
+       while (idx_m != idx) {
+               /* avail entry copy */
+               desc_id = vq->avail->ring[idx_m & (vq->size - 1)];
+               s_vring->avail->ring[idx_m & (vq->size - 1)] = desc_id;
+               desc_ring = vq->desc;
+
+               if (vq->desc[desc_id].flags & VRING_DESC_F_INDIRECT) {
+                       dlen = vq->desc[desc_id].len;
+                       desc_ring = (struct vring_desc *)(uintptr_t)
+                               vhost_iova_to_vva(dev, vq,
+                                               vq->desc[desc_id].addr, &dlen,
+                                               VHOST_ACCESS_RO);
+                       if (unlikely(!desc_ring))
+                               return -1;
+
+                       if (unlikely(dlen < vq->desc[desc_id].len)) {
+                               idesc = alloc_copy_ind_table(dev, vq,
+                                               vq->desc[desc_id].addr,
+                                               vq->desc[desc_id].len);
+                               if (unlikely(!idesc))
+                                       return -1;
+
+                               desc_ring = idesc;
+                       }
+
+                       desc_id = 0;
+               }
+
+               /* check if the buf addr is within the guest memory */
+               do {
+                       desc = desc_ring[desc_id];
+                       perm = desc.flags & VRING_DESC_F_WRITE ?
+                               VHOST_ACCESS_WO : VHOST_ACCESS_RO;
+                       if (invalid_desc_check(dev, vq, desc.addr, desc.len,
+                                               perm)) {
+                               if (unlikely(idesc))
+                                       free_ind_table(idesc);
+                               return -1;
+                       }
+                       desc_id = desc.next;
+               } while (desc.flags & VRING_DESC_F_NEXT);
+
+               if (unlikely(idesc)) {
+                       free_ind_table(idesc);
+                       idesc = NULL;
+               }
+
+               idx_m++;
+       }
+
+       rte_smp_wmb();
+       s_vring->avail->idx = idx;
+
+       if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))
+               vhost_avail_event(vq) = idx;
+
+       return ret;
+}
+
+int __rte_experimental
+rte_vdpa_relay_vring_used(int vid, uint16_t qid, void *vring_m)
+{
+       struct virtio_net *dev = get_device(vid);
+       uint16_t idx, idx_m, desc_id;
+       struct vhost_virtqueue *vq;
+       struct vring_desc desc;
+       struct vring_desc *desc_ring;
+       struct vring_desc *idesc = NULL;
+       struct vring *s_vring;
+       uint64_t dlen;
+       int ret;
+
+       if (!dev || !vring_m)
+               return -1;
+
+       if (qid >= dev->nr_vring)
+               return -1;
+
+       if (vq_is_packed(dev))
+               return -1;
+
+       s_vring = (struct vring *)vring_m;
+       vq = dev->virtqueue[qid];
+       idx = vq->used->idx;
+       idx_m = s_vring->used->idx;
+       ret = (uint16_t)(idx_m - idx);
+
+       while (idx != idx_m) {
+               /* copy used entry, used ring logging is not covered here */
+               vq->used->ring[idx & (vq->size - 1)] =
+                       s_vring->used->ring[idx & (vq->size - 1)];
+
+               desc_id = vq->used->ring[idx & (vq->size - 1)].id;
+               desc_ring = vq->desc;
+
+               if (vq->desc[desc_id].flags & VRING_DESC_F_INDIRECT) {
+                       dlen = vq->desc[desc_id].len;
+                       desc_ring = (struct vring_desc *)(uintptr_t)
+                               vhost_iova_to_vva(dev, vq,
+                                               vq->desc[desc_id].addr, &dlen,
+                                               VHOST_ACCESS_RO);
+                       if (unlikely(!desc_ring))
+                               return -1;
+
+                       if (unlikely(dlen < vq->desc[desc_id].len)) {
+                               idesc = alloc_copy_ind_table(dev, vq,
+                                               vq->desc[desc_id].addr,
+                                               vq->desc[desc_id].len);
+                               if (unlikely(!idesc))
+                                       return -1;
+
+                               desc_ring = idesc;
+                       }
+
+                       desc_id = 0;
+               }
+
+               /* dirty page logging for DMA writeable buffer */
+               do {
+                       desc = desc_ring[desc_id];
+                       if (desc.flags & VRING_DESC_F_WRITE)
+                               vhost_log_write(dev, desc.addr, desc.len);
+                       desc_id = desc.next;
+               } while (desc.flags & VRING_DESC_F_NEXT);
+
+               if (unlikely(idesc)) {
+                       free_ind_table(idesc);
+                       idesc = NULL;
+               }
+
+               idx++;
+       }
+
+       rte_smp_wmb();
+       vq->used->idx = idx_m;
+
+       if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))
+               vring_used_event(s_vring) = idx_m;
+
+       return ret;
+}
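
A note on the return values: both rings use free-running 16-bit indices, so
the number of entries to relay is computed as an unsigned 16-bit difference,
which stays correct across index wraparound. A standalone illustration (the
index values are made up for the example):

    #include <stdint.h>
    #include <stdio.h>

    int
    main(void)
    {
            uint16_t idx = 3;       /* guest index, wrapped past 65535 */
            uint16_t idx_m = 65533; /* mediated ring lags by 6 entries */

            /* 3 - 65533 wraps modulo 2^16 back to 6 */
            uint16_t pending = (uint16_t)(idx - idx_m);

            printf("%u entries to relay\n", (unsigned)pending);
            return 0;
    }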
diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index d5bab4803a57038296c3e3f994681890b0c8823c..3b3265c4b63e98232f3b9658a24a4c13fce56826 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -18,6 +18,7 @@
 #include <rte_log.h>
 #include <rte_ether.h>
 #include <rte_rwlock.h>
+#include <rte_malloc.h>
 
 #include "rte_vhost.h"
 #include "rte_vdpa.h"
@@ -754,4 +755,43 @@ kick:
                eventfd_write(vq->callfd, (eventfd_t)1);
 }
 
+static __rte_always_inline void *
+alloc_copy_ind_table(struct virtio_net *dev, struct vhost_virtqueue *vq,
+               uint64_t desc_addr, uint64_t desc_len)
+{
+       void *idesc;
+       uint64_t src, dst;
+       uint64_t len, remain = desc_len;
+
+       idesc = rte_malloc(__func__, desc_len, 0);
+       if (unlikely(!idesc))
+               return 0;
+
+       dst = (uint64_t)(uintptr_t)idesc;
+
+       while (remain) {
+               len = remain;
+               src = vhost_iova_to_vva(dev, vq, desc_addr, &len,
+                               VHOST_ACCESS_RO);
+               if (unlikely(!src || !len)) {
+                       rte_free(idesc);
+                       return 0;
+               }
+
+               rte_memcpy((void *)(uintptr_t)dst, (void *)(uintptr_t)src, len);
+
+               remain -= len;
+               dst += len;
+               desc_addr += len;
+       }
+
+       return idesc;
+}
+
+static __rte_always_inline void
+free_ind_table(void *idesc)
+{
+       rte_free(idesc);
+}
+
 #endif /* _VHOST_NET_CDEV_H_ */
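
These two helpers move here from virtio_net.c (removal below) so that vdpa.c
can reuse them. An indirect descriptor table lives in guest memory that may
not be contiguous in the host address space, hence the chunked
vhost_iova_to_vva() loop; callers take a linear copy only when the first
translation comes back short. An illustrative caller pattern mirroring the
relay helpers above (walk_indirect_table is a hypothetical name, and dev, vq
and head are assumed to come from the surrounding relay context):

    /* 'head' is the descriptor with VRING_DESC_F_INDIRECT set. */
    static int
    walk_indirect_table(struct virtio_net *dev, struct vhost_virtqueue *vq,
                    struct vring_desc *head)
    {
            uint64_t dlen = head->len;
            struct vring_desc *copy = NULL;
            struct vring_desc *tbl = (struct vring_desc *)(uintptr_t)
                    vhost_iova_to_vva(dev, vq, head->addr, &dlen,
                                    VHOST_ACCESS_RO);

            if (unlikely(!tbl))
                    return -1;

            if (unlikely(dlen < head->len)) {
                    /* table spans mappings: fall back to a linear copy */
                    copy = alloc_copy_ind_table(dev, vq, head->addr,
                                    head->len);
                    if (unlikely(!copy))
                            return -1;
                    tbl = copy;
            }

            /* ... walk tbl[0 .. head->len / sizeof(*tbl) - 1] ... */

            free_ind_table(copy); /* rte_free(NULL) is a no-op */
            return 0;
    }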
diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index 5e1a1a727cf3bc0a8475b42c35311734ffd66a29..8c657a1013ade1fd9c932d4a10e8d8ddaee3651f 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -37,45 +37,6 @@ is_valid_virt_queue_idx(uint32_t idx, int is_tx, uint32_t nr_vring)
        return (is_tx ^ (idx & 1)) == 0 && idx < nr_vring;
 }
 
-static __rte_always_inline void *
-alloc_copy_ind_table(struct virtio_net *dev, struct vhost_virtqueue *vq,
-               uint64_t desc_addr, uint64_t desc_len)
-{
-       void *idesc;
-       uint64_t src, dst;
-       uint64_t len, remain = desc_len;
-
-       idesc = rte_malloc(__func__, desc_len, 0);
-       if (unlikely(!idesc))
-               return 0;
-
-       dst = (uint64_t)(uintptr_t)idesc;
-
-       while (remain) {
-               len = remain;
-               src = vhost_iova_to_vva(dev, vq, desc_addr, &len,
-                               VHOST_ACCESS_RO);
-               if (unlikely(!src || !len)) {
-                       rte_free(idesc);
-                       return 0;
-               }
-
-               rte_memcpy((void *)(uintptr_t)dst, (void *)(uintptr_t)src, len);
-
-               remain -= len;
-               dst += len;
-               desc_addr += len;
-       }
-
-       return idesc;
-}
-
-static __rte_always_inline void
-free_ind_table(void *idesc)
-{
-       rte_free(idesc);
-}
-
 static __rte_always_inline void
 do_flush_shadow_used_ring_split(struct virtio_net *dev,
                        struct vhost_virtqueue *vq,