vhost: simplify memory regions handling
authorYuanhan Liu <yuanhan.liu@linux.intel.com>
Sun, 9 Oct 2016 07:27:54 +0000 (15:27 +0800)
committerYuanhan Liu <yuanhan.liu@linux.intel.com>
Wed, 12 Oct 2016 07:44:56 +0000 (09:44 +0200)
Due to history reason (that vhost-cuse comes before vhost-user), some
fields for maintaining the vhost-user memory mappings (such as mmapped
address and size, with those we then can unmap on destroy) are kept in
"orig_region_map" struct, a structure that is defined only in vhost-user
source file.

The right way to go is to remove the structure and move all those fields
into virtio_memory_region struct. But we simply can't do that before,
because it breaks the ABI.

Now, thanks to the ABI refactoring, it's never been a blocking issue
any more. And here it goes: this patch removes orig_region_map and
redefines virtio_memory_region, to include all necessary info.

With that, we can simplify the guest/host address convert a bit.

Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
Tested-by: Qian Xu <qian.q.xu@intel.com>
lib/librte_vhost/vhost.h
lib/librte_vhost/vhost_user.c

index c2dfc3c..df2107b 100644 (file)
@@ -143,12 +143,14 @@ struct virtio_net {
  * Information relating to memory regions including offsets to
  * addresses in QEMUs memory file.
  */
-struct virtio_memory_regions {
-       uint64_t guest_phys_address;
-       uint64_t guest_phys_address_end;
-       uint64_t memory_size;
-       uint64_t userspace_address;
-       uint64_t address_offset;
+struct virtio_memory_region {
+       uint64_t guest_phys_addr;
+       uint64_t guest_user_addr;
+       uint64_t host_user_addr;
+       uint64_t size;
+       void     *mmap_addr;
+       uint64_t mmap_size;
+       int fd;
 };
 
 
@@ -156,12 +158,8 @@ struct virtio_memory_regions {
  * Memory structure includes region and mapping information.
  */
 struct virtio_memory {
-       /* Base QEMU userspace address of the memory file. */
-       uint64_t base_address;
-       uint64_t mapped_address;
-       uint64_t mapped_size;
        uint32_t nregions;
-       struct virtio_memory_regions regions[0];
+       struct virtio_memory_region regions[0];
 };
 
 
@@ -200,26 +198,23 @@ extern uint64_t VHOST_FEATURES;
 #define MAX_VHOST_DEVICE       1024
 extern struct virtio_net *vhost_devices[MAX_VHOST_DEVICE];
 
-/**
- * Function to convert guest physical addresses to vhost virtual addresses.
- * This is used to convert guest virtio buffer addresses.
- */
+/* Convert guest physical Address to host virtual address */
 static inline uint64_t __attribute__((always_inline))
-gpa_to_vva(struct virtio_net *dev, uint64_t guest_pa)
+gpa_to_vva(struct virtio_net *dev, uint64_t gpa)
 {
-       struct virtio_memory_regions *region;
-       uint32_t regionidx;
-       uint64_t vhost_va = 0;
-
-       for (regionidx = 0; regionidx < dev->mem->nregions; regionidx++) {
-               region = &dev->mem->regions[regionidx];
-               if ((guest_pa >= region->guest_phys_address) &&
-                       (guest_pa <= region->guest_phys_address_end)) {
-                       vhost_va = region->address_offset + guest_pa;
-                       break;
+       struct virtio_memory_region *reg;
+       uint32_t i;
+
+       for (i = 0; i < dev->mem->nregions; i++) {
+               reg = &dev->mem->regions[i];
+               if (gpa >= reg->guest_phys_addr &&
+                   gpa <  reg->guest_phys_addr + reg->size) {
+                       return gpa - reg->guest_phys_addr +
+                              reg->host_user_addr;
                }
        }
-       return vhost_va;
+
+       return 0;
 }
 
 struct virtio_net_device_ops const *notify_ops;
index eee99e9..49585b8 100644 (file)
@@ -74,18 +74,6 @@ static const char *vhost_message_str[VHOST_USER_MAX] = {
        [VHOST_USER_SEND_RARP]  = "VHOST_USER_SEND_RARP",
 };
 
-struct orig_region_map {
-       int fd;
-       uint64_t mapped_address;
-       uint64_t mapped_size;
-       uint64_t blksz;
-};
-
-#define orig_region(ptr, nregions) \
-       ((struct orig_region_map *)RTE_PTR_ADD((ptr), \
-               sizeof(struct virtio_memory) + \
-               sizeof(struct virtio_memory_regions) * (nregions)))
-
 static uint64_t
 get_blk_size(int fd)
 {
@@ -99,18 +87,17 @@ get_blk_size(int fd)
 static void
 free_mem_region(struct virtio_net *dev)
 {
-       struct orig_region_map *region;
-       unsigned int idx;
+       uint32_t i;
+       struct virtio_memory_region *reg;
 
        if (!dev || !dev->mem)
                return;
 
-       region = orig_region(dev->mem, dev->mem->nregions);
-       for (idx = 0; idx < dev->mem->nregions; idx++) {
-               if (region[idx].mapped_address) {
-                       munmap((void *)(uintptr_t)region[idx].mapped_address,
-                                       region[idx].mapped_size);
-                       close(region[idx].fd);
+       for (i = 0; i < dev->mem->nregions; i++) {
+               reg = &dev->mem->regions[i];
+               if (reg->host_user_addr) {
+                       munmap(reg->mmap_addr, reg->mmap_size);
+                       close(reg->fd);
                }
        }
 }
@@ -120,7 +107,7 @@ vhost_backend_cleanup(struct virtio_net *dev)
 {
        if (dev->mem) {
                free_mem_region(dev);
-               free(dev->mem);
+               rte_free(dev->mem);
                dev->mem = NULL;
        }
        if (dev->log_addr) {
@@ -286,25 +273,23 @@ numa_realloc(struct virtio_net *dev, int index __rte_unused)
  * used to convert the ring addresses to our address space.
  */
 static uint64_t
-qva_to_vva(struct virtio_net *dev, uint64_t qemu_va)
+qva_to_vva(struct virtio_net *dev, uint64_t qva)
 {
-       struct virtio_memory_regions *region;
-       uint64_t vhost_va = 0;
-       uint32_t regionidx = 0;
+       struct virtio_memory_region *reg;
+       uint32_t i;
 
        /* Find the region where the address lives. */
-       for (regionidx = 0; regionidx < dev->mem->nregions; regionidx++) {
-               region = &dev->mem->regions[regionidx];
-               if ((qemu_va >= region->userspace_address) &&
-                       (qemu_va <= region->userspace_address +
-                       region->memory_size)) {
-                       vhost_va = qemu_va + region->guest_phys_address +
-                               region->address_offset -
-                               region->userspace_address;
-                       break;
+       for (i = 0; i < dev->mem->nregions; i++) {
+               reg = &dev->mem->regions[i];
+
+               if (qva >= reg->guest_user_addr &&
+                   qva <  reg->guest_user_addr + reg->size) {
+                       return qva - reg->guest_user_addr +
+                              reg->host_user_addr;
                }
        }
-       return vhost_va;
+
+       return 0;
 }
 
 /*
@@ -391,11 +376,13 @@ static int
 vhost_user_set_mem_table(struct virtio_net *dev, struct VhostUserMsg *pmsg)
 {
        struct VhostUserMemory memory = pmsg->payload.memory;
-       struct virtio_memory_regions *pregion;
-       uint64_t mapped_address, mapped_size;
-       unsigned int idx = 0;
-       struct orig_region_map *pregion_orig;
+       struct virtio_memory_region *reg;
+       void *mmap_addr;
+       uint64_t mmap_size;
+       uint64_t mmap_offset;
        uint64_t alignment;
+       uint32_t i;
+       int fd;
 
        /* Remove from the data plane. */
        if (dev->flags & VIRTIO_DEV_RUNNING) {
@@ -405,14 +392,12 @@ vhost_user_set_mem_table(struct virtio_net *dev, struct VhostUserMsg *pmsg)
 
        if (dev->mem) {
                free_mem_region(dev);
-               free(dev->mem);
+               rte_free(dev->mem);
                dev->mem = NULL;
        }
 
-       dev->mem = calloc(1,
-               sizeof(struct virtio_memory) +
-               sizeof(struct virtio_memory_regions) * memory.nregions +
-               sizeof(struct orig_region_map) * memory.nregions);
+       dev->mem = rte_zmalloc("vhost-mem-table", sizeof(struct virtio_memory) +
+               sizeof(struct virtio_memory_region) * memory.nregions, 0);
        if (dev->mem == NULL) {
                RTE_LOG(ERR, VHOST_CONFIG,
                        "(%d) failed to allocate memory for dev->mem\n",
@@ -421,22 +406,17 @@ vhost_user_set_mem_table(struct virtio_net *dev, struct VhostUserMsg *pmsg)
        }
        dev->mem->nregions = memory.nregions;
 
-       pregion_orig = orig_region(dev->mem, memory.nregions);
-       for (idx = 0; idx < memory.nregions; idx++) {
-               pregion = &dev->mem->regions[idx];
-               pregion->guest_phys_address =
-                       memory.regions[idx].guest_phys_addr;
-               pregion->guest_phys_address_end =
-                       memory.regions[idx].guest_phys_addr +
-                       memory.regions[idx].memory_size;
-               pregion->memory_size =
-                       memory.regions[idx].memory_size;
-               pregion->userspace_address =
-                       memory.regions[idx].userspace_addr;
-
-               /* This is ugly */
-               mapped_size = memory.regions[idx].memory_size +
-                       memory.regions[idx].mmap_offset;
+       for (i = 0; i < memory.nregions; i++) {
+               fd  = pmsg->fds[i];
+               reg = &dev->mem->regions[i];
+
+               reg->guest_phys_addr = memory.regions[i].guest_phys_addr;
+               reg->guest_user_addr = memory.regions[i].userspace_addr;
+               reg->size            = memory.regions[i].memory_size;
+               reg->fd              = fd;
+
+               mmap_offset = memory.regions[i].mmap_offset;
+               mmap_size   = reg->size + mmap_offset;
 
                /* mmap() without flag of MAP_ANONYMOUS, should be called
                 * with length argument aligned with hugepagesz at older
@@ -446,67 +426,52 @@ vhost_user_set_mem_table(struct virtio_net *dev, struct VhostUserMsg *pmsg)
                 * to avoid failure, make sure in caller to keep length
                 * aligned.
                 */
-               alignment = get_blk_size(pmsg->fds[idx]);
+               alignment = get_blk_size(fd);
                if (alignment == (uint64_t)-1) {
                        RTE_LOG(ERR, VHOST_CONFIG,
                                "couldn't get hugepage size through fstat\n");
                        goto err_mmap;
                }
-               mapped_size = RTE_ALIGN_CEIL(mapped_size, alignment);
+               mmap_size = RTE_ALIGN_CEIL(mmap_size, alignment);
 
-               mapped_address = (uint64_t)(uintptr_t)mmap(NULL,
-                       mapped_size,
-                       PROT_READ | PROT_WRITE, MAP_SHARED,
-                       pmsg->fds[idx],
-                       0);
+               mmap_addr = mmap(NULL, mmap_size,
+                                PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
 
-               RTE_LOG(INFO, VHOST_CONFIG,
-                       "mapped region %d fd:%d to:%p sz:0x%"PRIx64" "
-                       "off:0x%"PRIx64" align:0x%"PRIx64"\n",
-                       idx, pmsg->fds[idx], (void *)(uintptr_t)mapped_address,
-                       mapped_size, memory.regions[idx].mmap_offset,
-                       alignment);
-
-               if (mapped_address == (uint64_t)(uintptr_t)MAP_FAILED) {
+               if (mmap_addr == MAP_FAILED) {
                        RTE_LOG(ERR, VHOST_CONFIG,
-                               "mmap qemu guest failed.\n");
+                               "mmap region %u failed.\n", i);
                        goto err_mmap;
                }
 
-               pregion_orig[idx].mapped_address = mapped_address;
-               pregion_orig[idx].mapped_size = mapped_size;
-               pregion_orig[idx].blksz = alignment;
-               pregion_orig[idx].fd = pmsg->fds[idx];
-
-               mapped_address +=  memory.regions[idx].mmap_offset;
+               reg->mmap_addr = mmap_addr;
+               reg->mmap_size = mmap_size;
+               reg->host_user_addr = (uint64_t)(uintptr_t)mmap_addr +
+                                     mmap_offset;
 
-               pregion->address_offset = mapped_address -
-                       pregion->guest_phys_address;
-
-               if (memory.regions[idx].guest_phys_addr == 0) {
-                       dev->mem->base_address =
-                               memory.regions[idx].userspace_addr;
-                       dev->mem->mapped_address =
-                               pregion->address_offset;
-               }
-
-               LOG_DEBUG(VHOST_CONFIG,
-                       "REGION: %u GPA: %p QEMU VA: %p SIZE (%"PRIu64")\n",
-                       idx,
-                       (void *)(uintptr_t)pregion->guest_phys_address,
-                       (void *)(uintptr_t)pregion->userspace_address,
-                        pregion->memory_size);
+               RTE_LOG(INFO, VHOST_CONFIG,
+                       "guest memory region %u, size: 0x%" PRIx64 "\n"
+                       "\t guest physical addr: 0x%" PRIx64 "\n"
+                       "\t guest virtual  addr: 0x%" PRIx64 "\n"
+                       "\t host  virtual  addr: 0x%" PRIx64 "\n"
+                       "\t mmap addr : 0x%" PRIx64 "\n"
+                       "\t mmap size : 0x%" PRIx64 "\n"
+                       "\t mmap align: 0x%" PRIx64 "\n"
+                       "\t mmap off  : 0x%" PRIx64 "\n",
+                       i, reg->size,
+                       reg->guest_phys_addr,
+                       reg->guest_user_addr,
+                       reg->host_user_addr,
+                       (uint64_t)(uintptr_t)mmap_addr,
+                       mmap_size,
+                       alignment,
+                       mmap_offset);
        }
 
        return 0;
 
 err_mmap:
-       while (idx--) {
-               munmap((void *)(uintptr_t)pregion_orig[idx].mapped_address,
-                               pregion_orig[idx].mapped_size);
-               close(pregion_orig[idx].fd);
-       }
-       free(dev->mem);
+       free_mem_region(dev);
+       rte_free(dev->mem);
        dev->mem = NULL;
        return -1;
 }