#include <fcntl.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
-#include <sys/types.h>
#include <sys/stat.h>
#include <sys/syscall.h>
-#include <assert.h>
#ifdef RTE_LIBRTE_VHOST_NUMA
#include <numaif.h>
#endif
return ret == -1 ? (uint64_t)-1 : (uint64_t)stat.st_blksize;
}
-static int
-async_dma_map(struct virtio_net *dev, struct rte_vhost_mem_region *region, bool do_map)
+/*
+ * Map (do_map == true) or unmap (do_map == false) every entry of
+ * dev->guest_pages in the default VFIO container of DPDK, using the
+ * host_user_addr / host_iova / size recorded for each guest page.
+ *
+ * Nothing is reported to the caller: failures are only logged, and the
+ * remaining pages are still processed. The walk stops early only on the
+ * errno value treated as "no device handled by DPDK vfio" (ENODEV when
+ * mapping, EINVAL when unmapping).
+ */
+static void
+async_dma_map(struct virtio_net *dev, bool do_map)
{
- uint64_t host_iova;
int ret = 0;
+ uint32_t i;
+ struct guest_page *page;
- host_iova = rte_mem_virt2iova((void *)(uintptr_t)region->host_user_addr);
if (do_map) {
- /* Add mapped region into the default container of DPDK. */
- ret = rte_vfio_container_dma_map(RTE_VFIO_DEFAULT_CONTAINER_FD,
- region->host_user_addr,
- host_iova,
- region->size);
- if (ret) {
- /*
- * DMA device may bind with kernel driver, in this case,
- * we don't need to program IOMMU manually. However, if no
- * device is bound with vfio/uio in DPDK, and vfio kernel
- * module is loaded, the API will still be called and return
- * with ENODEV/ENOSUP.
- *
- * DPDK vfio only returns ENODEV/ENOSUP in very similar
- * situations(vfio either unsupported, or supported
- * but no devices found). Either way, no mappings could be
- * performed. We treat it as normal case in async path.
- */
- if (rte_errno == ENODEV || rte_errno == ENOTSUP)
- return 0;
-
- VHOST_LOG_CONFIG(ERR, "(%s) DMA engine map failed\n", dev->ifname);
- /* DMA mapping errors won't stop VHST_USER_SET_MEM_TABLE. */
- return 0;
+ /* Add each guest page into the default container of DPDK. */
+ for (i = 0; i < dev->nr_guest_pages; i++) {
+ page = &dev->guest_pages[i];
+ ret = rte_vfio_container_dma_map(RTE_VFIO_DEFAULT_CONTAINER_FD,
+ page->host_user_addr,
+ page->host_iova,
+ page->size);
+ if (ret) {
+ /*
+ * DMA device may bind with kernel driver, in this case,
+ * we don't need to program IOMMU manually. However, if no
+ * device is bound with vfio/uio in DPDK, and vfio kernel
+ * module is loaded, the API will still be called and return
+ * with ENODEV.
+ *
+ * DPDK vfio only returns ENODEV in very similar situations
+ * (vfio either unsupported, or supported but no devices found).
+ * Either way, no mappings could be performed. We treat it as
+ * normal case in async path. This is a workaround.
+ */
+ if (rte_errno == ENODEV)
+ return;
+
+ /* DMA mapping errors won't stop VHOST_USER_SET_MEM_TABLE. */
+ VHOST_LOG_CONFIG(ERR, "(%s) DMA engine map failed\n", dev->ifname);
+ }
}
} else {
- /* Remove mapped region from the default container of DPDK. */
- ret = rte_vfio_container_dma_unmap(RTE_VFIO_DEFAULT_CONTAINER_FD,
- region->host_user_addr,
- host_iova,
- region->size);
- if (ret) {
- /* like DMA map, ignore the kernel driver case when unmap. */
- if (rte_errno == EINVAL)
- return 0;
-
- VHOST_LOG_CONFIG(ERR, "(%s) DMA engine unmap failed\n", dev->ifname);
- return ret;
+ /* Remove each guest page from the default container of DPDK. */
+ for (i = 0; i < dev->nr_guest_pages; i++) {
+ page = &dev->guest_pages[i];
+ ret = rte_vfio_container_dma_unmap(RTE_VFIO_DEFAULT_CONTAINER_FD,
+ page->host_user_addr,
+ page->host_iova,
+ page->size);
+ if (ret) {
+ /* like DMA map, ignore the kernel driver case when unmap. */
+ if (rte_errno == EINVAL)
+ return;
+
+ /* Keep going: one failed unmap must not leave later pages mapped. */
+ VHOST_LOG_CONFIG(ERR, "(%s) DMA engine unmap failed\n", dev->ifname);
+ }
}
}
-
- return ret;
}
static void
if (!dev || !dev->mem)
return;
+ if (dev->async_copy && rte_vfio_is_enabled("vfio"))
+ async_dma_map(dev, false);
+
for (i = 0; i < dev->mem->nregions; i++) {
reg = &dev->mem->regions[i];
if (reg->host_user_addr) {
- if (dev->async_copy && rte_vfio_is_enabled("vfio"))
- async_dma_map(dev, reg, false);
-
munmap(reg->mmap_addr, reg->mmap_size);
close(reg->fd);
}
void
vhost_backend_cleanup(struct virtio_net *dev)
{
+ struct rte_vdpa_device *vdpa_dev;
+
+ vdpa_dev = dev->vdpa_dev;
+ if (vdpa_dev && vdpa_dev->ops->dev_cleanup != NULL)
+ vdpa_dev->ops->dev_cleanup(dev->vid);
+
if (dev->mem) {
free_mem_region(dev);
rte_free(dev->mem);
dev = rte_realloc_socket(old_dev, sizeof(*dev), 0, node);
if (!dev) {
VHOST_LOG_CONFIG(ERR, "(%s) failed to realloc dev on node %d\n",
- dev->ifname, node);
+ old_dev->ifname, node);
return old_dev;
}
static int
add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr,
- uint64_t host_phys_addr, uint64_t size)
+ uint64_t host_iova, uint64_t host_user_addr, uint64_t size)
{
struct guest_page *page, *last_page;
struct guest_page *old_pages;
dev->max_guest_pages * sizeof(*page),
RTE_CACHE_LINE_SIZE);
if (dev->guest_pages == NULL) {
- VHOST_LOG_CONFIG(ERR, "(%s) cannot realloc guest_pages\n", dev->ifname);
+ VHOST_LOG_CONFIG(ERR, "cannot realloc guest_pages\n");
rte_free(old_pages);
return -1;
}
if (dev->nr_guest_pages > 0) {
last_page = &dev->guest_pages[dev->nr_guest_pages - 1];
/* merge if the two pages are continuous */
- if (host_phys_addr == last_page->host_phys_addr +
- last_page->size) {
+ if (host_iova == last_page->host_iova + last_page->size &&
+ guest_phys_addr == last_page->guest_phys_addr + last_page->size &&
+ host_user_addr == last_page->host_user_addr + last_page->size) {
last_page->size += size;
return 0;
}
page = &dev->guest_pages[dev->nr_guest_pages++];
page->guest_phys_addr = guest_phys_addr;
- page->host_phys_addr = host_phys_addr;
+ page->host_iova = host_iova;
+ page->host_user_addr = host_user_addr;
page->size = size;
return 0;
uint64_t reg_size = reg->size;
uint64_t host_user_addr = reg->host_user_addr;
uint64_t guest_phys_addr = reg->guest_phys_addr;
- uint64_t host_phys_addr;
+ uint64_t host_iova;
uint64_t size;
- host_phys_addr = rte_mem_virt2iova((void *)(uintptr_t)host_user_addr);
+ host_iova = rte_mem_virt2iova((void *)(uintptr_t)host_user_addr);
size = page_size - (guest_phys_addr & (page_size - 1));
size = RTE_MIN(size, reg_size);
- if (add_one_guest_page(dev, guest_phys_addr, host_phys_addr, size) < 0)
+ if (add_one_guest_page(dev, guest_phys_addr, host_iova,
+ host_user_addr, size) < 0)
return -1;
host_user_addr += size;
while (reg_size > 0) {
size = RTE_MIN(reg_size, page_size);
- host_phys_addr = rte_mem_virt2iova((void *)(uintptr_t)
+ host_iova = rte_mem_virt2iova((void *)(uintptr_t)
host_user_addr);
- if (add_one_guest_page(dev, guest_phys_addr, host_phys_addr,
- size) < 0)
+ if (add_one_guest_page(dev, guest_phys_addr, host_iova,
+ host_user_addr, size) < 0)
return -1;
host_user_addr += size;
dev->ifname, i);
VHOST_LOG_CONFIG(INFO, "(%s)\tguest_phys_addr: %" PRIx64 "\n",
dev->ifname, page->guest_phys_addr);
- VHOST_LOG_CONFIG(INFO, "(%s)\thost_phys_addr : %" PRIx64 "\n",
- dev->ifname, page->host_phys_addr);
+ VHOST_LOG_CONFIG(INFO, "(%s)\thost_iova : %" PRIx64 "\n",
+ dev->ifname, page->host_iova);
VHOST_LOG_CONFIG(INFO, "(%s)\tsize : %" PRIx64 "\n",
dev->ifname, page->size);
}
uint64_t mmap_size;
uint64_t alignment;
int populate;
- int ret;
/* Check for memory_size + mmap_offset overflow */
if (mmap_offset >= -region->size) {
dev->ifname);
return -1;
}
-
- if (rte_vfio_is_enabled("vfio")) {
- ret = async_dma_map(dev, region, true);
- if (ret) {
- VHOST_LOG_CONFIG(ERR,
- "(%s) configure IOMMU for DMA engine failed\n",
- dev->ifname);
- return -1;
- }
- }
}
VHOST_LOG_CONFIG(INFO, "(%s) guest memory region size: 0x%" PRIx64 "\n",
dev->mem->nregions++;
}
+ if (dev->async_copy && rte_vfio_is_enabled("vfio"))
+ async_dma_map(dev, true);
+
if (vhost_user_postcopy_register(dev, main_fd, ctx) < 0)
goto free_mem_table;
vhost_user_iotlb_cache_insert(dev, vq, imsg->iova, vva,
len, imsg->perm);
- if (is_vring_iotlb(dev, vq, imsg))
+ if (is_vring_iotlb(dev, vq, imsg)) {
+ rte_spinlock_lock(&vq->access_lock);
*pdev = dev = translate_ring_addresses(dev, i);
+ rte_spinlock_unlock(&vq->access_lock);
+ }
}
break;
case VHOST_IOTLB_INVALIDATE:
vhost_user_iotlb_cache_remove(vq, imsg->iova,
imsg->size);
- if (is_vring_iotlb(dev, vq, imsg))
+ if (is_vring_iotlb(dev, vq, imsg)) {
+ rte_spinlock_lock(&vq->access_lock);
vring_invalidate(dev, vq);
+ rte_spinlock_unlock(&vq->access_lock);
+ }
}
break;
default:
handled = false;
if (dev->extern_ops.pre_msg_handle) {
- ret = (*dev->extern_ops.pre_msg_handle)(dev->vid,
- (void *)&ctx.msg);
+ RTE_BUILD_BUG_ON(offsetof(struct vhu_msg_context, msg) != 0);
+ ret = (*dev->extern_ops.pre_msg_handle)(dev->vid, &ctx);
switch (ret) {
case RTE_VHOST_MSG_RESULT_REPLY:
send_vhost_reply(dev, fd, &ctx);
skip_to_post_handle:
if (ret != RTE_VHOST_MSG_RESULT_ERR &&
dev->extern_ops.post_msg_handle) {
- ret = (*dev->extern_ops.post_msg_handle)(dev->vid,
- (void *)&ctx.msg);
+ RTE_BUILD_BUG_ON(offsetof(struct vhu_msg_context, msg) != 0);
+ ret = (*dev->extern_ops.post_msg_handle)(dev->vid, &ctx);
switch (ret) {
case RTE_VHOST_MSG_RESULT_REPLY:
send_vhost_reply(dev, fd, &ctx);