#include <rte_common.h>
#include <rte_malloc.h>
#include <rte_log.h>
+#include <rte_vfio.h>
+#include <rte_errno.h>
#include "iotlb.h"
#include "vhost.h"
return ret == -1 ? (uint64_t)-1 : (uint64_t)stat.st_blksize;
}
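+/*
+ * Map (do_map == true) or unmap a guest memory region in DPDK's default
+ * VFIO container so that DMA devices used by the async data path can
+ * access it.
+ */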
+static int
+async_dma_map(struct rte_vhost_mem_region *region, bool do_map)
+{
+ uint64_t host_iova;
+ int ret = 0;
+
+ host_iova = rte_mem_virt2iova((void *)(uintptr_t)region->host_user_addr);
+ if (do_map) {
+ /* Add mapped region into the default container of DPDK. */
+ ret = rte_vfio_container_dma_map(RTE_VFIO_DEFAULT_CONTAINER_FD,
+ region->host_user_addr,
+ host_iova,
+ region->size);
+ if (ret) {
+ /*
+ * The DMA device may be bound to a kernel driver, in which case
+ * we don't need to program the IOMMU manually. However, if no
+ * device is bound to vfio/uio in DPDK, and the vfio kernel
+ * module is loaded, the API will still be called and return
+ * ENODEV/ENOTSUP.
+ *
+ * DPDK vfio only returns ENODEV/ENOTSUP in very similar
+ * situations (vfio either unsupported, or supported
+ * but no devices found). Either way, no mappings could be
+ * performed. We treat it as a normal case in the async path.
+ */
+ if (rte_errno == ENODEV || rte_errno == ENOTSUP)
+ return 0;
+
+ VHOST_LOG_CONFIG(ERR, "DMA engine map failed\n");
+ /* DMA mapping errors won't stop VHOST_USER_SET_MEM_TABLE. */
+ return 0;
+ }
+
+ } else {
+ /* Remove mapped region from the default container of DPDK. */
+ ret = rte_vfio_container_dma_unmap(RTE_VFIO_DEFAULT_CONTAINER_FD,
+ region->host_user_addr,
+ host_iova,
+ region->size);
+ if (ret) {
+ /* Like DMA map, ignore the kernel driver case when unmapping. */
+ if (rte_errno == EINVAL)
+ return 0;
+
+ VHOST_LOG_CONFIG(ERR, "DMA engine unmap failed\n");
+ return ret;
+ }
+ }
+
+ return ret;
+}
+
static void
free_mem_region(struct virtio_net *dev)
{
for (i = 0; i < dev->mem->nregions; i++) {
reg = &dev->mem->regions[i];
if (reg->host_user_addr) {
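+ /* Remove the region from the VFIO container before unmapping it. */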
+ if (dev->async_copy && rte_vfio_is_enabled("vfio"))
+ async_dma_map(reg, false);
+
munmap(reg->mmap_addr, reg->mmap_size);
close(reg->fd);
}
dev->inflight_info->fd = -1;
}
- free(dev->inflight_info);
+ rte_free(dev->inflight_info);
dev->inflight_info = NULL;
}
vq->log_cache = lc;
}
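+ /* Migrate the inflight resubmit metadata to the new NUMA node as well. */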
+ if (vq->resubmit_inflight) {
+ struct rte_vhost_resubmit_info *ri;
+
+ ri = rte_realloc_socket(vq->resubmit_inflight, sizeof(*ri), 0, node);
+ if (!ri) {
+ VHOST_LOG_CONFIG(ERR, "Failed to realloc resubmit inflight on node %d\n",
+ node);
+ return dev;
+ }
+ vq->resubmit_inflight = ri;
+
+ if (ri->resubmit_list) {
+ struct rte_vhost_resubmit_desc *rd;
+
+ rd = rte_realloc_socket(ri->resubmit_list, sizeof(*rd) * ri->resubmit_num,
+ 0, node);
+ if (!rd) {
+ VHOST_LOG_CONFIG(ERR, "Failed to realloc resubmit list on node %d\n",
+ node);
+ return dev;
+ }
+ ri->resubmit_list = rd;
+ }
+ }
+
vq->numa_node = node;
out_dev_realloc:
vq->last_avail_idx = msg->payload.state.num;
}
+ VHOST_LOG_CONFIG(INFO,
+ "(%s) vring base idx:%u last_used_idx:%u last_avail_idx:%u.\n",
+ dev->ifname, msg->payload.state.index, vq->last_used_idx,
+ vq->last_avail_idx);
+
return RTE_VHOST_MSG_RESULT_OK;
}
struct uffdio_register reg_struct;
/*
- * Let's register all the mmap'ed area to ensure
+ * Let's register the whole mmapped area to ensure
* alignment on page boundary.
*/
reg_struct.range.start = (uint64_t)(uintptr_t)reg->mmap_addr;
msg->fd_num = 0;
send_vhost_reply(main_fd, msg);
- /* Wait for qemu to acknolwedge it's got the addresses
+ /* Wait for qemu to acknowledge it got the addresses
* we've got to wait before we're allowed to generate faults.
*/
if (read_vhost_message(main_fd, &ack_msg) <= 0) {
uint64_t mmap_size;
uint64_t alignment;
int populate;
+ int ret;
/* Check for memory_size + mmap_offset overflow */
if (mmap_offset >= -region->size) {
region->mmap_size = mmap_size;
region->host_user_addr = (uint64_t)(uintptr_t)mmap_addr + mmap_offset;
- if (dev->async_copy)
+ if (dev->async_copy) {
if (add_guest_pages(dev, region, alignment) < 0) {
- VHOST_LOG_CONFIG(ERR,
- "adding guest pages to region failed.\n");
+ VHOST_LOG_CONFIG(ERR, "adding guest pages to region failed.\n");
return -1;
}
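+ /* Program the IOMMU so DMA devices can access the newly mapped region. */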
+ if (rte_vfio_is_enabled("vfio")) {
+ ret = async_dma_map(region, true);
+ if (ret) {
+ VHOST_LOG_CONFIG(ERR, "Configure IOMMU for DMA engine failed\n");
+ return -1;
+ }
+ }
+ }
+
VHOST_LOG_CONFIG(INFO,
"guest memory region size: 0x%" PRIx64 "\n"
"\t guest physical addr: 0x%" PRIx64 "\n"
int numa_node = SOCKET_ID_ANY;
uint64_t mmap_offset;
uint32_t i;
+ bool async_notify = false;
if (validate_msg_fds(msg, memory->nregions) != 0)
return RTE_VHOST_MSG_RESULT_ERR;
vdpa_dev->ops->dev_close(dev->vid);
dev->flags &= ~VIRTIO_DEV_VDPA_CONFIGURED;
}
+
+ /* notify the vhost application to stop DMA transfers */
+ if (dev->async_copy && dev->notify_ops->vring_state_changed) {
+ for (i = 0; i < dev->nr_vring; i++) {
+ dev->notify_ops->vring_state_changed(dev->vid,
+ i, 0);
+ }
+ async_notify = true;
+ }
+
free_mem_region(dev);
rte_free(dev->mem);
dev->mem = NULL;
dump_guest_pages(dev);
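+ /* Resume the rings that were stopped for the memory table update. */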
+ if (async_notify) {
+ for (i = 0; i < dev->nr_vring; i++)
+ dev->notify_ops->vring_state_changed(dev->vid, i, 1);
+ }
+
return RTE_VHOST_MSG_RESULT_OK;
free_mem_table:
free_mem_region(dev);
rte_free(dev->mem);
dev->mem = NULL;
+
free_guest_pages:
rte_free(dev->guest_pages);
dev->guest_pages = NULL;
uint16_t num_queues, queue_size;
struct virtio_net *dev = *pdev;
int fd, i, j;
+ int numa_node = SOCKET_ID_ANY;
void *addr;
if (msg->size != sizeof(msg->payload.inflight)) {
return RTE_VHOST_MSG_RESULT_ERR;
}
+ /*
+ * If VQ 0 has already been allocated, try to allocate on the same
+ * NUMA node. It can be reallocated later in numa_realloc().
+ */
+ if (dev->nr_vring > 0)
+ numa_node = dev->virtqueue[0]->numa_node;
+
if (dev->inflight_info == NULL) {
- dev->inflight_info = calloc(1,
- sizeof(struct inflight_mem_info));
+ dev->inflight_info = rte_zmalloc_socket("inflight_info",
+ sizeof(struct inflight_mem_info), 0, numa_node);
if (!dev->inflight_info) {
VHOST_LOG_CONFIG(ERR,
"failed to alloc dev inflight area\n");
struct vhost_virtqueue *vq;
void *addr;
int fd, i;
+ int numa_node = SOCKET_ID_ANY;
fd = msg->fds[0];
if (msg->size != sizeof(msg->payload.inflight) || fd < 0) {
"set_inflight_fd pervq_inflight_size: %d\n",
pervq_inflight_size);
+ /*
+ * If VQ 0 has already been allocated, try to allocate on the same
+ * NUMA node. It can be reallocated later in numa_realloc().
+ */
+ if (dev->nr_vring > 0)
+ numa_node = dev->virtqueue[0]->numa_node;
+
if (!dev->inflight_info) {
- dev->inflight_info = calloc(1,
- sizeof(struct inflight_mem_info));
+ dev->inflight_info = rte_zmalloc_socket("inflight_info",
+ sizeof(struct inflight_mem_info), 0, numa_node);
if (dev->inflight_info == NULL) {
VHOST_LOG_CONFIG(ERR,
"failed to alloc dev inflight area\n");
vq->last_avail_idx += resubmit_num;
if (resubmit_num) {
- resubmit = calloc(1, sizeof(struct rte_vhost_resubmit_info));
+ resubmit = rte_zmalloc_socket("resubmit", sizeof(struct rte_vhost_resubmit_info),
+ 0, vq->numa_node);
if (!resubmit) {
VHOST_LOG_CONFIG(ERR,
"failed to allocate memory for resubmit info.\n");
return RTE_VHOST_MSG_RESULT_ERR;
}
- resubmit->resubmit_list = calloc(resubmit_num,
- sizeof(struct rte_vhost_resubmit_desc));
+ resubmit->resubmit_list = rte_zmalloc_socket("resubmit_list",
+ resubmit_num * sizeof(struct rte_vhost_resubmit_desc),
+ 0, vq->numa_node);
if (!resubmit->resubmit_list) {
VHOST_LOG_CONFIG(ERR,
"failed to allocate memory for inflight desc.\n");
- free(resubmit);
+ rte_free(resubmit);
return RTE_VHOST_MSG_RESULT_ERR;
}
}
if (resubmit_num) {
- resubmit = calloc(1, sizeof(struct rte_vhost_resubmit_info));
+ resubmit = rte_zmalloc_socket("resubmit", sizeof(struct rte_vhost_resubmit_info),
+ 0, vq->numa_node);
if (resubmit == NULL) {
VHOST_LOG_CONFIG(ERR,
"failed to allocate memory for resubmit info.\n");
return RTE_VHOST_MSG_RESULT_ERR;
}
- resubmit->resubmit_list = calloc(resubmit_num,
- sizeof(struct rte_vhost_resubmit_desc));
+ resubmit->resubmit_list = rte_zmalloc_socket("resubmit_list",
+ resubmit_num * sizeof(struct rte_vhost_resubmit_desc),
+ 0, vq->numa_node);
if (resubmit->resubmit_list == NULL) {
VHOST_LOG_CONFIG(ERR,
"failed to allocate memory for resubmit desc.\n");
- free(resubmit);
+ rte_free(resubmit);
return RTE_VHOST_MSG_RESULT_ERR;
}
msg->size = sizeof(msg->payload.state);
msg->fd_num = 0;
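+ /* The ring is being stopped; drop cached IOTLB entries before invalidating the vring. */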
+ vhost_user_iotlb_flush_all(vq);
+
vring_invalidate(dev, vq);
return RTE_VHOST_MSG_RESULT_REPLY;
"set queue enable: %d to qp idx: %d\n",
enable, index);
- if (enable && dev->virtqueue[index]->async_registered) {
- if (dev->virtqueue[index]->async_pkts_inflight_n) {
+ if (enable && dev->virtqueue[index]->async) {
+ if (dev->virtqueue[index]->async->pkts_inflight_n) {
VHOST_LOG_CONFIG(ERR, "failed to enable vring. "
"async inflight packets must be completed first\n");
return RTE_VHOST_MSG_RESULT_ERR;
return RTE_VHOST_MSG_RESULT_ERR;
VHOST_LOG_CONFIG(DEBUG,
- ":: mac: %02x:%02x:%02x:%02x:%02x:%02x\n",
+ ":: mac: " RTE_ETHER_ADDR_PRT_FMT "\n",
mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);
memcpy(dev->mac.addr_bytes, mac, 6);
break;
case VHOST_USER_SET_VRING_NUM:
case VHOST_USER_SET_VRING_BASE:
+ case VHOST_USER_GET_VRING_BASE:
case VHOST_USER_SET_VRING_ENABLE:
vring_idx = msg->payload.state.index;
break;
}
}
- if (unlock_required)
- vhost_user_unlock_all_queue_pairs(dev);
-
/* If message was not handled at this stage, treat it as an error */
if (!handled) {
VHOST_LOG_CONFIG(ERR,
}
}
+ if (unlock_required)
+ vhost_user_unlock_all_queue_pairs(dev);
if (!virtio_is_ready(dev))
goto out;