X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=lib%2Fvhost%2Fvhost_user.c;h=a781346c4d0c006c52c5d8c9ab24725850fef5ef;hb=ee8024b3d4ad375cc5e28d493449c5dcea335540;hp=d8ec087dfceb8358325861295ba894721ee6323b;hpb=b81c93466df0c19ec3a07690b8bca64d257142eb;p=dpdk.git

diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
index d8ec087dfc..a781346c4d 100644
--- a/lib/vhost/vhost_user.c
+++ b/lib/vhost/vhost_user.c
@@ -45,6 +45,8 @@
 #include <rte_common.h>
 #include <rte_malloc.h>
 #include <rte_log.h>
+#include <rte_vfio.h>
+#include <rte_errno.h>
 
 #include "iotlb.h"
 #include "vhost.h"
@@ -141,6 +143,59 @@ get_blk_size(int fd)
 	return ret == -1 ? (uint64_t)-1 : (uint64_t)stat.st_blksize;
 }
 
+static int
+async_dma_map(struct rte_vhost_mem_region *region, bool do_map)
+{
+	uint64_t host_iova;
+	int ret = 0;
+
+	host_iova = rte_mem_virt2iova((void *)(uintptr_t)region->host_user_addr);
+	if (do_map) {
+		/* Add mapped region into the default container of DPDK. */
+		ret = rte_vfio_container_dma_map(RTE_VFIO_DEFAULT_CONTAINER_FD,
+						 region->host_user_addr,
+						 host_iova,
+						 region->size);
+		if (ret) {
+			/*
+			 * DMA device may bind with kernel driver, in this case,
+			 * we don't need to program IOMMU manually. However, if no
+			 * device is bound with vfio/uio in DPDK, and vfio kernel
+			 * module is loaded, the API will still be called and return
+			 * with ENODEV/ENOSUP.
+			 *
+			 * DPDK vfio only returns ENODEV/ENOSUP in very similar
+			 * situations(vfio either unsupported, or supported
+			 * but no devices found). Either way, no mappings could be
+			 * performed. We treat it as normal case in async path.
+			 */
+			if (rte_errno == ENODEV || rte_errno == ENOTSUP)
+				return 0;
+
+			VHOST_LOG_CONFIG(ERR, "DMA engine map failed\n");
+			/* DMA mapping errors won't stop VHST_USER_SET_MEM_TABLE. */
+			return 0;
+		}
+
+	} else {
+		/* Remove mapped region from the default container of DPDK. */
+		ret = rte_vfio_container_dma_unmap(RTE_VFIO_DEFAULT_CONTAINER_FD,
+						   region->host_user_addr,
+						   host_iova,
+						   region->size);
+		if (ret) {
+			/* like DMA map, ignore the kernel driver case when unmap. */
+			if (rte_errno == EINVAL)
+				return 0;
+
+			VHOST_LOG_CONFIG(ERR, "DMA engine unmap failed\n");
+			return ret;
+		}
+	}
+
+	return ret;
+}
+
 static void
 free_mem_region(struct virtio_net *dev)
 {
@@ -153,6 +208,9 @@ free_mem_region(struct virtio_net *dev)
 	for (i = 0; i < dev->mem->nregions; i++) {
 		reg = &dev->mem->regions[i];
 		if (reg->host_user_addr) {
+			if (dev->async_copy && rte_vfio_is_enabled("vfio"))
+				async_dma_map(reg, false);
+
 			munmap(reg->mmap_addr, reg->mmap_size);
 			close(reg->fd);
 		}
@@ -188,7 +246,7 @@ vhost_backend_cleanup(struct virtio_net *dev)
 			dev->inflight_info->fd = -1;
 		}
 
-		free(dev->inflight_info);
+		rte_free(dev->inflight_info);
 		dev->inflight_info = NULL;
 	}
 
@@ -562,6 +620,31 @@ numa_realloc(struct virtio_net *dev, int index)
 		vq->log_cache = lc;
 	}
 
+	if (vq->resubmit_inflight) {
+		struct rte_vhost_resubmit_info *ri;
+
+		ri = rte_realloc_socket(vq->resubmit_inflight, sizeof(*ri), 0, node);
+		if (!ri) {
+			VHOST_LOG_CONFIG(ERR, "Failed to realloc resubmit inflight on node %d\n",
+					node);
+			return dev;
+		}
+		vq->resubmit_inflight = ri;
+
+		if (ri->resubmit_list) {
+			struct rte_vhost_resubmit_desc *rd;
+
+			rd = rte_realloc_socket(ri->resubmit_list, sizeof(*rd) * ri->resubmit_num,
+					0, node);
+			if (!rd) {
+				VHOST_LOG_CONFIG(ERR, "Failed to realloc resubmit list on node %d\n",
+						node);
+				return dev;
+			}
+			ri->resubmit_list = rd;
+		}
+	}
+
 	vq->numa_node = node;
 
 out_dev_realloc:
@@ -1132,6 +1215,7 @@ vhost_user_mmap_region(struct virtio_net *dev,
 	uint64_t mmap_size;
 	uint64_t alignment;
 	int populate;
+	int ret;
 
 	/* Check for memory_size + mmap_offset overflow */
 	if (mmap_offset >= -region->size) {
@@ -1185,13 +1269,21 @@ vhost_user_mmap_region(struct virtio_net *dev,
 	region->mmap_size = mmap_size;
 	region->host_user_addr = (uint64_t)(uintptr_t)mmap_addr + mmap_offset;
 
-	if (dev->async_copy)
+	if (dev->async_copy) {
 		if (add_guest_pages(dev, region, alignment) < 0) {
-			VHOST_LOG_CONFIG(ERR,
-					"adding guest pages to region failed.\n");
+			VHOST_LOG_CONFIG(ERR, "adding guest pages to region failed.\n");
 			return -1;
 		}
 
+		if (rte_vfio_is_enabled("vfio")) {
+			ret = async_dma_map(region, true);
+			if (ret) {
+				VHOST_LOG_CONFIG(ERR, "Configure IOMMU for DMA engine failed\n");
+				return -1;
+			}
+		}
+	}
+
 	VHOST_LOG_CONFIG(INFO,
 			"guest memory region size: 0x%" PRIx64 "\n"
 			"\t guest physical addr: 0x%" PRIx64 "\n"
@@ -1223,6 +1315,7 @@ vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *msg,
 	int numa_node = SOCKET_ID_ANY;
 	uint64_t mmap_offset;
 	uint32_t i;
+	bool async_notify = false;
 
 	if (validate_msg_fds(msg, memory->nregions) != 0)
 		return RTE_VHOST_MSG_RESULT_ERR;
@@ -1250,6 +1343,16 @@ vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *msg,
 				vdpa_dev->ops->dev_close(dev->vid);
 			dev->flags &= ~VIRTIO_DEV_VDPA_CONFIGURED;
 		}
+
+		/* notify the vhost application to stop DMA transfers */
+		if (dev->async_copy && dev->notify_ops->vring_state_changed) {
+			for (i = 0; i < dev->nr_vring; i++) {
+				dev->notify_ops->vring_state_changed(dev->vid,
+						i, 0);
+			}
+			async_notify = true;
+		}
+
 		free_mem_region(dev);
 		rte_free(dev->mem);
 		dev->mem = NULL;
@@ -1346,12 +1449,18 @@ vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *msg,
 
 	dump_guest_pages(dev);
 
+	if (async_notify) {
+		for (i = 0; i < dev->nr_vring; i++)
+			dev->notify_ops->vring_state_changed(dev->vid, i, 1);
+	}
+
 	return RTE_VHOST_MSG_RESULT_OK;
 
 free_mem_table:
 	free_mem_region(dev);
 	rte_free(dev->mem);
 	dev->mem = NULL;
+
 free_guest_pages:
 	rte_free(dev->guest_pages);
 	dev->guest_pages = NULL;
@@ -1491,6 +1600,7 @@ vhost_user_get_inflight_fd(struct virtio_net **pdev,
 	uint16_t num_queues, queue_size;
 	struct virtio_net *dev = *pdev;
 	int fd, i, j;
+	int numa_node = SOCKET_ID_ANY;
 	void *addr;
 
 	if (msg->size != sizeof(msg->payload.inflight)) {
@@ -1500,9 +1610,16 @@ vhost_user_get_inflight_fd(struct virtio_net **pdev,
 		return RTE_VHOST_MSG_RESULT_ERR;
 	}
 
+	/*
+	 * If VQ 0 has already been allocated, try to allocate on the same
+	 * NUMA node. It can be reallocated later in numa_realloc().
+	 */
+	if (dev->nr_vring > 0)
+		numa_node = dev->virtqueue[0]->numa_node;
+
 	if (dev->inflight_info == NULL) {
-		dev->inflight_info = calloc(1,
-					    sizeof(struct inflight_mem_info));
+		dev->inflight_info = rte_zmalloc_socket("inflight_info",
+				sizeof(struct inflight_mem_info), 0, numa_node);
 		if (!dev->inflight_info) {
 			VHOST_LOG_CONFIG(ERR,
 				"failed to alloc dev inflight area\n");
@@ -1585,6 +1702,7 @@ vhost_user_set_inflight_fd(struct virtio_net **pdev, VhostUserMsg *msg,
 	struct vhost_virtqueue *vq;
 	void *addr;
 	int fd, i;
+	int numa_node = SOCKET_ID_ANY;
 
 	fd = msg->fds[0];
 	if (msg->size != sizeof(msg->payload.inflight) || fd < 0) {
@@ -1618,9 +1736,16 @@ vhost_user_set_inflight_fd(struct virtio_net **pdev, VhostUserMsg *msg,
 		"set_inflight_fd pervq_inflight_size: %d\n",
 		pervq_inflight_size);
 
+	/*
+	 * If VQ 0 has already been allocated, try to allocate on the same
+	 * NUMA node. It can be reallocated later in numa_realloc().
+	 */
+	if (dev->nr_vring > 0)
+		numa_node = dev->virtqueue[0]->numa_node;
+
 	if (!dev->inflight_info) {
-		dev->inflight_info = calloc(1,
-					    sizeof(struct inflight_mem_info));
+		dev->inflight_info = rte_zmalloc_socket("inflight_info",
+				sizeof(struct inflight_mem_info), 0, numa_node);
 		if (dev->inflight_info == NULL) {
 			VHOST_LOG_CONFIG(ERR,
 				"failed to alloc dev inflight area\n");
@@ -1779,19 +1904,21 @@ vhost_check_queue_inflights_split(struct virtio_net *dev,
 	vq->last_avail_idx += resubmit_num;
 
 	if (resubmit_num) {
-		resubmit  = calloc(1, sizeof(struct rte_vhost_resubmit_info));
+		resubmit = rte_zmalloc_socket("resubmit", sizeof(struct rte_vhost_resubmit_info),
+				0, vq->numa_node);
 		if (!resubmit) {
 			VHOST_LOG_CONFIG(ERR,
 				"failed to allocate memory for resubmit info.\n");
 			return RTE_VHOST_MSG_RESULT_ERR;
 		}
 
-		resubmit->resubmit_list = calloc(resubmit_num,
-			sizeof(struct rte_vhost_resubmit_desc));
+		resubmit->resubmit_list = rte_zmalloc_socket("resubmit_list",
+				resubmit_num * sizeof(struct rte_vhost_resubmit_desc),
+				0, vq->numa_node);
 		if (!resubmit->resubmit_list) {
 			VHOST_LOG_CONFIG(ERR,
 				"failed to allocate memory for inflight desc.\n");
-			free(resubmit);
+			rte_free(resubmit);
 			return RTE_VHOST_MSG_RESULT_ERR;
 		}
 
@@ -1873,19 +2000,21 @@ vhost_check_queue_inflights_packed(struct virtio_net *dev,
 	}
 
 	if (resubmit_num) {
-		resubmit = calloc(1, sizeof(struct rte_vhost_resubmit_info));
+		resubmit = rte_zmalloc_socket("resubmit", sizeof(struct rte_vhost_resubmit_info),
+				0, vq->numa_node);
 		if (resubmit == NULL) {
 			VHOST_LOG_CONFIG(ERR,
 				"failed to allocate memory for resubmit info.\n");
 			return RTE_VHOST_MSG_RESULT_ERR;
 		}
 
-		resubmit->resubmit_list = calloc(resubmit_num,
-			sizeof(struct rte_vhost_resubmit_desc));
+		resubmit->resubmit_list = rte_zmalloc_socket("resubmit_list",
+				resubmit_num * sizeof(struct rte_vhost_resubmit_desc),
+				0, vq->numa_node);
 		if (resubmit->resubmit_list == NULL) {
 			VHOST_LOG_CONFIG(ERR,
 				"failed to allocate memory for resubmit desc.\n");
-			free(resubmit);
+			rte_free(resubmit);
 			return RTE_VHOST_MSG_RESULT_ERR;
 		}
 
@@ -2052,6 +2181,8 @@ vhost_user_get_vring_base(struct virtio_net **pdev,
 	msg->size = sizeof(msg->payload.state);
 	msg->fd_num = 0;
 
+	vhost_user_iotlb_flush_all(vq);
+
 	vring_invalidate(dev, vq);
 
 	return RTE_VHOST_MSG_RESULT_REPLY;
@@ -2077,8 +2208,8 @@ vhost_user_set_vring_enable(struct virtio_net **pdev,
 		"set queue enable: %d to qp idx: %d\n",
 		enable, index);
 
-	if (enable && dev->virtqueue[index]->async_registered) {
-		if (dev->virtqueue[index]->async_pkts_inflight_n) {
+	if (enable && dev->virtqueue[index]->async) {
+		if (dev->virtqueue[index]->async->pkts_inflight_n) {
 			VHOST_LOG_CONFIG(ERR, "failed to enable vring. "
 			"async inflight packets must be completed first\n");
 			return RTE_VHOST_MSG_RESULT_ERR;
@@ -2266,7 +2397,7 @@ vhost_user_send_rarp(struct virtio_net **pdev, struct VhostUserMsg *msg,
 		return RTE_VHOST_MSG_RESULT_ERR;
 
 	VHOST_LOG_CONFIG(DEBUG,
-		":: mac: %02x:%02x:%02x:%02x:%02x:%02x\n",
+		":: mac: " RTE_ETHER_ADDR_PRT_FMT "\n",
 		mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);
 	memcpy(dev->mac.addr_bytes, mac, 6);
 
@@ -2735,6 +2866,7 @@ vhost_user_check_and_alloc_queue_pair(struct virtio_net *dev,
 		break;
 	case VHOST_USER_SET_VRING_NUM:
 	case VHOST_USER_SET_VRING_BASE:
+	case VHOST_USER_GET_VRING_BASE:
 	case VHOST_USER_SET_VRING_ENABLE:
 		vring_idx = msg->payload.state.index;
 		break;
@@ -2950,9 +3082,6 @@ skip_to_post_handle:
 		}
 	}
 
-	if (unlock_required)
-		vhost_user_unlock_all_queue_pairs(dev);
-
 	/* If message was not handled at this stage, treat it as an error */
 	if (!handled) {
 		VHOST_LOG_CONFIG(ERR,
@@ -2987,6 +3116,8 @@ skip_to_post_handle:
 		}
 	}
 
+	if (unlock_required)
+		vhost_user_unlock_all_queue_pairs(dev);
 
 	if (!virtio_is_ready(dev))
 		goto out;