X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=lib%2Fvhost%2Fvhost_user.c;h=a781346c4d0c006c52c5d8c9ab24725850fef5ef;hb=ee8024b3d4ad375cc5e28d493449c5dcea335540;hp=d8ec087dfceb8358325861295ba894721ee6323b;hpb=b81c93466df0c19ec3a07690b8bca64d257142eb;p=dpdk.git diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c index d8ec087dfc..a781346c4d 100644 --- a/lib/vhost/vhost_user.c +++ b/lib/vhost/vhost_user.c @@ -45,6 +45,8 @@ #include #include #include +#include +#include #include "iotlb.h" #include "vhost.h" @@ -141,6 +143,59 @@ get_blk_size(int fd) return ret == -1 ? (uint64_t)-1 : (uint64_t)stat.st_blksize; } +static int +async_dma_map(struct rte_vhost_mem_region *region, bool do_map) +{ + uint64_t host_iova; + int ret = 0; + + host_iova = rte_mem_virt2iova((void *)(uintptr_t)region->host_user_addr); + if (do_map) { + /* Add mapped region into the default container of DPDK. */ + ret = rte_vfio_container_dma_map(RTE_VFIO_DEFAULT_CONTAINER_FD, + region->host_user_addr, + host_iova, + region->size); + if (ret) { + /* + * The DMA device may be bound to a kernel driver; in that case + * we don't need to program the IOMMU manually. However, if no + * device is bound with vfio/uio in DPDK, and the vfio kernel + * module is loaded, the API will still be called and return + * with ENODEV/ENOTSUP. + * + * DPDK vfio only returns ENODEV/ENOTSUP in very similar + * situations (vfio either unsupported, or supported + * but no devices found). Either way, no mappings could be + * performed. We treat it as a normal case in the async path. + */ + if (rte_errno == ENODEV || rte_errno == ENOTSUP) + return 0; + + VHOST_LOG_CONFIG(ERR, "DMA engine map failed\n"); + /* DMA mapping errors won't stop VHOST_USER_SET_MEM_TABLE. */ + return 0; + } + + } else { + /* Remove mapped region from the default container of DPDK. 
*/ + ret = rte_vfio_container_dma_unmap(RTE_VFIO_DEFAULT_CONTAINER_FD, + region->host_user_addr, + host_iova, + region->size); + if (ret) { + /* Like DMA map, ignore the kernel driver case (checked here as EINVAL) when unmapping. */ + if (rte_errno == EINVAL) + return 0; + + VHOST_LOG_CONFIG(ERR, "DMA engine unmap failed\n"); + return ret; + } + } + + return ret; +} + static void free_mem_region(struct virtio_net *dev) { @@ -153,6 +208,9 @@ free_mem_region(struct virtio_net *dev) for (i = 0; i < dev->mem->nregions; i++) { reg = &dev->mem->regions[i]; if (reg->host_user_addr) { + if (dev->async_copy && rte_vfio_is_enabled("vfio")) + async_dma_map(reg, false); + munmap(reg->mmap_addr, reg->mmap_size); close(reg->fd); } @@ -188,7 +246,7 @@ vhost_backend_cleanup(struct virtio_net *dev) dev->inflight_info->fd = -1; } - free(dev->inflight_info); + rte_free(dev->inflight_info); dev->inflight_info = NULL; } @@ -562,6 +620,31 @@ numa_realloc(struct virtio_net *dev, int index) vq->log_cache = lc; } + if (vq->resubmit_inflight) { + struct rte_vhost_resubmit_info *ri; + + ri = rte_realloc_socket(vq->resubmit_inflight, sizeof(*ri), 0, node); + if (!ri) { + VHOST_LOG_CONFIG(ERR, "Failed to realloc resubmit inflight on node %d\n", + node); + return dev; + } + vq->resubmit_inflight = ri; + + if (ri->resubmit_list) { + struct rte_vhost_resubmit_desc *rd; + + rd = rte_realloc_socket(ri->resubmit_list, sizeof(*rd) * ri->resubmit_num, + 0, node); + if (!rd) { + VHOST_LOG_CONFIG(ERR, "Failed to realloc resubmit list on node %d\n", + node); + return dev; + } + ri->resubmit_list = rd; + } + } + vq->numa_node = node; out_dev_realloc: @@ -1132,6 +1215,7 @@ vhost_user_mmap_region(struct virtio_net *dev, uint64_t mmap_size; uint64_t alignment; int populate; + int ret; /* Check for memory_size + mmap_offset overflow */ if (mmap_offset >= -region->size) { @@ -1185,13 +1269,21 @@ vhost_user_mmap_region(struct virtio_net *dev, region->mmap_size = mmap_size; region->host_user_addr = (uint64_t)(uintptr_t)mmap_addr + 
mmap_offset; - if (dev->async_copy) + if (dev->async_copy) { if (add_guest_pages(dev, region, alignment) < 0) { - VHOST_LOG_CONFIG(ERR, - "adding guest pages to region failed.\n"); + VHOST_LOG_CONFIG(ERR, "adding guest pages to region failed.\n"); return -1; } + if (rte_vfio_is_enabled("vfio")) { + ret = async_dma_map(region, true); + if (ret) { + VHOST_LOG_CONFIG(ERR, "Configure IOMMU for DMA engine failed\n"); + return -1; + } + } + } + VHOST_LOG_CONFIG(INFO, "guest memory region size: 0x%" PRIx64 "\n" "\t guest physical addr: 0x%" PRIx64 "\n" @@ -1223,6 +1315,7 @@ vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *msg, int numa_node = SOCKET_ID_ANY; uint64_t mmap_offset; uint32_t i; + bool async_notify = false; if (validate_msg_fds(msg, memory->nregions) != 0) return RTE_VHOST_MSG_RESULT_ERR; @@ -1250,6 +1343,16 @@ vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *msg, vdpa_dev->ops->dev_close(dev->vid); dev->flags &= ~VIRTIO_DEV_VDPA_CONFIGURED; } + + /* notify the vhost application to stop DMA transfers */ + if (dev->async_copy && dev->notify_ops->vring_state_changed) { + for (i = 0; i < dev->nr_vring; i++) { + dev->notify_ops->vring_state_changed(dev->vid, + i, 0); + } + async_notify = true; + } + free_mem_region(dev); rte_free(dev->mem); dev->mem = NULL; @@ -1346,12 +1449,18 @@ vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *msg, dump_guest_pages(dev); + if (async_notify) { + for (i = 0; i < dev->nr_vring; i++) + dev->notify_ops->vring_state_changed(dev->vid, i, 1); + } + return RTE_VHOST_MSG_RESULT_OK; free_mem_table: free_mem_region(dev); rte_free(dev->mem); dev->mem = NULL; + free_guest_pages: rte_free(dev->guest_pages); dev->guest_pages = NULL; @@ -1491,6 +1600,7 @@ vhost_user_get_inflight_fd(struct virtio_net **pdev, uint16_t num_queues, queue_size; struct virtio_net *dev = *pdev; int fd, i, j; + int numa_node = SOCKET_ID_ANY; void *addr; if (msg->size != sizeof(msg->payload.inflight)) { 
@@ -1500,9 +1610,16 @@ vhost_user_get_inflight_fd(struct virtio_net **pdev, return RTE_VHOST_MSG_RESULT_ERR; } + /* + * If VQ 0 has already been allocated, try to allocate on the same + * NUMA node. It can be reallocated later in numa_realloc(). + */ + if (dev->nr_vring > 0) + numa_node = dev->virtqueue[0]->numa_node; + if (dev->inflight_info == NULL) { - dev->inflight_info = calloc(1, - sizeof(struct inflight_mem_info)); + dev->inflight_info = rte_zmalloc_socket("inflight_info", + sizeof(struct inflight_mem_info), 0, numa_node); if (!dev->inflight_info) { VHOST_LOG_CONFIG(ERR, "failed to alloc dev inflight area\n"); @@ -1585,6 +1702,7 @@ vhost_user_set_inflight_fd(struct virtio_net **pdev, VhostUserMsg *msg, struct vhost_virtqueue *vq; void *addr; int fd, i; + int numa_node = SOCKET_ID_ANY; fd = msg->fds[0]; if (msg->size != sizeof(msg->payload.inflight) || fd < 0) { @@ -1618,9 +1736,16 @@ vhost_user_set_inflight_fd(struct virtio_net **pdev, VhostUserMsg *msg, "set_inflight_fd pervq_inflight_size: %d\n", pervq_inflight_size); + /* + * If VQ 0 has already been allocated, try to allocate on the same + * NUMA node. It can be reallocated later in numa_realloc(). 
+ */ + if (dev->nr_vring > 0) + numa_node = dev->virtqueue[0]->numa_node; + if (!dev->inflight_info) { - dev->inflight_info = calloc(1, - sizeof(struct inflight_mem_info)); + dev->inflight_info = rte_zmalloc_socket("inflight_info", + sizeof(struct inflight_mem_info), 0, numa_node); if (dev->inflight_info == NULL) { VHOST_LOG_CONFIG(ERR, "failed to alloc dev inflight area\n"); @@ -1779,19 +1904,21 @@ vhost_check_queue_inflights_split(struct virtio_net *dev, vq->last_avail_idx += resubmit_num; if (resubmit_num) { - resubmit = calloc(1, sizeof(struct rte_vhost_resubmit_info)); + resubmit = rte_zmalloc_socket("resubmit", sizeof(struct rte_vhost_resubmit_info), + 0, vq->numa_node); if (!resubmit) { VHOST_LOG_CONFIG(ERR, "failed to allocate memory for resubmit info.\n"); return RTE_VHOST_MSG_RESULT_ERR; } - resubmit->resubmit_list = calloc(resubmit_num, - sizeof(struct rte_vhost_resubmit_desc)); + resubmit->resubmit_list = rte_zmalloc_socket("resubmit_list", + resubmit_num * sizeof(struct rte_vhost_resubmit_desc), + 0, vq->numa_node); if (!resubmit->resubmit_list) { VHOST_LOG_CONFIG(ERR, "failed to allocate memory for inflight desc.\n"); - free(resubmit); + rte_free(resubmit); return RTE_VHOST_MSG_RESULT_ERR; } @@ -1873,19 +2000,21 @@ vhost_check_queue_inflights_packed(struct virtio_net *dev, } if (resubmit_num) { - resubmit = calloc(1, sizeof(struct rte_vhost_resubmit_info)); + resubmit = rte_zmalloc_socket("resubmit", sizeof(struct rte_vhost_resubmit_info), + 0, vq->numa_node); if (resubmit == NULL) { VHOST_LOG_CONFIG(ERR, "failed to allocate memory for resubmit info.\n"); return RTE_VHOST_MSG_RESULT_ERR; } - resubmit->resubmit_list = calloc(resubmit_num, - sizeof(struct rte_vhost_resubmit_desc)); + resubmit->resubmit_list = rte_zmalloc_socket("resubmit_list", + resubmit_num * sizeof(struct rte_vhost_resubmit_desc), + 0, vq->numa_node); if (resubmit->resubmit_list == NULL) { VHOST_LOG_CONFIG(ERR, "failed to allocate memory for resubmit desc.\n"); - free(resubmit); + 
rte_free(resubmit); return RTE_VHOST_MSG_RESULT_ERR; } @@ -2052,6 +2181,8 @@ vhost_user_get_vring_base(struct virtio_net **pdev, msg->size = sizeof(msg->payload.state); msg->fd_num = 0; + vhost_user_iotlb_flush_all(vq); + vring_invalidate(dev, vq); return RTE_VHOST_MSG_RESULT_REPLY; @@ -2077,8 +2208,8 @@ vhost_user_set_vring_enable(struct virtio_net **pdev, "set queue enable: %d to qp idx: %d\n", enable, index); - if (enable && dev->virtqueue[index]->async_registered) { - if (dev->virtqueue[index]->async_pkts_inflight_n) { + if (enable && dev->virtqueue[index]->async) { + if (dev->virtqueue[index]->async->pkts_inflight_n) { VHOST_LOG_CONFIG(ERR, "failed to enable vring. " "async inflight packets must be completed first\n"); return RTE_VHOST_MSG_RESULT_ERR; @@ -2266,7 +2397,7 @@ vhost_user_send_rarp(struct virtio_net **pdev, struct VhostUserMsg *msg, return RTE_VHOST_MSG_RESULT_ERR; VHOST_LOG_CONFIG(DEBUG, - ":: mac: %02x:%02x:%02x:%02x:%02x:%02x\n", + ":: mac: " RTE_ETHER_ADDR_PRT_FMT "\n", mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]); memcpy(dev->mac.addr_bytes, mac, 6); @@ -2735,6 +2866,7 @@ vhost_user_check_and_alloc_queue_pair(struct virtio_net *dev, break; case VHOST_USER_SET_VRING_NUM: case VHOST_USER_SET_VRING_BASE: + case VHOST_USER_GET_VRING_BASE: case VHOST_USER_SET_VRING_ENABLE: vring_idx = msg->payload.state.index; break; @@ -2950,9 +3082,6 @@ skip_to_post_handle: } } - if (unlock_required) - vhost_user_unlock_all_queue_pairs(dev); - /* If message was not handled at this stage, treat it as an error */ if (!handled) { VHOST_LOG_CONFIG(ERR, @@ -2987,6 +3116,8 @@ skip_to_post_handle: } } + if (unlock_required) + vhost_user_unlock_all_queue_pairs(dev); if (!virtio_is_ready(dev)) goto out;