vhost: add log when setting vring base
[dpdk.git] / lib / vhost / vhost_user.c
index d8ec087..f99692b 100644 (file)
@@ -45,6 +45,8 @@
 #include <rte_common.h>
 #include <rte_malloc.h>
 #include <rte_log.h>
+#include <rte_vfio.h>
+#include <rte_errno.h>
 
 #include "iotlb.h"
 #include "vhost.h"
@@ -141,6 +143,59 @@ get_blk_size(int fd)
        return ret == -1 ? (uint64_t)-1 : (uint64_t)stat.st_blksize;
 }
 
+static int
+async_dma_map(struct rte_vhost_mem_region *region, bool do_map)
+{
+       uint64_t host_iova;
+       int ret = 0;
+
+       host_iova = rte_mem_virt2iova((void *)(uintptr_t)region->host_user_addr);
+       if (do_map) {
+               /* Add mapped region into the default container of DPDK. */
+               ret = rte_vfio_container_dma_map(RTE_VFIO_DEFAULT_CONTAINER_FD,
+                                                region->host_user_addr,
+                                                host_iova,
+                                                region->size);
+               if (ret) {
+                       /*
+                        * DMA device may bind with kernel driver, in this case,
+                        * we don't need to program IOMMU manually. However, if no
+                        * device is bound with vfio/uio in DPDK, and vfio kernel
+                        * module is loaded, the API will still be called and return
+                        * with ENODEV/ENOTSUP.
+                        *
+                        * DPDK vfio only returns ENODEV/ENOTSUP in very similar
+                        * situations (vfio either unsupported, or supported
+                        * but no devices found). Either way, no mappings could be
+                        * performed. We treat it as normal case in async path.
+                        */
+                       if (rte_errno == ENODEV || rte_errno == ENOTSUP)
+                               return 0;
+
+                       VHOST_LOG_CONFIG(ERR, "DMA engine map failed\n");
+                       /* DMA mapping errors won't stop VHOST_USER_SET_MEM_TABLE. */
+                       return 0;
+               }
+
+       } else {
+               /* Remove mapped region from the default container of DPDK. */
+               ret = rte_vfio_container_dma_unmap(RTE_VFIO_DEFAULT_CONTAINER_FD,
+                                                  region->host_user_addr,
+                                                  host_iova,
+                                                  region->size);
+               if (ret) {
+                       /* Like DMA map, ignore the kernel driver case (EINVAL) on unmap. */
+                       if (rte_errno == EINVAL)
+                               return 0;
+
+                       VHOST_LOG_CONFIG(ERR, "DMA engine unmap failed\n");
+                       return ret;
+               }
+       }
+
+       return ret;
+}
+
 static void
 free_mem_region(struct virtio_net *dev)
 {
@@ -153,6 +208,9 @@ free_mem_region(struct virtio_net *dev)
        for (i = 0; i < dev->mem->nregions; i++) {
                reg = &dev->mem->regions[i];
                if (reg->host_user_addr) {
+                       if (dev->async_copy && rte_vfio_is_enabled("vfio"))
+                               async_dma_map(reg, false);
+
                        munmap(reg->mmap_addr, reg->mmap_size);
                        close(reg->fd);
                }
@@ -188,7 +246,7 @@ vhost_backend_cleanup(struct virtio_net *dev)
                        dev->inflight_info->fd = -1;
                }
 
-               free(dev->inflight_info);
+               rte_free(dev->inflight_info);
                dev->inflight_info = NULL;
        }
 
@@ -562,6 +620,31 @@ numa_realloc(struct virtio_net *dev, int index)
                vq->log_cache = lc;
        }
 
+       if (vq->resubmit_inflight) {
+               struct rte_vhost_resubmit_info *ri;
+
+               ri = rte_realloc_socket(vq->resubmit_inflight, sizeof(*ri), 0, node);
+               if (!ri) {
+                       VHOST_LOG_CONFIG(ERR, "Failed to realloc resubmit inflight on node %d\n",
+                                       node);
+                       return dev;
+               }
+               vq->resubmit_inflight = ri;
+
+               if (ri->resubmit_list) {
+                       struct rte_vhost_resubmit_desc *rd;
+
+                       rd = rte_realloc_socket(ri->resubmit_list, sizeof(*rd) * ri->resubmit_num,
+                                       0, node);
+                       if (!rd) {
+                               VHOST_LOG_CONFIG(ERR, "Failed to realloc resubmit list on node %d\n",
+                                               node);
+                               return dev;
+                       }
+                       ri->resubmit_list = rd;
+               }
+       }
+
        vq->numa_node = node;
 
 out_dev_realloc:
@@ -890,6 +973,11 @@ vhost_user_set_vring_base(struct virtio_net **pdev,
                vq->last_avail_idx = msg->payload.state.num;
        }
 
+       VHOST_LOG_CONFIG(INFO,
+               "(%s) vring base idx:%u last_used_idx:%u last_avail_idx:%u.\n",
+               dev->ifname, msg->payload.state.index, vq->last_used_idx,
+               vq->last_avail_idx);
+
        return RTE_VHOST_MSG_RESULT_OK;
 }
 
@@ -1032,7 +1120,7 @@ vhost_user_postcopy_region_register(struct virtio_net *dev,
        struct uffdio_register reg_struct;
 
        /*
-        * Let's register all the mmap'ed area to ensure
+        * Let's register all the mmapped area to ensure
         * alignment on page boundary.
         */
        reg_struct.range.start = (uint64_t)(uintptr_t)reg->mmap_addr;
@@ -1094,7 +1182,7 @@ vhost_user_postcopy_register(struct virtio_net *dev, int main_fd,
        msg->fd_num = 0;
        send_vhost_reply(main_fd, msg);
 
-       /* Wait for qemu to acknolwedge it's got the addresses
+       /* Wait for qemu to acknowledge it got the addresses
         * we've got to wait before we're allowed to generate faults.
         */
        if (read_vhost_message(main_fd, &ack_msg) <= 0) {
@@ -1132,6 +1220,7 @@ vhost_user_mmap_region(struct virtio_net *dev,
        uint64_t mmap_size;
        uint64_t alignment;
        int populate;
+       int ret;
 
        /* Check for memory_size + mmap_offset overflow */
        if (mmap_offset >= -region->size) {
@@ -1185,13 +1274,21 @@ vhost_user_mmap_region(struct virtio_net *dev,
        region->mmap_size = mmap_size;
        region->host_user_addr = (uint64_t)(uintptr_t)mmap_addr + mmap_offset;
 
-       if (dev->async_copy)
+       if (dev->async_copy) {
                if (add_guest_pages(dev, region, alignment) < 0) {
-                       VHOST_LOG_CONFIG(ERR,
-                                       "adding guest pages to region failed.\n");
+                       VHOST_LOG_CONFIG(ERR, "adding guest pages to region failed.\n");
                        return -1;
                }
 
+               if (rte_vfio_is_enabled("vfio")) {
+                       ret = async_dma_map(region, true);
+                       if (ret) {
+                               VHOST_LOG_CONFIG(ERR, "Configure IOMMU for DMA engine failed\n");
+                               return -1;
+                       }
+               }
+       }
+
        VHOST_LOG_CONFIG(INFO,
                        "guest memory region size: 0x%" PRIx64 "\n"
                        "\t guest physical addr: 0x%" PRIx64 "\n"
@@ -1223,6 +1320,7 @@ vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *msg,
        int numa_node = SOCKET_ID_ANY;
        uint64_t mmap_offset;
        uint32_t i;
+       bool async_notify = false;
 
        if (validate_msg_fds(msg, memory->nregions) != 0)
                return RTE_VHOST_MSG_RESULT_ERR;
@@ -1250,6 +1348,16 @@ vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *msg,
                                vdpa_dev->ops->dev_close(dev->vid);
                        dev->flags &= ~VIRTIO_DEV_VDPA_CONFIGURED;
                }
+
+               /* notify the vhost application to stop DMA transfers */
+               if (dev->async_copy && dev->notify_ops->vring_state_changed) {
+                       for (i = 0; i < dev->nr_vring; i++) {
+                               dev->notify_ops->vring_state_changed(dev->vid,
+                                               i, 0);
+                       }
+                       async_notify = true;
+               }
+
                free_mem_region(dev);
                rte_free(dev->mem);
                dev->mem = NULL;
@@ -1346,12 +1454,18 @@ vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *msg,
 
        dump_guest_pages(dev);
 
+       if (async_notify) {
+               for (i = 0; i < dev->nr_vring; i++)
+                       dev->notify_ops->vring_state_changed(dev->vid, i, 1);
+       }
+
        return RTE_VHOST_MSG_RESULT_OK;
 
 free_mem_table:
        free_mem_region(dev);
        rte_free(dev->mem);
        dev->mem = NULL;
+
 free_guest_pages:
        rte_free(dev->guest_pages);
        dev->guest_pages = NULL;
@@ -1491,6 +1605,7 @@ vhost_user_get_inflight_fd(struct virtio_net **pdev,
        uint16_t num_queues, queue_size;
        struct virtio_net *dev = *pdev;
        int fd, i, j;
+       int numa_node = SOCKET_ID_ANY;
        void *addr;
 
        if (msg->size != sizeof(msg->payload.inflight)) {
@@ -1500,9 +1615,16 @@ vhost_user_get_inflight_fd(struct virtio_net **pdev,
                return RTE_VHOST_MSG_RESULT_ERR;
        }
 
+       /*
+        * If VQ 0 has already been allocated, try to allocate on the same
+        * NUMA node. It can be reallocated later in numa_realloc().
+        */
+       if (dev->nr_vring > 0)
+               numa_node = dev->virtqueue[0]->numa_node;
+
        if (dev->inflight_info == NULL) {
-               dev->inflight_info = calloc(1,
-                                           sizeof(struct inflight_mem_info));
+               dev->inflight_info = rte_zmalloc_socket("inflight_info",
+                               sizeof(struct inflight_mem_info), 0, numa_node);
                if (!dev->inflight_info) {
                        VHOST_LOG_CONFIG(ERR,
                                "failed to alloc dev inflight area\n");
@@ -1585,6 +1707,7 @@ vhost_user_set_inflight_fd(struct virtio_net **pdev, VhostUserMsg *msg,
        struct vhost_virtqueue *vq;
        void *addr;
        int fd, i;
+       int numa_node = SOCKET_ID_ANY;
 
        fd = msg->fds[0];
        if (msg->size != sizeof(msg->payload.inflight) || fd < 0) {
@@ -1618,9 +1741,16 @@ vhost_user_set_inflight_fd(struct virtio_net **pdev, VhostUserMsg *msg,
                "set_inflight_fd pervq_inflight_size: %d\n",
                pervq_inflight_size);
 
+       /*
+        * If VQ 0 has already been allocated, try to allocate on the same
+        * NUMA node. It can be reallocated later in numa_realloc().
+        */
+       if (dev->nr_vring > 0)
+               numa_node = dev->virtqueue[0]->numa_node;
+
        if (!dev->inflight_info) {
-               dev->inflight_info = calloc(1,
-                                           sizeof(struct inflight_mem_info));
+               dev->inflight_info = rte_zmalloc_socket("inflight_info",
+                               sizeof(struct inflight_mem_info), 0, numa_node);
                if (dev->inflight_info == NULL) {
                        VHOST_LOG_CONFIG(ERR,
                                "failed to alloc dev inflight area\n");
@@ -1779,19 +1909,21 @@ vhost_check_queue_inflights_split(struct virtio_net *dev,
        vq->last_avail_idx += resubmit_num;
 
        if (resubmit_num) {
-               resubmit  = calloc(1, sizeof(struct rte_vhost_resubmit_info));
+               resubmit = rte_zmalloc_socket("resubmit", sizeof(struct rte_vhost_resubmit_info),
+                               0, vq->numa_node);
                if (!resubmit) {
                        VHOST_LOG_CONFIG(ERR,
                                "failed to allocate memory for resubmit info.\n");
                        return RTE_VHOST_MSG_RESULT_ERR;
                }
 
-               resubmit->resubmit_list = calloc(resubmit_num,
-                       sizeof(struct rte_vhost_resubmit_desc));
+               resubmit->resubmit_list = rte_zmalloc_socket("resubmit_list",
+                               resubmit_num * sizeof(struct rte_vhost_resubmit_desc),
+                               0, vq->numa_node);
                if (!resubmit->resubmit_list) {
                        VHOST_LOG_CONFIG(ERR,
                                "failed to allocate memory for inflight desc.\n");
-                       free(resubmit);
+                       rte_free(resubmit);
                        return RTE_VHOST_MSG_RESULT_ERR;
                }
 
@@ -1873,19 +2005,21 @@ vhost_check_queue_inflights_packed(struct virtio_net *dev,
        }
 
        if (resubmit_num) {
-               resubmit = calloc(1, sizeof(struct rte_vhost_resubmit_info));
+               resubmit = rte_zmalloc_socket("resubmit", sizeof(struct rte_vhost_resubmit_info),
+                               0, vq->numa_node);
                if (resubmit == NULL) {
                        VHOST_LOG_CONFIG(ERR,
                                "failed to allocate memory for resubmit info.\n");
                        return RTE_VHOST_MSG_RESULT_ERR;
                }
 
-               resubmit->resubmit_list = calloc(resubmit_num,
-                       sizeof(struct rte_vhost_resubmit_desc));
+               resubmit->resubmit_list = rte_zmalloc_socket("resubmit_list",
+                               resubmit_num * sizeof(struct rte_vhost_resubmit_desc),
+                               0, vq->numa_node);
                if (resubmit->resubmit_list == NULL) {
                        VHOST_LOG_CONFIG(ERR,
                                "failed to allocate memory for resubmit desc.\n");
-                       free(resubmit);
+                       rte_free(resubmit);
                        return RTE_VHOST_MSG_RESULT_ERR;
                }
 
@@ -2052,6 +2186,8 @@ vhost_user_get_vring_base(struct virtio_net **pdev,
        msg->size = sizeof(msg->payload.state);
        msg->fd_num = 0;
 
+       vhost_user_iotlb_flush_all(vq);
+
        vring_invalidate(dev, vq);
 
        return RTE_VHOST_MSG_RESULT_REPLY;
@@ -2077,8 +2213,8 @@ vhost_user_set_vring_enable(struct virtio_net **pdev,
                "set queue enable: %d to qp idx: %d\n",
                enable, index);
 
-       if (enable && dev->virtqueue[index]->async_registered) {
-               if (dev->virtqueue[index]->async_pkts_inflight_n) {
+       if (enable && dev->virtqueue[index]->async) {
+               if (dev->virtqueue[index]->async->pkts_inflight_n) {
                        VHOST_LOG_CONFIG(ERR, "failed to enable vring. "
                        "async inflight packets must be completed first\n");
                        return RTE_VHOST_MSG_RESULT_ERR;
@@ -2266,7 +2402,7 @@ vhost_user_send_rarp(struct virtio_net **pdev, struct VhostUserMsg *msg,
                return RTE_VHOST_MSG_RESULT_ERR;
 
        VHOST_LOG_CONFIG(DEBUG,
-               ":: mac: %02x:%02x:%02x:%02x:%02x:%02x\n",
+               ":: mac: " RTE_ETHER_ADDR_PRT_FMT "\n",
                mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);
        memcpy(dev->mac.addr_bytes, mac, 6);
 
@@ -2735,6 +2871,7 @@ vhost_user_check_and_alloc_queue_pair(struct virtio_net *dev,
                break;
        case VHOST_USER_SET_VRING_NUM:
        case VHOST_USER_SET_VRING_BASE:
+       case VHOST_USER_GET_VRING_BASE:
        case VHOST_USER_SET_VRING_ENABLE:
                vring_idx = msg->payload.state.index;
                break;
@@ -2950,9 +3087,6 @@ skip_to_post_handle:
                }
        }
 
-       if (unlock_required)
-               vhost_user_unlock_all_queue_pairs(dev);
-
        /* If message was not handled at this stage, treat it as an error */
        if (!handled) {
                VHOST_LOG_CONFIG(ERR,
@@ -2987,6 +3121,8 @@ skip_to_post_handle:
                }
        }
 
+       if (unlock_required)
+               vhost_user_unlock_all_queue_pairs(dev);
 
        if (!virtio_is_ready(dev))
                goto out;