vhost: add log when setting vring base

[dpdk.git] / lib / vhost / vhost_user.c
diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c

index d8ec087dfceb8358325861295ba894721ee6323b..f99692bdc1d3d8f89cdfda0090069daa5897fa90 100644 (file)
--- a/lib/vhost/vhost_user.c
+++ b/lib/vhost/vhost_user.c
@@ -45,6 +45,8 @@
  #include <rte_common.h>
  #include <rte_malloc.h>
  #include <rte_log.h>
+#include <rte_vfio.h>
+#include <rte_errno.h>
  
  #include "iotlb.h"
  #include "vhost.h"
@@ -141,6 +143,59 @@ get_blk_size(int fd)
         return ret == -1 ? (uint64_t)-1 : (uint64_t)stat.st_blksize;
  }
  
+static int
+async_dma_map(struct rte_vhost_mem_region *region, bool do_map)
+{
+       uint64_t host_iova;
+       int ret = 0;
+
+       host_iova = rte_mem_virt2iova((void *)(uintptr_t)region->host_user_addr);
+       if (do_map) {
+               /* Add mapped region into the default container of DPDK. */
+               ret = rte_vfio_container_dma_map(RTE_VFIO_DEFAULT_CONTAINER_FD,
+                                                region->host_user_addr,
+                                                host_iova,
+                                                region->size);
+               if (ret) {
+                       /*
+                        * DMA device may bind with kernel driver, in this case,
+                        * we don't need to program IOMMU manually. However, if no
+                        * device is bound with vfio/uio in DPDK, and vfio kernel
+                        * module is loaded, the API will still be called and return
+                        * with ENODEV/ENOTSUP.
+                        *
+                        * DPDK vfio only returns ENODEV/ENOTSUP in very similar
+                        * situations (vfio either unsupported, or supported
+                        * but no devices found). Either way, no mappings could be
+                        * performed. We treat it as normal case in async path.
+                        */
+                       if (rte_errno == ENODEV || rte_errno == ENOTSUP)
+                               return 0;
+
+                       VHOST_LOG_CONFIG(ERR, "DMA engine map failed\n");
+                       /* DMA mapping errors won't stop VHOST_USER_SET_MEM_TABLE. */
+                       return 0;
+               }
+
+       } else {
+               /* Remove mapped region from the default container of DPDK. */
+               ret = rte_vfio_container_dma_unmap(RTE_VFIO_DEFAULT_CONTAINER_FD,
+                                                  region->host_user_addr,
+                                                  host_iova,
+                                                  region->size);
+               if (ret) {
+                       /* Like DMA map, ignore the kernel driver case when unmapping. */
+                       if (rte_errno == EINVAL)
+                               return 0;
+
+                       VHOST_LOG_CONFIG(ERR, "DMA engine unmap failed\n");
+                       return ret;
+               }
+       }
+
+       return ret;
+}
+
  static void
  free_mem_region(struct virtio_net *dev)
  {
@@ -153,6 +208,9 @@ free_mem_region(struct virtio_net *dev)
         for (i = 0; i < dev->mem->nregions; i++) {
                 reg = &dev->mem->regions[i];
                 if (reg->host_user_addr) {
+                       if (dev->async_copy && rte_vfio_is_enabled("vfio"))
+                               async_dma_map(reg, false);
+
                         munmap(reg->mmap_addr, reg->mmap_size);
                         close(reg->fd);
                 }
@@ -188,7 +246,7 @@ vhost_backend_cleanup(struct virtio_net *dev)
                         dev->inflight_info->fd = -1;
                 }
  
-               free(dev->inflight_info);
+               rte_free(dev->inflight_info);
                 dev->inflight_info = NULL;
         }
  
@@ -562,6 +620,31 @@ numa_realloc(struct virtio_net *dev, int index)
                 vq->log_cache = lc;
         }
  
+       if (vq->resubmit_inflight) {
+               struct rte_vhost_resubmit_info *ri;
+
+               ri = rte_realloc_socket(vq->resubmit_inflight, sizeof(*ri), 0, node);
+               if (!ri) {
+                       VHOST_LOG_CONFIG(ERR, "Failed to realloc resubmit inflight on node %d\n",
+                                       node);
+                       return dev;
+               }
+               vq->resubmit_inflight = ri;
+
+               if (ri->resubmit_list) {
+                       struct rte_vhost_resubmit_desc *rd;
+
+                       rd = rte_realloc_socket(ri->resubmit_list, sizeof(*rd) * ri->resubmit_num,
+                                       0, node);
+                       if (!rd) {
+                               VHOST_LOG_CONFIG(ERR, "Failed to realloc resubmit list on node %d\n",
+                                               node);
+                               return dev;
+                       }
+                       ri->resubmit_list = rd;
+               }
+       }
+
         vq->numa_node = node;
  
  out_dev_realloc:
@@ -890,6 +973,11 @@ vhost_user_set_vring_base(struct virtio_net **pdev,
                 vq->last_avail_idx = msg->payload.state.num;
         }
  
+       VHOST_LOG_CONFIG(INFO,
+               "(%s) vring base idx:%u last_used_idx:%u last_avail_idx:%u.\n",
+               dev->ifname, msg->payload.state.index, vq->last_used_idx,
+               vq->last_avail_idx);
+
         return RTE_VHOST_MSG_RESULT_OK;
  }
  
@@ -1032,7 +1120,7 @@ vhost_user_postcopy_region_register(struct virtio_net *dev,
         struct uffdio_register reg_struct;
  
         /*
-        * Let's register all the mmap'ed area to ensure
+        * Let's register all the mmapped area to ensure
          * alignment on page boundary.
          */
         reg_struct.range.start = (uint64_t)(uintptr_t)reg->mmap_addr;
@@ -1094,7 +1182,7 @@ vhost_user_postcopy_register(struct virtio_net *dev, int main_fd,
         msg->fd_num = 0;
         send_vhost_reply(main_fd, msg);
  
-       /* Wait for qemu to acknolwedge it's got the addresses
+       /* Wait for qemu to acknowledge it got the addresses
          * we've got to wait before we're allowed to generate faults.
          */
         if (read_vhost_message(main_fd, &ack_msg) <= 0) {
@@ -1132,6 +1220,7 @@ vhost_user_mmap_region(struct virtio_net *dev,
         uint64_t mmap_size;
         uint64_t alignment;
         int populate;
+       int ret;
  
         /* Check for memory_size + mmap_offset overflow */
         if (mmap_offset >= -region->size) {
@@ -1185,13 +1274,21 @@ vhost_user_mmap_region(struct virtio_net *dev,
         region->mmap_size = mmap_size;
         region->host_user_addr = (uint64_t)(uintptr_t)mmap_addr + mmap_offset;
  
-       if (dev->async_copy)
+       if (dev->async_copy) {
                 if (add_guest_pages(dev, region, alignment) < 0) {
-                       VHOST_LOG_CONFIG(ERR,
-                                       "adding guest pages to region failed.\n");
+                       VHOST_LOG_CONFIG(ERR, "adding guest pages to region failed.\n");
                         return -1;
                 }
  
+               if (rte_vfio_is_enabled("vfio")) {
+                       ret = async_dma_map(region, true);
+                       if (ret) {
+                               VHOST_LOG_CONFIG(ERR, "Configure IOMMU for DMA engine failed\n");
+                               return -1;
+                       }
+               }
+       }
+
         VHOST_LOG_CONFIG(INFO,
                         "guest memory region size: 0x%" PRIx64 "\n"
                         "\t guest physical addr: 0x%" PRIx64 "\n"
@@ -1223,6 +1320,7 @@ vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *msg,
         int numa_node = SOCKET_ID_ANY;
         uint64_t mmap_offset;
         uint32_t i;
+       bool async_notify = false;
  
         if (validate_msg_fds(msg, memory->nregions) != 0)
                 return RTE_VHOST_MSG_RESULT_ERR;
@@ -1250,6 +1348,16 @@ vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *msg,
                                 vdpa_dev->ops->dev_close(dev->vid);
                         dev->flags &= ~VIRTIO_DEV_VDPA_CONFIGURED;
                 }
+
+               /* notify the vhost application to stop DMA transfers */
+               if (dev->async_copy && dev->notify_ops->vring_state_changed) {
+                       for (i = 0; i < dev->nr_vring; i++) {
+                               dev->notify_ops->vring_state_changed(dev->vid,
+                                               i, 0);
+                       }
+                       async_notify = true;
+               }
+
                 free_mem_region(dev);
                 rte_free(dev->mem);
                 dev->mem = NULL;
@@ -1346,12 +1454,18 @@ vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *msg,
  
         dump_guest_pages(dev);
  
+       if (async_notify) {
+               for (i = 0; i < dev->nr_vring; i++)
+                       dev->notify_ops->vring_state_changed(dev->vid, i, 1);
+       }
+
         return RTE_VHOST_MSG_RESULT_OK;
  
  free_mem_table:
         free_mem_region(dev);
         rte_free(dev->mem);
         dev->mem = NULL;
+
  free_guest_pages:
         rte_free(dev->guest_pages);
         dev->guest_pages = NULL;
@@ -1491,6 +1605,7 @@ vhost_user_get_inflight_fd(struct virtio_net **pdev,
         uint16_t num_queues, queue_size;
         struct virtio_net *dev = *pdev;
         int fd, i, j;
+       int numa_node = SOCKET_ID_ANY;
         void *addr;
  
         if (msg->size != sizeof(msg->payload.inflight)) {
@@ -1500,9 +1615,16 @@ vhost_user_get_inflight_fd(struct virtio_net **pdev,
                 return RTE_VHOST_MSG_RESULT_ERR;
         }
  
+       /*
+        * If VQ 0 has already been allocated, try to allocate on the same
+        * NUMA node. It can be reallocated later in numa_realloc().
+        */
+       if (dev->nr_vring > 0)
+               numa_node = dev->virtqueue[0]->numa_node;
+
         if (dev->inflight_info == NULL) {
-               dev->inflight_info = calloc(1,
-                                           sizeof(struct inflight_mem_info));
+               dev->inflight_info = rte_zmalloc_socket("inflight_info",
+                               sizeof(struct inflight_mem_info), 0, numa_node);
                 if (!dev->inflight_info) {
                         VHOST_LOG_CONFIG(ERR,
                                 "failed to alloc dev inflight area\n");
@@ -1585,6 +1707,7 @@ vhost_user_set_inflight_fd(struct virtio_net **pdev, VhostUserMsg *msg,
         struct vhost_virtqueue *vq;
         void *addr;
         int fd, i;
+       int numa_node = SOCKET_ID_ANY;
  
         fd = msg->fds[0];
         if (msg->size != sizeof(msg->payload.inflight) || fd < 0) {
@@ -1618,9 +1741,16 @@ vhost_user_set_inflight_fd(struct virtio_net **pdev, VhostUserMsg *msg,
                 "set_inflight_fd pervq_inflight_size: %d\n",
                 pervq_inflight_size);
  
+       /*
+        * If VQ 0 has already been allocated, try to allocate on the same
+        * NUMA node. It can be reallocated later in numa_realloc().
+        */
+       if (dev->nr_vring > 0)
+               numa_node = dev->virtqueue[0]->numa_node;
+
         if (!dev->inflight_info) {
-               dev->inflight_info = calloc(1,
-                                           sizeof(struct inflight_mem_info));
+               dev->inflight_info = rte_zmalloc_socket("inflight_info",
+                               sizeof(struct inflight_mem_info), 0, numa_node);
                 if (dev->inflight_info == NULL) {
                         VHOST_LOG_CONFIG(ERR,
                                 "failed to alloc dev inflight area\n");
@@ -1779,19 +1909,21 @@ vhost_check_queue_inflights_split(struct virtio_net *dev,
         vq->last_avail_idx += resubmit_num;
  
         if (resubmit_num) {
-               resubmit  = calloc(1, sizeof(struct rte_vhost_resubmit_info));
+               resubmit = rte_zmalloc_socket("resubmit", sizeof(struct rte_vhost_resubmit_info),
+                               0, vq->numa_node);
                 if (!resubmit) {
                         VHOST_LOG_CONFIG(ERR,
                                 "failed to allocate memory for resubmit info.\n");
                         return RTE_VHOST_MSG_RESULT_ERR;
                 }
  
-               resubmit->resubmit_list = calloc(resubmit_num,
-                       sizeof(struct rte_vhost_resubmit_desc));
+               resubmit->resubmit_list = rte_zmalloc_socket("resubmit_list",
+                               resubmit_num * sizeof(struct rte_vhost_resubmit_desc),
+                               0, vq->numa_node);
                 if (!resubmit->resubmit_list) {
                         VHOST_LOG_CONFIG(ERR,
                                 "failed to allocate memory for inflight desc.\n");
-                       free(resubmit);
+                       rte_free(resubmit);
                         return RTE_VHOST_MSG_RESULT_ERR;
                 }
  
@@ -1873,19 +2005,21 @@ vhost_check_queue_inflights_packed(struct virtio_net *dev,
         }
  
         if (resubmit_num) {
-               resubmit = calloc(1, sizeof(struct rte_vhost_resubmit_info));
+               resubmit = rte_zmalloc_socket("resubmit", sizeof(struct rte_vhost_resubmit_info),
+                               0, vq->numa_node);
                 if (resubmit == NULL) {
                         VHOST_LOG_CONFIG(ERR,
                                 "failed to allocate memory for resubmit info.\n");
                         return RTE_VHOST_MSG_RESULT_ERR;
                 }
  
-               resubmit->resubmit_list = calloc(resubmit_num,
-                       sizeof(struct rte_vhost_resubmit_desc));
+               resubmit->resubmit_list = rte_zmalloc_socket("resubmit_list",
+                               resubmit_num * sizeof(struct rte_vhost_resubmit_desc),
+                               0, vq->numa_node);
                 if (resubmit->resubmit_list == NULL) {
                         VHOST_LOG_CONFIG(ERR,
                                 "failed to allocate memory for resubmit desc.\n");
-                       free(resubmit);
+                       rte_free(resubmit);
                         return RTE_VHOST_MSG_RESULT_ERR;
                 }
  
@@ -2052,6 +2186,8 @@ vhost_user_get_vring_base(struct virtio_net **pdev,
         msg->size = sizeof(msg->payload.state);
         msg->fd_num = 0;
  
+       vhost_user_iotlb_flush_all(vq);
+
         vring_invalidate(dev, vq);
  
         return RTE_VHOST_MSG_RESULT_REPLY;
@@ -2077,8 +2213,8 @@ vhost_user_set_vring_enable(struct virtio_net **pdev,
                 "set queue enable: %d to qp idx: %d\n",
                 enable, index);
  
-       if (enable && dev->virtqueue[index]->async_registered) {
-               if (dev->virtqueue[index]->async_pkts_inflight_n) {
+       if (enable && dev->virtqueue[index]->async) {
+               if (dev->virtqueue[index]->async->pkts_inflight_n) {
                         VHOST_LOG_CONFIG(ERR, "failed to enable vring. "
                         "async inflight packets must be completed first\n");
                         return RTE_VHOST_MSG_RESULT_ERR;
@@ -2266,7 +2402,7 @@ vhost_user_send_rarp(struct virtio_net **pdev, struct VhostUserMsg *msg,
                 return RTE_VHOST_MSG_RESULT_ERR;
  
         VHOST_LOG_CONFIG(DEBUG,
-               ":: mac: %02x:%02x:%02x:%02x:%02x:%02x\n",
+               ":: mac: " RTE_ETHER_ADDR_PRT_FMT "\n",
                 mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);
         memcpy(dev->mac.addr_bytes, mac, 6);
  
@@ -2735,6 +2871,7 @@ vhost_user_check_and_alloc_queue_pair(struct virtio_net *dev,
                 break;
         case VHOST_USER_SET_VRING_NUM:
         case VHOST_USER_SET_VRING_BASE:
+       case VHOST_USER_GET_VRING_BASE:
         case VHOST_USER_SET_VRING_ENABLE:
                 vring_idx = msg->payload.state.index;
                 break;
@@ -2950,9 +3087,6 @@ skip_to_post_handle:
                 }
         }
  
-       if (unlock_required)
-               vhost_user_unlock_all_queue_pairs(dev);
-
         /* If message was not handled at this stage, treat it as an error */
         if (!handled) {
                 VHOST_LOG_CONFIG(ERR,
@@ -2987,6 +3121,8 @@ skip_to_post_handle:
                 }
         }
  
+       if (unlock_required)
+               vhost_user_unlock_all_queue_pairs(dev);
  
         if (!virtio_is_ready(dev))
                 goto out;