This patch saves the NUMA node the virtqueue is allocated
on at init time, in order to allocate all other data on the
same node.

While most of the data is allocated before numa_realloc()
is called, and so gets reallocated properly, some data,
like the log cache, is most likely allocated afterwards.

For the virtio device metadata, we decide to allocate it
on the same node as VQ 0.

Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
Reviewed-by: Chenbo Xia <chenbo.xia@intel.com>
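
For reference, a minimal standalone sketch (not part of the patch) of the
mechanism the diff below relies on: get_mempolicy() with
MPOL_F_NODE | MPOL_F_ADDR reports the NUMA node backing a given address.
Build with -lnuma on Linux.

	#include <errno.h>
	#include <numaif.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>

	int main(void)
	{
		int node = -1;
		void *addr = malloc(4096);

		if (addr == NULL)
			return 1;

		/* Touch the buffer so a physical page is actually mapped. */
		memset(addr, 0, 4096);

		/* MPOL_F_NODE | MPOL_F_ADDR: return the node of the page at addr. */
		if (get_mempolicy(&node, NULL, 0, addr, MPOL_F_NODE | MPOL_F_ADDR))
			printf("failed to query numa node: %s\n", strerror(errno));
		else
			printf("buffer resides on NUMA node %d\n", node);

		free(addr);
		return 0;
	}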
uint64_t src, dst;
uint64_t len, remain = desc_len;
- idesc = rte_malloc(__func__, desc_len, 0);
+ idesc = rte_malloc_socket(__func__, desc_len, 0, vq->numa_node);
if (unlikely(!idesc))
return NULL;
init_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
{
struct vhost_virtqueue *vq;
+ int numa_node = SOCKET_ID_ANY;
if (vring_idx >= VHOST_MAX_VRING) {
VHOST_LOG_CONFIG(ERR,
vq->callfd = VIRTIO_UNINITIALIZED_EVENTFD;
vq->notif_enable = VIRTIO_UNINITIALIZED_NOTIF;
+#ifdef RTE_LIBRTE_VHOST_NUMA
+ if (get_mempolicy(&numa_node, NULL, 0, vq, MPOL_F_NODE | MPOL_F_ADDR)) {
+ VHOST_LOG_CONFIG(ERR, "(%d) failed to query numa node: %s\n",
+ dev->vid, rte_strerror(errno));
+ numa_node = SOCKET_ID_ANY;
+ }
+#endif
+ vq->numa_node = numa_node;
+
vhost_user_iotlb_init(dev, vring_idx);
}
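
The hunk above stores SOCKET_ID_ANY when the query fails, which is what
makes the conversions below safe: rte_malloc_socket() with SOCKET_ID_ANY
behaves like plain rte_malloc(). A hedged sketch of the resulting
allocation pattern, with a stubbed type and an illustrative helper name
(alloc_vq_metadata is not from the patch):

	#include <rte_common.h>	/* RTE_CACHE_LINE_SIZE */
	#include <rte_malloc.h>	/* rte_malloc_socket() */

	struct vq_stub { int numa_node; };	/* stand-in for vhost_virtqueue */

	/* Allocate per-virtqueue metadata on the ring's node when known;
	 * numa_node == SOCKET_ID_ANY degrades to a plain rte_malloc(). */
	static void *
	alloc_vq_metadata(struct vq_stub *vq, size_t sz)
	{
		return rte_malloc_socket("vq metadata", sz,
				RTE_CACHE_LINE_SIZE, vq->numa_node);
	}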
struct vhost_virtqueue *vq;
struct virtio_net *dev = get_device(vid);
struct rte_vhost_async_features f;
if (dev == NULL || ops == NULL)
return -1;
-#ifdef RTE_LIBRTE_VHOST_NUMA
- if (get_mempolicy(&node, NULL, 0, vq, MPOL_F_NODE | MPOL_F_ADDR)) {
- VHOST_LOG_CONFIG(ERR,
- "unable to get numa information in async register. "
- "allocating async buffer memory on the caller thread node\n");
- node = SOCKET_ID_ANY;
- }
-#else
- node = SOCKET_ID_ANY;
-#endif
-
vq->async_pkts_info = rte_malloc_socket(NULL,
vq->size * sizeof(struct async_inflight_info),
- RTE_CACHE_LINE_SIZE, node);
+ RTE_CACHE_LINE_SIZE, vq->numa_node);
if (!vq->async_pkts_info) {
vhost_free_async_mem(vq);
VHOST_LOG_CONFIG(ERR,
vq->it_pool = rte_malloc_socket(NULL,
VHOST_MAX_ASYNC_IT * sizeof(struct rte_vhost_iov_iter),
- RTE_CACHE_LINE_SIZE, node);
+ RTE_CACHE_LINE_SIZE, vq->numa_node);
if (!vq->it_pool) {
vhost_free_async_mem(vq);
VHOST_LOG_CONFIG(ERR,
vq->vec_pool = rte_malloc_socket(NULL,
VHOST_MAX_ASYNC_VEC * sizeof(struct iovec),
- RTE_CACHE_LINE_SIZE, node);
+ RTE_CACHE_LINE_SIZE, vq->numa_node);
if (!vq->vec_pool) {
vhost_free_async_mem(vq);
VHOST_LOG_CONFIG(ERR,
if (vq_is_packed(dev)) {
vq->async_buffers_packed = rte_malloc_socket(NULL,
vq->size * sizeof(struct vring_used_elem_packed),
- RTE_CACHE_LINE_SIZE, node);
+ RTE_CACHE_LINE_SIZE, vq->numa_node);
if (!vq->async_buffers_packed) {
vhost_free_async_mem(vq);
VHOST_LOG_CONFIG(ERR,
} else {
vq->async_descs_split = rte_malloc_socket(NULL,
vq->size * sizeof(struct vring_used_elem),
- RTE_CACHE_LINE_SIZE, node);
+ RTE_CACHE_LINE_SIZE, vq->numa_node);
if (!vq->async_descs_split) {
vhost_free_async_mem(vq);
VHOST_LOG_CONFIG(ERR,
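
Each async buffer above repeats the same allocate-on-vq->numa_node /
tear-everything-down-on-failure sequence. A hedged sketch of how that
pattern could be factored, with a stubbed type and illustrative helper
names (the real code keeps the calls inline):

	#include <rte_common.h>
	#include <rte_malloc.h>

	struct vq_stub { int numa_node; };	/* stand-in for vhost_virtqueue */

	/* free_async_mem_stub() stands in for vhost_free_async_mem(). */
	static void free_async_mem_stub(struct vq_stub *vq) { (void)vq; }

	/* Allocate one async buffer on the virtqueue's node; on failure,
	 * release everything allocated so far so the caller can bail out. */
	static void *
	alloc_async_buf(struct vq_stub *vq, size_t sz)
	{
		void *p = rte_malloc_socket(NULL, sz, RTE_CACHE_LINE_SIZE,
				vq->numa_node);

		if (p == NULL)
			free_async_mem_stub(vq);
		return p;
	}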
uint16_t batch_copy_nb_elems;
struct batch_copy_elem *batch_copy_elems;
+ int numa_node;
bool used_wrap_counter;
bool avail_wrap_counter;
if (vq_is_packed(dev)) {
if (vq->shadow_used_packed)
rte_free(vq->shadow_used_packed);
- vq->shadow_used_packed = rte_malloc(NULL,
+ vq->shadow_used_packed = rte_malloc_socket(NULL,
vq->size *
sizeof(struct vring_used_elem_packed),
- RTE_CACHE_LINE_SIZE);
+ RTE_CACHE_LINE_SIZE, vq->numa_node);
if (!vq->shadow_used_packed) {
VHOST_LOG_CONFIG(ERR,
"failed to allocate memory for shadow used ring.\n");
if (vq->shadow_used_split)
rte_free(vq->shadow_used_split);
- vq->shadow_used_split = rte_malloc(NULL,
+ vq->shadow_used_split = rte_malloc_socket(NULL,
vq->size * sizeof(struct vring_used_elem),
- RTE_CACHE_LINE_SIZE);
+ RTE_CACHE_LINE_SIZE, vq->numa_node);
if (!vq->shadow_used_split) {
VHOST_LOG_CONFIG(ERR,
if (vq->batch_copy_elems)
rte_free(vq->batch_copy_elems);
- vq->batch_copy_elems = rte_malloc(NULL,
+ vq->batch_copy_elems = rte_malloc_socket(NULL,
vq->size * sizeof(struct batch_copy_elem),
- RTE_CACHE_LINE_SIZE);
+ RTE_CACHE_LINE_SIZE, vq->numa_node);
if (!vq->batch_copy_elems) {
VHOST_LOG_CONFIG(ERR,
"failed to allocate memory for batching copy.\n");
+ if (node == vq->numa_node)
+ goto out_dev_realloc;
+
vq = rte_realloc_socket(vq, sizeof(*vq), 0, node);
if (!vq) {
VHOST_LOG_CONFIG(ERR, "Failed to realloc virtqueue %d on node %d\n",
+ vq->numa_node = node;
+
+out_dev_realloc:
+
if (dev->flags & VIRTIO_DEV_RUNNING)
return dev;
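
The early return added above avoids a useless copy: rte_realloc_socket()
migrates the contents and frees the old buffer when the object has to
change nodes, so skipping it when node == vq->numa_node is pure savings.
A hedged sketch of the move-or-keep logic with a stubbed type:

	#include <rte_malloc.h>

	struct vq_stub { int numa_node; };	/* stand-in for vhost_virtqueue */

	/* Migrate a virtqueue-sized object to the target node, keeping the
	 * old (still valid) buffer if the reallocation fails. */
	static struct vq_stub *
	move_to_node(struct vq_stub *vq, int node)
	{
		struct vq_stub *new_vq;

		if (node == vq->numa_node)
			return vq;	/* already on the right node */

		new_vq = rte_realloc_socket(vq, sizeof(*vq), 0, node);
		if (new_vq == NULL)
			return vq;	/* keep working copy on the old node */

		new_vq->numa_node = node;
		return new_vq;
	}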
struct virtio_net *dev = *pdev;
struct VhostUserMemory *memory = &msg->payload.memory;
struct rte_vhost_mem_region *reg;
+ int numa_node = SOCKET_ID_ANY;
uint64_t mmap_offset;
uint32_t i;
for (i = 0; i < dev->nr_vring; i++)
vhost_user_iotlb_flush_all(dev->virtqueue[i]);
+ /*
+ * If VQ 0 has already been allocated, try to allocate on the same
+ * NUMA node. It can be reallocated later in numa_realloc().
+ */
+ if (dev->nr_vring > 0)
+ numa_node = dev->virtqueue[0]->numa_node;
+
dev->nr_guest_pages = 0;
if (dev->guest_pages == NULL) {
dev->max_guest_pages = 8;
- dev->guest_pages = rte_zmalloc(NULL,
+ dev->guest_pages = rte_zmalloc_socket(NULL,
dev->max_guest_pages *
sizeof(struct guest_page),
- RTE_CACHE_LINE_SIZE);
+ RTE_CACHE_LINE_SIZE,
+ numa_node);
if (dev->guest_pages == NULL) {
VHOST_LOG_CONFIG(ERR,
"(%d) failed to allocate memory "
- dev->mem = rte_zmalloc("vhost-mem-table", sizeof(struct rte_vhost_memory) +
- sizeof(struct rte_vhost_mem_region) * memory->nregions, 0);
+ dev->mem = rte_zmalloc_socket("vhost-mem-table", sizeof(struct rte_vhost_memory) +
+ sizeof(struct rte_vhost_mem_region) * memory->nregions, 0, numa_node);
if (dev->mem == NULL) {
VHOST_LOG_CONFIG(ERR,
"(%d) failed to allocate memory for dev->mem\n",
rte_free(vq->log_cache);
vq->log_cache = NULL;
vq->log_cache_nb_elem = 0;
- vq->log_cache = rte_zmalloc("vq log cache",
+ vq->log_cache = rte_malloc_socket("vq log cache",
sizeof(struct log_cache_entry) * VHOST_LOG_CACHE_NR,
- 0);
+ 0, vq->numa_node);
/*
* If log cache alloc fail, don't fail migration, but no
* caching will be done, which will impact performance
*/
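
The comment above encodes a graceful-degradation policy: a failed cache
allocation must not abort live migration, it only disables batching of
dirty-log writes. A hedged sketch of the caller-side check, with stubbed
types and an illustrative name for the direct-write fallback:

	#include <stdint.h>
	#include <stdlib.h>

	struct log_entry_stub { uint64_t offset, val; };
	struct vq_stub { struct log_entry_stub *log_cache; };

	/* write_dirty_page_direct() stands in for the uncached slow path. */
	static void write_dirty_page_direct(uint64_t page) { (void)page; }

	/* Log one dirty page, tolerating a missing cache. */
	static void
	log_dirty_page(struct vq_stub *vq, uint64_t page)
	{
		if (vq->log_cache == NULL) {
			/* Allocation failed earlier: log synchronously instead. */
			write_dirty_page_direct(page);
			return;
		}
		/* ... otherwise coalesce the page into vq->log_cache ... */
	}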