From c3ff0ac70acb1f4a1b22fa24160fdc3be4597724 Mon Sep 17 00:00:00 2001
From: Flavio Leitner
Date: Tue, 15 Oct 2019 15:59:51 -0300
Subject: [PATCH] vhost: improve performance by supporting large buffer

The rte_vhost_dequeue_burst() API supports two ways of dequeuing data.
If the data fits into a buffer, then all data is copied and a single
linear buffer is returned. Otherwise it allocates additional mbufs and
chains them together to return a multi-segment mbuf.

While that covers most use cases, it forces applications that need to
work with larger data sizes to support multi-segment mbufs. The
non-linear characteristic brings complexity and performance
implications to the application.

To resolve the issue, add support for attaching an external buffer to
a pktmbuf and let the host indicate during registration whether
attaching an external buffer to a pktmbuf is supported and whether
only linear buffers are supported.

Signed-off-by: Flavio Leitner
Reviewed-by: Maxime Coquelin
---
 doc/guides/prog_guide/vhost_lib.rst |  35 +++++++++
 lib/librte_vhost/rte_vhost.h        |   4 +
 lib/librte_vhost/socket.c           |  22 ++++++
 lib/librte_vhost/vhost.c            |  22 ++++++
 lib/librte_vhost/vhost.h            |   4 +
 lib/librte_vhost/virtio_net.c       | 109 ++++++++++++++++++++++++----
 6 files changed, 182 insertions(+), 14 deletions(-)

diff --git a/doc/guides/prog_guide/vhost_lib.rst b/doc/guides/prog_guide/vhost_lib.rst
index fc3ee43531..07e40e3c5e 100644
--- a/doc/guides/prog_guide/vhost_lib.rst
+++ b/doc/guides/prog_guide/vhost_lib.rst
@@ -117,6 +117,41 @@ The following is an overview of some key Vhost API functions:
     Enabling this flag should only be done when the calling application does
     not pre-fault the guest shared memory, otherwise migration would fail.
 
+  - ``RTE_VHOST_USER_LINEARBUF_SUPPORT``
+
+    Enabling this flag forces the vhost dequeue function to only provide
+    linear pktmbufs (no multi-segmented pktmbuf).
+
+    The vhost library by default provides a single pktmbuf for a given
+    packet, but if for some reason the data doesn't fit into a single
+    pktmbuf (e.g., TSO is enabled), the library will allocate additional
+    pktmbufs from the same mempool and chain them together to create a
+    multi-segmented pktmbuf.
+
+    However, the vhost application then needs to support the multi-segmented
+    format. If the application does not support that format and requires
+    large buffers to be dequeued, this flag should be enabled to force only
+    linear buffers (see RTE_VHOST_USER_EXTBUF_SUPPORT) or drop the packet.
+
+    It is disabled by default.
+
+  - ``RTE_VHOST_USER_EXTBUF_SUPPORT``
+
+    Enabling this flag allows the vhost dequeue function to allocate and
+    attach an external buffer to a pktmbuf if the pktmbuf doesn't provide
+    enough space to store all of the data.
+
+    This is useful when the vhost application wants to support large
+    packets but doesn't want to increase the default mempool object size
+    nor to support multi-segmented (non-linear) mbufs. In this case, a
+    fresh buffer is allocated using rte_malloc() and attached to the
+    pktmbuf using rte_pktmbuf_attach_extbuf().
+
+    See RTE_VHOST_USER_LINEARBUF_SUPPORT as well to disable multi-segmented
+    mbufs for applications that don't support chained mbufs.
+
+    It is disabled by default.
+
 * ``rte_vhost_driver_set_features(path, features)``
 
   This function sets the feature bits the vhost-user driver supports. The
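
As a usage illustration for the two flags documented above, the following
minimal sketch registers a port that dequeues only linear mbufs and falls
back to externally attached buffers for oversized packets. The socket path
is a hypothetical placeholder; rte_vhost_driver_register() and
rte_vhost_driver_start() are the existing public API.

    /* Usage sketch (not part of this patch): opt in to linear-only
     * dequeue with external buffers as the large-packet fallback.
     * The socket path below is an illustrative assumption. */
    #include <rte_vhost.h>

    static int
    register_large_buf_port(void)
    {
            const char *path = "/tmp/vhost-user.sock"; /* hypothetical */
            uint64_t flags = RTE_VHOST_USER_EXTBUF_SUPPORT |
                             RTE_VHOST_USER_LINEARBUF_SUPPORT;

            if (rte_vhost_driver_register(path, flags) < 0)
                    return -1;

            return rte_vhost_driver_start(path);
    }
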
diff --git a/lib/librte_vhost/rte_vhost.h b/lib/librte_vhost/rte_vhost.h
index 140d79ce5e..7b5dc87c2e 100644
--- a/lib/librte_vhost/rte_vhost.h
+++ b/lib/librte_vhost/rte_vhost.h
@@ -30,6 +30,10 @@ extern "C" {
 #define RTE_VHOST_USER_DEQUEUE_ZERO_COPY	(1ULL << 2)
 #define RTE_VHOST_USER_IOMMU_SUPPORT	(1ULL << 3)
 #define RTE_VHOST_USER_POSTCOPY_SUPPORT	(1ULL << 4)
+/* support mbuf with external buffer attached */
+#define RTE_VHOST_USER_EXTBUF_SUPPORT	(1ULL << 5)
+/* support only linear buffers (no chained mbufs) */
+#define RTE_VHOST_USER_LINEARBUF_SUPPORT	(1ULL << 6)
 
 /** Protocol features. */
 #ifndef VHOST_USER_PROTOCOL_F_MQ
diff --git a/lib/librte_vhost/socket.c b/lib/librte_vhost/socket.c
index 810049c158..2d3d208049 100644
--- a/lib/librte_vhost/socket.c
+++ b/lib/librte_vhost/socket.c
@@ -40,6 +40,8 @@ struct vhost_user_socket {
 	bool dequeue_zero_copy;
 	bool iommu_support;
 	bool use_builtin_virtio_net;
+	bool extbuf;
+	bool linearbuf;
 
 	/*
 	 * The "supported_features" indicates the feature bits the
@@ -232,6 +234,12 @@ vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket)
 	if (vsocket->dequeue_zero_copy)
 		vhost_enable_dequeue_zero_copy(vid);
 
+	if (vsocket->extbuf)
+		vhost_enable_extbuf(vid);
+
+	if (vsocket->linearbuf)
+		vhost_enable_linearbuf(vid);
+
 	RTE_LOG(INFO, VHOST_CONFIG, "new device, handle is %d\n", vid);
 
 	if (vsocket->notify_ops->new_connection) {
@@ -870,6 +878,8 @@ rte_vhost_driver_register(const char *path, uint64_t flags)
 		goto out_free;
 	}
 	vsocket->dequeue_zero_copy = flags & RTE_VHOST_USER_DEQUEUE_ZERO_COPY;
+	vsocket->extbuf = flags & RTE_VHOST_USER_EXTBUF_SUPPORT;
+	vsocket->linearbuf = flags & RTE_VHOST_USER_LINEARBUF_SUPPORT;
 
 	if (vsocket->dequeue_zero_copy &&
 	    (flags & RTE_VHOST_USER_IOMMU_SUPPORT)) {
@@ -902,6 +912,18 @@ rte_vhost_driver_register(const char *path, uint64_t flags)
 	 * not compatible with postcopy.
 	 */
 	if (vsocket->dequeue_zero_copy) {
+		if (vsocket->extbuf) {
+			RTE_LOG(ERR, VHOST_CONFIG,
+			"error: zero copy is incompatible with external buffers\n");
+			ret = -1;
+			goto out_mutex;
+		}
+		if (vsocket->linearbuf) {
+			RTE_LOG(ERR, VHOST_CONFIG,
+			"error: zero copy is incompatible with linear buffers\n");
+			ret = -1;
+			goto out_mutex;
+		}
 		vsocket->supported_features &= ~(1ULL << VIRTIO_F_IN_ORDER);
 		vsocket->features &= ~(1ULL << VIRTIO_F_IN_ORDER);
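
The registration checks above make dequeue zero-copy mutually exclusive
with both new flags, so an application combining them sees the failure at
rte_vhost_driver_register() time. A small sketch of that caller-visible
behavior, assuming an illustrative socket path:

    /* Sketch only: registration is expected to fail when dequeue
     * zero-copy is combined with either of the new flags. */
    #include <assert.h>
    #include <rte_vhost.h>

    static void
    check_flag_conflict(void)
    {
            uint64_t flags = RTE_VHOST_USER_DEQUEUE_ZERO_COPY |
                             RTE_VHOST_USER_EXTBUF_SUPPORT;

            /* "/tmp/zc.sock" is a hypothetical path. */
            assert(rte_vhost_driver_register("/tmp/zc.sock", flags) < 0);
    }
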
diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
index 6c527e2272..1cbe948f74 100644
--- a/lib/librte_vhost/vhost.c
+++ b/lib/librte_vhost/vhost.c
@@ -673,6 +673,28 @@ vhost_set_builtin_virtio_net(int vid, bool enable)
 		dev->flags &= ~VIRTIO_DEV_BUILTIN_VIRTIO_NET;
 }
 
+void
+vhost_enable_extbuf(int vid)
+{
+	struct virtio_net *dev = get_device(vid);
+
+	if (dev == NULL)
+		return;
+
+	dev->extbuf = 1;
+}
+
+void
+vhost_enable_linearbuf(int vid)
+{
+	struct virtio_net *dev = get_device(vid);
+
+	if (dev == NULL)
+		return;
+
+	dev->linearbuf = 1;
+}
+
 int
 rte_vhost_get_mtu(int vid, uint16_t *mtu)
 {
diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index 6250ebd4a9..c76d401155 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -316,6 +316,8 @@ struct virtio_net {
 	rte_atomic16_t		broadcast_rarp;
 	uint32_t		nr_vring;
 	int			dequeue_zero_copy;
+	int			extbuf;
+	int			linearbuf;
 	struct vhost_virtqueue	*virtqueue[VHOST_MAX_QUEUE_PAIRS * 2];
 	struct inflight_mem_info *inflight_info;
 #define IF_NAME_SZ (PATH_MAX > IFNAMSIZ ? PATH_MAX : IFNAMSIZ)
@@ -543,6 +545,8 @@ void vhost_attach_vdpa_device(int vid, int did);
 void vhost_set_ifname(int, const char *if_name, unsigned int if_len);
 void vhost_enable_dequeue_zero_copy(int vid);
 void vhost_set_builtin_virtio_net(int vid, bool enable);
+void vhost_enable_extbuf(int vid);
+void vhost_enable_linearbuf(int vid);
 
 struct vhost_device_ops const *vhost_driver_callback_get(const char *path);
diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index beba9b9f78..66f0c72067 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -1289,6 +1289,93 @@ again:
 	return NULL;
 }
 
+static void
+virtio_dev_extbuf_free(void *addr __rte_unused, void *opaque)
+{
+	rte_free(opaque);
+}
+
+static int
+virtio_dev_extbuf_alloc(struct rte_mbuf *pkt, uint32_t size)
+{
+	struct rte_mbuf_ext_shared_info *shinfo = NULL;
+	uint32_t total_len = RTE_PKTMBUF_HEADROOM + size;
+	uint16_t buf_len;
+	rte_iova_t iova;
+	void *buf;
+
+	/* Try to use pkt buffer to store shinfo to reduce the amount of memory
+	 * required, otherwise store shinfo in the new buffer.
+	 */
+	if (rte_pktmbuf_tailroom(pkt) >= sizeof(*shinfo))
+		shinfo = rte_pktmbuf_mtod(pkt,
+				struct rte_mbuf_ext_shared_info *);
+	else {
+		total_len += sizeof(*shinfo) + sizeof(uintptr_t);
+		total_len = RTE_ALIGN_CEIL(total_len, sizeof(uintptr_t));
+	}
+
+	if (unlikely(total_len > UINT16_MAX))
+		return -ENOSPC;
+
+	buf_len = total_len;
+	buf = rte_malloc(NULL, buf_len, RTE_CACHE_LINE_SIZE);
+	if (unlikely(buf == NULL))
+		return -ENOMEM;
+
+	/* Initialize shinfo */
+	if (shinfo) {
+		shinfo->free_cb = virtio_dev_extbuf_free;
+		shinfo->fcb_opaque = buf;
+		rte_mbuf_ext_refcnt_set(shinfo, 1);
+	} else {
+		shinfo = rte_pktmbuf_ext_shinfo_init_helper(buf, &buf_len,
+					virtio_dev_extbuf_free, buf);
+		if (unlikely(shinfo == NULL)) {
+			rte_free(buf);
+			RTE_LOG(ERR, VHOST_DATA, "Failed to init shinfo\n");
+			return -1;
+		}
+	}
+
+	iova = rte_malloc_virt2iova(buf);
+	rte_pktmbuf_attach_extbuf(pkt, buf, iova, buf_len, shinfo);
+	rte_pktmbuf_reset_headroom(pkt);
+
+	return 0;
+}
+
+/*
+ * Allocate a host supported pktmbuf.
+ */
+static __rte_always_inline struct rte_mbuf *
+virtio_dev_pktmbuf_alloc(struct virtio_net *dev, struct rte_mempool *mp,
+			 uint32_t data_len)
+{
+	struct rte_mbuf *pkt = rte_pktmbuf_alloc(mp);
+
+	if (unlikely(pkt == NULL))
+		return NULL;
+
+	if (rte_pktmbuf_tailroom(pkt) >= data_len)
+		return pkt;
+
+	/* attach an external buffer if supported */
+	if (dev->extbuf && !virtio_dev_extbuf_alloc(pkt, data_len))
+		return pkt;
+
+	/* check if chained buffers are allowed */
+	if (!dev->linearbuf)
+		return pkt;
+
+	/* Data doesn't fit into the buffer and the host supports
+	 * only linear buffers
+	 */
+	rte_pktmbuf_free(pkt);
+
+	return NULL;
+}
+
 static __rte_noinline uint16_t
 virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
 	struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count)
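
For reference, here is a standalone sketch of the sizing arithmetic
virtio_dev_extbuf_alloc() performs when the shared info cannot be stored in
the mbuf's tailroom; the helper name extbuf_total_len() is invented for
illustration:

    /* Illustrative helper (name invented): mirror the else-branch above,
     * reserving room for the shared info plus worst-case alignment
     * padding on top of the headroom and packet data. */
    #include <stdint.h>
    #include <rte_common.h>
    #include <rte_mbuf.h>

    static uint32_t
    extbuf_total_len(uint32_t size)
    {
            uint32_t total_len = RTE_PKTMBUF_HEADROOM + size;

            total_len += sizeof(struct rte_mbuf_ext_shared_info) +
                         sizeof(uintptr_t);
            return RTE_ALIGN_CEIL(total_len, sizeof(uintptr_t));
    }
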
@@ -1343,26 +1430,23 @@ virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
 	for (i = 0; i < count; i++) {
 		struct buf_vector buf_vec[BUF_VECTOR_MAX];
 		uint16_t head_idx;
-		uint32_t dummy_len;
+		uint32_t buf_len;
 		uint16_t nr_vec = 0;
 		int err;
 
 		if (unlikely(fill_vec_buf_split(dev, vq,
 						vq->last_avail_idx + i,
 						&nr_vec, buf_vec,
-						&head_idx, &dummy_len,
+						&head_idx, &buf_len,
 						VHOST_ACCESS_RO) < 0))
 			break;
 
 		if (likely(dev->dequeue_zero_copy == 0))
 			update_shadow_used_ring_split(vq, head_idx, 0);
 
-		pkts[i] = rte_pktmbuf_alloc(mbuf_pool);
-		if (unlikely(pkts[i] == NULL)) {
-			RTE_LOG(ERR, VHOST_DATA,
-				"Failed to allocate memory for mbuf.\n");
+		pkts[i] = virtio_dev_pktmbuf_alloc(dev, mbuf_pool, buf_len);
+		if (unlikely(pkts[i] == NULL))
 			break;
-		}
 
 		err = copy_desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkts[i],
 				mbuf_pool);
@@ -1451,14 +1535,14 @@ virtio_dev_tx_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
 	for (i = 0; i < count; i++) {
 		struct buf_vector buf_vec[BUF_VECTOR_MAX];
 		uint16_t buf_id;
-		uint32_t dummy_len;
+		uint32_t buf_len;
 		uint16_t desc_count, nr_vec = 0;
 		int err;
 
 		if (unlikely(fill_vec_buf_packed(dev, vq,
 						vq->last_avail_idx, &desc_count,
 						buf_vec, &nr_vec,
-						&buf_id, &dummy_len,
+						&buf_id, &buf_len,
 						VHOST_ACCESS_RO) < 0))
 			break;
 
@@ -1466,12 +1550,9 @@ virtio_dev_tx_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
 			update_shadow_used_ring_packed(vq, buf_id, 0,
 					desc_count);
 
-		pkts[i] = rte_pktmbuf_alloc(mbuf_pool);
-		if (unlikely(pkts[i] == NULL)) {
-			RTE_LOG(ERR, VHOST_DATA,
-				"Failed to allocate memory for mbuf.\n");
+		pkts[i] = virtio_dev_pktmbuf_alloc(dev, mbuf_pool, buf_len);
+		if (unlikely(pkts[i] == NULL))
 			break;
-		}
 
 		err = copy_desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkts[i],
 				mbuf_pool);
-- 
2.20.1
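
With RTE_VHOST_USER_LINEARBUF_SUPPORT enabled (and
RTE_VHOST_USER_EXTBUF_SUPPORT as the large-packet fallback), every mbuf
returned by rte_vhost_dequeue_burst() is linear. A hedged consumer sketch
follows; VIRTIO_TXQ and handle_packet() are illustrative assumptions, not
part of this patch:

    /* Consumer sketch: with linear-only dequeue, each packet is one
     * contiguous buffer, so no segment walking is needed. */
    #include <rte_mbuf.h>
    #include <rte_vhost.h>

    #define VIRTIO_TXQ 1 /* guest TX ring dequeued by the host; assumption */

    void handle_packet(void *data, uint16_t len); /* hypothetical */

    static void
    drain_queue(int vid, struct rte_mempool *mp)
    {
            struct rte_mbuf *pkts[32];
            uint16_t i, n;

            n = rte_vhost_dequeue_burst(vid, VIRTIO_TXQ, mp, pkts, 32);
            for (i = 0; i < n; i++) {
                    /* Linear mode guarantees nb_segs == 1, so data_len
                     * equals pkt_len even for oversized packets. */
                    handle_packet(rte_pktmbuf_mtod(pkts[i], void *),
                                  rte_pktmbuf_data_len(pkts[i]));
                    rte_pktmbuf_free(pkts[i]);
            }
    }
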