# install includes
SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_vhost.h rte_vdpa.h \
- rte_vdpa_dev.h
+ rte_vdpa_dev.h rte_vhost_async.h
# only compile vhost crypto when cryptodev is enabled
ifeq ($(CONFIG_RTE_LIBRTE_CRYPTODEV),y)
'vhost.c', 'vhost_user.c',
'virtio_net.c', 'vhost_crypto.c')
headers = files('rte_vhost.h', 'rte_vdpa.h', 'rte_vdpa_dev.h',
- 'rte_vhost_crypto.h')
+ 'rte_vhost_crypto.h', 'rte_vhost_async.h')
deps += ['ethdev', 'cryptodev', 'hash', 'pci']
#define RTE_VHOST_USER_EXTBUF_SUPPORT (1ULL << 5)
/* support only linear buffers (no chained mbufs) */
#define RTE_VHOST_USER_LINEARBUF_SUPPORT (1ULL << 6)
+#define RTE_VHOST_USER_ASYNC_COPY (1ULL << 7)
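As an illustration (not part of the patch; app_register_socket() is an invented helper and the path argument is whatever the application already uses), an application opts into the new mode by OR-ing the flag into the flags word it passes to rte_vhost_driver_register():

/* illustration only: request async copy mode for a vhost-user socket */
static int
app_register_socket(const char *path)
{
	uint64_t flags = RTE_VHOST_USER_CLIENT | RTE_VHOST_USER_ASYNC_COPY;

	return rte_vhost_driver_register(path, flags);
}

Note that, per the vhost_user.c change below, the flag cannot be combined with RTE_VHOST_USER_IOMMU_SUPPORT or RTE_VHOST_USER_POSTCOPY_SUPPORT.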
/* Features. */
#ifndef VIRTIO_NET_F_GUEST_ANNOUNCE
--- /dev/null
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#ifndef _RTE_VHOST_ASYNC_H_
+#define _RTE_VHOST_ASYNC_H_
+
+#include "rte_vhost.h"
+
+/**
+ * iovec iterator
+ */
+struct rte_vhost_iov_iter {
+ /** offset to the first byte of interesting data */
+ size_t offset;
+ /** total bytes of data in this iterator */
+ size_t count;
+ /** pointer to the iovec array */
+ struct iovec *iov;
+ /** number of iovec in this iterator */
+ unsigned long nr_segs;
+};
+
+/**
+ * dma transfer descriptor pair
+ */
+struct rte_vhost_async_desc {
+ /** source memory iov_iter */
+ struct rte_vhost_iov_iter *src;
+ /** destination memory iov_iter */
+ struct rte_vhost_iov_iter *dst;
+};
+
+/**
+ * dma transfer status
+ */
+struct rte_vhost_async_status {
+ /** An array of application specific data for source memory */
+ uintptr_t *src_opaque_data;
+ /** An array of application specific data for destination memory */
+ uintptr_t *dst_opaque_data;
+};
+
+/**
+ * dma operation callbacks to be implemented by applications
+ */
+struct rte_vhost_async_channel_ops {
+ /**
+ * instruct async engines to perform copies for a batch of packets
+ *
+ * @param vid
+ * id of vhost device to perform data copies
+ * @param queue_id
+ * queue id to perform data copies
+ * @param descs
+ * an array of DMA transfer memory descriptors
+ * @param opaque_data
+ * opaque data pair sent to the DMA engine
+ * @param count
+ * number of elements in the "descs" array
+ * @return
+ * -1 on failure, number of descs processed on success
+ */
+ int (*transfer_data)(int vid, uint16_t queue_id,
+ struct rte_vhost_async_desc *descs,
+ struct rte_vhost_async_status *opaque_data,
+ uint16_t count);
+ /**
+ * check copy-completed packets from the async engine
+ * @param vid
+ * id of vhost device to check copy completion
+ * @param queue_id
+ * queue id to check copy completion
+ * @param opaque_data
+ * buffer to receive the opaque data pair from DMA engine
+ * @param max_packets
+ * max number of packets that could be completed
+ * @return
+ * -1 on failure, number of iov segments completed on success
+ */
+ int (*check_completed_copies)(int vid, uint16_t queue_id,
+ struct rte_vhost_async_status *opaque_data,
+ uint16_t max_packets);
+};
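Both callbacks are supplied by the application; the library only stores and invokes them. As a minimal sketch (not part of the patch; the sync_* names are invented), the stand-in below performs the copies inline with memcpy instead of driving a real DMA engine, which keeps the completion bookkeeping visible: transfer_data reports how many descriptors it accepted, and check_completed_copies reports the iov segments completed for at most max_packets packets, matching the contracts documented above. It assumes a single vhost queue, matching src/dst segmentation, zero iterator offsets, and a completion ring that never overflows.

#include <string.h>
#include <sys/uio.h>

#include <rte_common.h>

#include "rte_vhost_async.h"

#define SYNC_RING_SZ 1024 /* must be a power of two */

static uint32_t seg_ring[SYNC_RING_SZ]; /* per-packet iov segment counts */
static uint32_t ring_head, ring_tail;

static int
sync_transfer_data(int vid, uint16_t queue_id,
		struct rte_vhost_async_desc *descs,
		struct rte_vhost_async_status *opaque_data, uint16_t count)
{
	uint16_t i;
	unsigned long seg;

	RTE_SET_USED(vid);
	RTE_SET_USED(queue_id);
	RTE_SET_USED(opaque_data);

	for (i = 0; i < count; i++) {
		struct rte_vhost_iov_iter *src = descs[i].src;
		struct rte_vhost_iov_iter *dst = descs[i].dst;

		/* assumes matching src/dst segmentation and zero offsets */
		for (seg = 0; seg < src->nr_segs; seg++)
			memcpy(dst->iov[seg].iov_base,
				src->iov[seg].iov_base,
				src->iov[seg].iov_len);

		/* remember how many segments this packet consumed */
		seg_ring[ring_tail++ & (SYNC_RING_SZ - 1)] = src->nr_segs;
	}

	return count; /* all descriptors were accepted */
}

static int
sync_check_completed_copies(int vid, uint16_t queue_id,
		struct rte_vhost_async_status *opaque_data,
		uint16_t max_packets)
{
	uint16_t pkts = 0;
	int segs = 0;

	RTE_SET_USED(vid);
	RTE_SET_USED(queue_id);
	RTE_SET_USED(opaque_data);

	/* report completed segments for at most max_packets packets */
	while (pkts < max_packets && ring_head != ring_tail) {
		segs += seg_ring[ring_head++ & (SYNC_RING_SZ - 1)];
		pkts++;
	}

	return segs;
}

static struct rte_vhost_async_channel_ops sync_channel_ops = {
	.transfer_data = sync_transfer_data,
	.check_completed_copies = sync_check_completed_copies,
};

A production backend (for example one built on an IOAT channel) would instead enqueue the descriptors to hardware in transfer_data and poll the device in check_completed_copies.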
+
+/**
+ * dma channel feature bit definition
+ */
+struct rte_vhost_async_features {
+ union {
+ uint32_t intval;
+ struct {
+ uint32_t async_inorder:1;
+ uint32_t resvd_0:15;
+ uint32_t async_threshold:12;
+ uint32_t resvd_1:4;
+ };
+ };
+};
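As a small illustration (not part of the patch; app_async_features() is an invented helper and the 256-byte threshold is an arbitrary choice), the union lets an application compose the 32-bit features word for rte_vhost_async_channel_register() without manual shifting:

/* illustration only: pack the channel feature word from the bit-field view */
static inline uint32_t
app_async_features(void)
{
	struct rte_vhost_async_features f;

	f.intval = 0;            /* clear reserved bits */
	f.async_inorder = 1;     /* b0: copies complete in submission order */
	f.async_threshold = 256; /* b16-b27: length threshold for the DMA path */

	return f.intval;
}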
+
+/**
+ * register an async channel for vhost
+ *
+ * @param vid
+ * id of the vhost device to which the async channel will be attached
+ * @param queue_id
+ * id of the vhost queue to which the async channel will be attached
+ * @param features
+ * DMA channel feature bit
+ * b0 : DMA supports inorder data transfer
+ * b1 - b15: reserved
+ * b16 - b27: Packet length threshold for DMA transfer
+ * b28 - b31: reserved
+ * @param ops
+ * DMA operation callbacks
+ * @return
+ * 0 on success, -1 on failure
+ */
+__rte_experimental
+int rte_vhost_async_channel_register(int vid, uint16_t queue_id,
+ uint32_t features, struct rte_vhost_async_channel_ops *ops);
+
+/**
+ * unregister a dma channel for vhost
+ *
+ * @param vid
+ * id of the vhost device from which the DMA channel is to be detached
+ * @param queue_id
+ * id of the vhost queue from which the DMA channel is to be detached
+ * @return
+ * 0 on success, -1 on failure
+ */
+__rte_experimental
+int rte_vhost_async_channel_unregister(int vid, uint16_t queue_id);
+
+#endif /* _RTE_VHOST_ASYNC_H_ */
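Tying it together, a hedged usage sketch (not part of the patch) that reuses the hypothetical sync_channel_ops and app_async_features() helpers from the sketches above and registers the channel from the application's vhost lifecycle callbacks; queue id 0 stands in for the virtqueue the host enqueues into:

/* illustration only: tie channel registration to the vhost device lifecycle */
static int
app_new_device(int vid)
{
	if (rte_vhost_async_channel_register(vid, 0, app_async_features(),
			&sync_channel_ops) < 0)
		return -1;

	return 0;
}

static void
app_destroy_device(int vid)
{
	/* in-flight copies must be drained first, or unregister fails */
	rte_vhost_async_channel_unregister(vid, 0);
}

static const struct vhost_device_ops app_vhost_ops = {
	.new_device = app_new_device,
	.destroy_device = app_destroy_device,
};

The ops structure would be installed on the socket with rte_vhost_driver_callback_register() as usual; the actual data transfer entry points (rte_vhost_submit_enqueue_burst() and rte_vhost_poll_enqueue_completed(), listed in the version map below) are added elsewhere in this series.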
rte_vdpa_get_queue_num;
rte_vdpa_get_features;
rte_vdpa_get_protocol_features;
+ rte_vhost_async_channel_register;
+ rte_vhost_async_channel_unregister;
+ rte_vhost_submit_enqueue_burst;
+ rte_vhost_poll_enqueue_completed;
};
bool use_builtin_virtio_net;
bool extbuf;
bool linearbuf;
+ bool async_copy;
/*
* The "supported_features" indicates the feature bits the
size_t size;
struct vhost_user_connection *conn;
int ret;
+ struct virtio_net *dev;
if (vsocket == NULL)
return;
if (vsocket->linearbuf)
vhost_enable_linearbuf(vid);
+ if (vsocket->async_copy) {
+ dev = get_device(vid);
+
+ if (dev)
+ dev->async_copy = 1;
+ }
+
VHOST_LOG_CONFIG(INFO, "new device, handle is %d\n", vid);
if (vsocket->notify_ops->new_connection) {
goto out_mutex;
}
+ vsocket->async_copy = flags & RTE_VHOST_USER_ASYNC_COPY;
+
+ if (vsocket->async_copy &&
+ (flags & (RTE_VHOST_USER_IOMMU_SUPPORT |
+ RTE_VHOST_USER_POSTCOPY_SUPPORT))) {
+ VHOST_LOG_CONFIG(ERR, "error: enabling async copy and IOMMU "
+ "or post-copy feature simultaneously is not "
+ "supported\n");
+ goto out_mutex;
+ }
+
/*
* Set the supported features correctly for the builtin vhost-user
* net driver.
~(1ULL << VHOST_USER_PROTOCOL_F_PAGEFAULT);
}
+ if (vsocket->async_copy) {
+ vsocket->supported_features &= ~(1ULL << VHOST_F_LOG_ALL);
+ vsocket->features &= ~(1ULL << VHOST_F_LOG_ALL);
+ VHOST_LOG_CONFIG(INFO,
+ "Logging feature is disabled in async copy mode\n");
+ }
+
/*
* We'll not be able to receive a buffer from guest in linear mode
* without external buffer if it will not fit in a single mbuf, which is
{
if (vq_is_packed(dev))
rte_free(vq->shadow_used_packed);
- else
+ else {
rte_free(vq->shadow_used_split);
+ if (vq->async_pkts_pending)
+ rte_free(vq->async_pkts_pending);
+ if (vq->async_pending_info)
+ rte_free(vq->async_pending_info);
+ }
rte_free(vq->batch_copy_elems);
rte_mempool_free(vq->iotlb_pool);
rte_free(vq);
return 0;
}
+int rte_vhost_async_channel_register(int vid, uint16_t queue_id,
+ uint32_t features,
+ struct rte_vhost_async_channel_ops *ops)
+{
+ struct vhost_virtqueue *vq;
+ struct virtio_net *dev = get_device(vid);
+ struct rte_vhost_async_features f;
+ int ret = 0;
+
+ if (dev == NULL || ops == NULL)
+ return -1;
+
+ f.intval = features;
+
+ vq = dev->virtqueue[queue_id];
+
+ if (unlikely(vq == NULL || !dev->async_copy))
+ return -1;
+
+ /* packed queue is not supported */
+ if (unlikely(vq_is_packed(dev) || !f.async_inorder)) {
+ VHOST_LOG_CONFIG(ERR,
+ "async copy is not supported on packed queue or non-inorder mode "
+ "(vid %d, qid: %d)\n", vid, queue_id);
+ return -1;
+ }
+
+ if (unlikely(ops->check_completed_copies == NULL ||
+ ops->transfer_data == NULL))
+ return -1;
+
+ rte_spinlock_lock(&vq->access_lock);
+
+ if (unlikely(vq->async_registered)) {
+ VHOST_LOG_CONFIG(ERR,
+ "async register failed: channel already registered "
+ "(vid %d, qid: %d)\n", vid, queue_id);
+ ret = -1;
+ goto reg_out;
+ }
+
+ vq->async_pkts_pending = rte_malloc(NULL,
+ vq->size * sizeof(uintptr_t),
+ RTE_CACHE_LINE_SIZE);
+ vq->async_pending_info = rte_malloc(NULL,
+ vq->size * sizeof(uint64_t),
+ RTE_CACHE_LINE_SIZE);
+ if (!vq->async_pkts_pending || !vq->async_pending_info) {
+ if (vq->async_pkts_pending)
+ rte_free(vq->async_pkts_pending);
+ if (vq->async_pending_info)
+ rte_free(vq->async_pending_info);
+ vq->async_pkts_pending = NULL;
+ vq->async_pending_info = NULL;
+
+ VHOST_LOG_CONFIG(ERR,
+ "async register failed: cannot allocate memory for vq data "
+ "(vid %d, qid: %d)\n", vid, queue_id);
+ ret = -1;
+ goto reg_out;
+ }
+
+ vq->async_ops.check_completed_copies = ops->check_completed_copies;
+ vq->async_ops.transfer_data = ops->transfer_data;
+
+ vq->async_inorder = f.async_inorder;
+ vq->async_threshold = f.async_threshold;
+
+ vq->async_registered = true;
+
+reg_out:
+ rte_spinlock_unlock(&vq->access_lock);
+
+ return ret;
+}
+
+int rte_vhost_async_channel_unregister(int vid, uint16_t queue_id)
+{
+ struct vhost_virtqueue *vq;
+ struct virtio_net *dev = get_device(vid);
+ int ret = -1;
+
+ if (dev == NULL)
+ return ret;
+
+ vq = dev->virtqueue[queue_id];
+
+ if (vq == NULL)
+ return ret;
+
+ ret = 0;
+ rte_spinlock_lock(&vq->access_lock);
+
+ if (!vq->async_registered)
+ goto out;
+
+ if (vq->async_pkts_inflight_n) {
+ VHOST_LOG_CONFIG(ERR, "Failed to unregister async channel. "
+ "async inflight packets must be completed before unregistration.\n");
+ ret = -1;
+ goto out;
+ }
+
+ if (vq->async_pkts_pending) {
+ rte_free(vq->async_pkts_pending);
+ vq->async_pkts_pending = NULL;
+ }
+
+ if (vq->async_pending_info) {
+ rte_free(vq->async_pending_info);
+ vq->async_pending_info = NULL;
+ }
+
+ vq->async_ops.transfer_data = NULL;
+ vq->async_ops.check_completed_copies = NULL;
+ vq->async_registered = false;
+
+out:
+ rte_spinlock_unlock(&vq->access_lock);
+
+ return ret;
+}
+
RTE_LOG_REGISTER(vhost_config_log_level, lib.vhost.config, INFO);
RTE_LOG_REGISTER(vhost_data_log_level, lib.vhost.data, WARNING);
#include "rte_vdpa.h"
#include "rte_vdpa_dev.h"
+#include "rte_vhost_async.h"
+
/* Used to indicate that the device is running on a data core */
#define VIRTIO_DEV_RUNNING 1
/* Used to indicate that the device is ready to operate */
#define VHOST_LOG_CACHE_NR 32
+#define MAX_PKT_BURST 32
+
+#define VHOST_MAX_ASYNC_IT (MAX_PKT_BURST * 2)
+#define VHOST_MAX_ASYNC_VEC (BUF_VECTOR_MAX * 2)
+
#define PACKED_DESC_ENQUEUE_USED_FLAG(w) \
((w) ? (VRING_DESC_F_AVAIL | VRING_DESC_F_USED | VRING_DESC_F_WRITE) : \
VRING_DESC_F_WRITE)
TAILQ_HEAD(, vhost_iotlb_entry) iotlb_list;
int iotlb_cache_nr;
TAILQ_HEAD(, vhost_iotlb_entry) iotlb_pending_list;
+
+ /* operation callbacks for async dma */
+ struct rte_vhost_async_channel_ops async_ops;
+
+ struct rte_vhost_iov_iter it_pool[VHOST_MAX_ASYNC_IT];
+ struct iovec vec_pool[VHOST_MAX_ASYNC_VEC];
+
+ /* async data transfer status */
+ uintptr_t **async_pkts_pending;
+ #define ASYNC_PENDING_INFO_N_MSK 0xFFFF
+ #define ASYNC_PENDING_INFO_N_SFT 16
+ uint64_t *async_pending_info;
+ uint16_t async_pkts_idx;
+ uint16_t async_pkts_inflight_n;
+
+ /* vq async features */
+ bool async_inorder;
+ bool async_registered;
+ uint16_t async_threshold;
} __rte_cache_aligned;
#define VHOST_MAX_VRING 0x100
int16_t broadcast_rarp;
uint32_t nr_vring;
int dequeue_zero_copy;
+ int async_copy;
int extbuf;
int linearbuf;
struct vhost_virtqueue *virtqueue[VHOST_MAX_QUEUE_PAIRS * 2];
/* Don't kick guest if we don't reach index specified by guest. */
if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX)) {
uint16_t old = vq->signalled_used;
- uint16_t new = vq->last_used_idx;
+ uint16_t new = vq->async_pkts_inflight_n ?
+ vq->used->idx : vq->last_used_idx;
bool signalled_used_valid = vq->signalled_used_valid;
vq->signalled_used = new;
} else {
if (vq->shadow_used_split)
rte_free(vq->shadow_used_split);
+
vq->shadow_used_split = rte_malloc(NULL,
vq->size * sizeof(struct vring_used_elem),
RTE_CACHE_LINE_SIZE);
+
if (!vq->shadow_used_split) {
VHOST_LOG_CONFIG(ERR,
- "failed to allocate memory for shadow used ring.\n");
+ "failed to allocate memory for vq internal data.\n");
return RTE_VHOST_MSG_RESULT_ERR;
}
}
goto err_mmap;
}
- populate = (dev->dequeue_zero_copy) ? MAP_POPULATE : 0;
+ populate = (dev->dequeue_zero_copy || dev->async_copy) ?
+ MAP_POPULATE : 0;
mmap_addr = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
MAP_SHARED | populate, fd, 0);
reg->host_user_addr = (uint64_t)(uintptr_t)mmap_addr +
mmap_offset;
- if (dev->dequeue_zero_copy)
+ if (dev->dequeue_zero_copy || dev->async_copy)
if (add_guest_pages(dev, reg, alignment) < 0) {
VHOST_LOG_CONFIG(ERR,
"adding guest pages to region %u failed.\n",
} else {
rte_free(vq->shadow_used_split);
vq->shadow_used_split = NULL;
+ if (vq->async_pkts_pending)
+ rte_free(vq->async_pkts_pending);
+ if (vq->async_pending_info)
+ rte_free(vq->async_pending_info);
+ vq->async_pkts_pending = NULL;
+ vq->async_pending_info = NULL;
}
rte_free(vq->batch_copy_elems);
"set queue enable: %d to qp idx: %d\n",
enable, index);
+ if (!enable && dev->virtqueue[index]->async_registered) {
+ if (dev->virtqueue[index]->async_pkts_inflight_n) {
+ VHOST_LOG_CONFIG(ERR, "failed to disable vring. "
+ "async inflight packets must be completed first\n");
+ return RTE_VHOST_MSG_RESULT_ERR;
+ }
+ }
+
/* On disable, rings have to be stopped being processed. */
if (!enable && dev->dequeue_zero_copy)
drain_zmbuf_list(dev->virtqueue[index]);