X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=lib%2Fvhost%2Fvhost.h;h=a9edc271aa7f6fd4457b5c1f0f1839b00d6d259c;hb=a2dfcd1ff609f5a4fd3b65774618a35c5c9f73c6;hp=f628714c245ec6866262dfbd0f280b4313183b0e;hpb=99a2dd955fba6e4cc23b77d590a033650ced9c45;p=dpdk.git

diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h
index f628714c24..a9edc271aa 100644
--- a/lib/vhost/vhost.h
+++ b/lib/vhost/vhost.h
@@ -17,25 +17,26 @@
 #include
 #include
-#include
 #include
+#include
 #include "rte_vhost.h"
-#include "rte_vdpa.h"
-#include "rte_vdpa_dev.h"
+#include "vdpa_driver.h"
 #include "rte_vhost_async.h"
 /* Used to indicate that the device is running on a data core */
-#define VIRTIO_DEV_RUNNING 1
+#define VIRTIO_DEV_RUNNING ((uint32_t)1 << 0)
 /* Used to indicate that the device is ready to operate */
-#define VIRTIO_DEV_READY 2
+#define VIRTIO_DEV_READY ((uint32_t)1 << 1)
 /* Used to indicate that the built-in vhost net device backend is enabled */
-#define VIRTIO_DEV_BUILTIN_VIRTIO_NET 4
+#define VIRTIO_DEV_BUILTIN_VIRTIO_NET ((uint32_t)1 << 2)
 /* Used to indicate that the device has its own data path and configured */
-#define VIRTIO_DEV_VDPA_CONFIGURED 8
+#define VIRTIO_DEV_VDPA_CONFIGURED ((uint32_t)1 << 3)
 /* Used to indicate that the feature negotiation failed */
-#define VIRTIO_DEV_FEATURES_FAILED 16
+#define VIRTIO_DEV_FEATURES_FAILED ((uint32_t)1 << 4)
+/* Used to indicate that the virtio_net tx code should fill TX ol_flags */
+#define VIRTIO_DEV_LEGACY_OL_FLAGS ((uint32_t)1 << 5)
 /* Backend value set by guest. */
 #define VIRTIO_DEV_STOPPED -1
@@ -46,8 +47,11 @@
 #define MAX_PKT_BURST 32
-#define VHOST_MAX_ASYNC_IT (MAX_PKT_BURST * 2)
-#define VHOST_MAX_ASYNC_VEC (BUF_VECTOR_MAX * 4)
+#define VHOST_MAX_ASYNC_IT (MAX_PKT_BURST)
+#define VHOST_MAX_ASYNC_VEC 2048
+#define VIRTIO_MAX_RX_PKTLEN 9728U
+#define VHOST_DMA_MAX_COPY_COMPLETE ((VIRTIO_MAX_RX_PKTLEN / RTE_MBUF_DEFAULT_DATAROOM) \
+		* MAX_PKT_BURST)
 #define PACKED_DESC_ENQUEUE_USED_FLAG(w) \
 	((w) ? (VRING_DESC_F_AVAIL | VRING_DESC_F_USED | VRING_DESC_F_WRITE) : \
@@ -117,6 +121,110 @@ struct vring_used_elem_packed {
 	uint32_t count;
 };
+/**
+ * iovec
+ */
+struct vhost_iovec {
+	void *src_addr;
+	void *dst_addr;
+	size_t len;
+};
+
+/**
+ * iovec iterator
+ */
+struct vhost_iov_iter {
+	/** pointer to the iovec array */
+	struct vhost_iovec *iov;
+	/** number of iovec in this iterator */
+	unsigned long nr_segs;
+};
+
+struct async_dma_vchan_info {
+	/* circular array to track if packet copy completes */
+	bool **pkts_cmpl_flag_addr;
+
+	/* max elements in 'pkts_cmpl_flag_addr' */
+	uint16_t ring_size;
+	/* ring index mask for 'pkts_cmpl_flag_addr' */
+	uint16_t ring_mask;
+
+	/**
+	 * DMA virtual channel lock. Although it is possible to bind DMA
+	 * virtual channels to data plane threads, the vhost control plane
+	 * thread could call data plane functions too, thus causing
+	 * DMA device contention.
+	 *
+	 * For example, in the VM exit case, the vhost control plane thread
+	 * needs to clear in-flight packets before disabling a vring, but
+	 * another data plane thread could be enqueuing packets to the same
+	 * vring with the same DMA virtual channel. As dmadev PMD functions
+	 * are lock-free, the control plane and data plane threads could
+	 * operate on the same DMA virtual channel at the same time.
+	 */
+	rte_spinlock_t dma_lock;
+};
+
+struct async_dma_info {
+	struct async_dma_vchan_info *vchans;
+	/* number of registered virtual channels */
+	uint16_t nr_vchans;
+};
+
+extern struct async_dma_info dma_copy_track[RTE_DMADEV_DEFAULT_MAX];
+
+/**
+ * inflight async packet information
+ */
+struct async_inflight_info {
+	struct rte_mbuf *mbuf;
+	uint16_t descs; /* num of descs inflight */
+	uint16_t nr_buffers; /* num of buffers inflight for packed ring */
+};
+
+struct vhost_async {
+	struct vhost_iov_iter iov_iter[VHOST_MAX_ASYNC_IT];
+	struct vhost_iovec iovec[VHOST_MAX_ASYNC_VEC];
+	uint16_t iter_idx;
+	uint16_t iovec_idx;
+
+	/* data transfer status */
+	struct async_inflight_info *pkts_info;
+	/**
+	 * Packet reorder array. "true" indicates that the DMA device has
+	 * completed all copies for the packet.
+	 *
+	 * Note that this array could be written by multiple threads
+	 * simultaneously. For example, in the case where thread0 and
+	 * thread1 receive packets from the NIC and then enqueue them to
+	 * vring0 and vring1 with their own DMA devices DMA0 and DMA1, it's
+	 * possible for thread0 to get completed copies belonging to
+	 * vring1 from DMA0, while thread0 is calling
+	 * rte_vhost_poll_enqueue_completed() for vring0 and thread1 is
+	 * calling rte_vhost_submit_enqueue_burst() for vring1. In this
+	 * case, vq->access_lock cannot protect pkts_cmpl_flag of vring1.
+	 *
+	 * However, since offloading is done on a per-packet basis, each
+	 * packet flag will only be written by one thread. And a single-byte
+	 * write is atomic, so no lock for pkts_cmpl_flag is needed.
+	 */
+	bool *pkts_cmpl_flag;
+	uint16_t pkts_idx;
+	uint16_t pkts_inflight_n;
+	union {
+		struct vring_used_elem *descs_split;
+		struct vring_used_elem_packed *buffers_packed;
+	};
+	union {
+		uint16_t desc_idx_split;
+		uint16_t buffer_idx_packed;
+	};
+	union {
+		uint16_t last_desc_idx_split;
+		uint16_t last_buffer_idx_packed;
+	};
+};
+
 /**
  * Structure contains variables relevant to RX/TX virtqueues.
 */
@@ -162,6 +270,7 @@ struct vhost_virtqueue {
 	uint16_t batch_copy_nb_elems;
 	struct batch_copy_elem *batch_copy_elems;
+	int numa_node;
 	bool used_wrap_counter;
 	bool avail_wrap_counter;
@@ -190,25 +299,7 @@ struct vhost_virtqueue {
 	struct rte_vhost_resubmit_info *resubmit_inflight;
 	uint64_t global_counter;
-	/* operation callbacks for async dma */
-	struct rte_vhost_async_channel_ops async_ops;
-
-	struct rte_vhost_iov_iter *it_pool;
-	struct iovec *vec_pool;
-
-	/* async data transfer status */
-	struct async_inflight_info *async_pkts_info;
-	uint16_t async_pkts_idx;
-	uint16_t async_pkts_inflight_n;
-	uint16_t async_last_pkts_n;
-	struct vring_used_elem *async_descs_split;
-	uint16_t async_desc_idx;
-	uint16_t last_async_desc_idx;
-
-	/* vq async features */
-	bool async_inorder;
-	bool async_registered;
-	uint16_t async_threshold;
+	struct vhost_async *async;
 	int notif_enable;
 #define VIRTIO_UNINITIALIZED_NOTIF (-1)
@@ -333,7 +424,8 @@ struct vring_packed_desc_event {
 struct guest_page {
 	uint64_t guest_phys_addr;
-	uint64_t host_phys_addr;
+	uint64_t host_iova;
+	uint64_t host_user_addr;
 	uint64_t size;
 };
@@ -359,6 +451,7 @@ struct virtio_net {
 	int16_t broadcast_rarp;
 	uint32_t nr_vring;
 	int async_copy;
+	int extbuf;
 	int linearbuf;
 	struct vhost_virtqueue *virtqueue[VHOST_MAX_QUEUE_PAIRS * 2];
@@ -372,7 +465,7 @@ struct virtio_net {
 	uint16_t mtu;
 	uint8_t status;
-	struct vhost_device_ops const *notify_ops;
+	struct rte_vhost_device_ops const *notify_ops;
 	uint32_t nr_guest_pages;
 	uint32_t max_guest_pages;
@@ -546,8 +639,7 @@ extern int vhost_data_log_level;
 #define PRINT_PACKET(device, addr, size, header) do {} while (0)
 #endif
-#define MAX_VHOST_DEVICE 1024
-extern struct virtio_net *vhost_devices[MAX_VHOST_DEVICE];
+extern struct virtio_net *vhost_devices[RTE_MAX_VHOST_DEVICE];
 #define VHOST_BINARY_SEARCH_THRESH 256
@@ -565,6 +657,20 @@ static __rte_always_inline int guest_page_addrcmp(const void *p1,
 	return 0;
 }
+static __rte_always_inline int guest_page_rangecmp(const void *p1, const void *p2)
+{
+	const struct guest_page *page1 = (const struct guest_page *)p1;
+	const struct guest_page *page2 = (const struct guest_page *)p2;
+
+	if (page1->guest_phys_addr >= page2->guest_phys_addr) {
+		if (page1->guest_phys_addr < page2->guest_phys_addr + page2->size)
+			return 0;
+		else
+			return 1;
+	} else
+		return -1;
+}
+
 static __rte_always_inline rte_iova_t
 gpa_to_first_hpa(struct virtio_net *dev, uint64_t gpa,
 	uint64_t gpa_size, uint64_t *hpa_size)
@@ -575,20 +681,20 @@ gpa_to_first_hpa(struct virtio_net *dev, uint64_t gpa,
 	*hpa_size = gpa_size;
 	if (dev->nr_guest_pages >= VHOST_BINARY_SEARCH_THRESH) {
-		key.guest_phys_addr = gpa & ~(dev->guest_pages[0].size - 1);
+		key.guest_phys_addr = gpa;
 		page = bsearch(&key, dev->guest_pages, dev->nr_guest_pages,
-			sizeof(struct guest_page), guest_page_addrcmp);
+			sizeof(struct guest_page), guest_page_rangecmp);
 		if (page) {
 			if (gpa + gpa_size <= page->guest_phys_addr + page->size) {
 				return gpa - page->guest_phys_addr +
-					page->host_phys_addr;
+					page->host_iova;
 			} else if (gpa < page->guest_phys_addr + page->size) {
 				*hpa_size = page->guest_phys_addr +
 					page->size - gpa;
 				return gpa - page->guest_phys_addr +
-					page->host_phys_addr;
+					page->host_iova;
 			}
 		}
 	} else {
@@ -599,13 +705,13 @@ gpa_to_first_hpa(struct virtio_net *dev, uint64_t gpa,
 			if (gpa + gpa_size <= page->guest_phys_addr + page->size) {
 				return gpa - page->guest_phys_addr +
-					page->host_phys_addr;
+					page->host_iova;
 			} else if (gpa < page->guest_phys_addr + page->size) {
 				*hpa_size =
 					page->guest_phys_addr + page->size - gpa;
 				return gpa - page->guest_phys_addr +
-					page->host_phys_addr;
+					page->host_iova;
 			}
 		}
 	}
@@ -674,13 +780,13 @@ int alloc_vring_queue(struct virtio_net *dev, uint32_t vring_idx);
 void vhost_attach_vdpa_device(int vid, struct rte_vdpa_device *dev);
 void vhost_set_ifname(int, const char *if_name, unsigned int if_len);
-void vhost_set_builtin_virtio_net(int vid, bool enable);
+void vhost_setup_virtio_net(int vid, bool enable, bool legacy_ol_flags);
 void vhost_enable_extbuf(int vid);
 void vhost_enable_linearbuf(int vid);
 int vhost_enable_guest_notification(struct virtio_net *dev,
 		struct vhost_virtqueue *vq, int enable);
-struct vhost_device_ops const *vhost_driver_callback_get(const char *path);
+struct rte_vhost_device_ops const *vhost_driver_callback_get(const char *path);
 /*
  * Backend-specific cleanup.
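
The gpa_to_first_hpa() hunks above replace guest_page_addrcmp() with the new guest_page_rangecmp(), so bsearch() can be keyed on the raw guest physical address instead of a page-aligned one, and a hit now returns host_iova rather than host_phys_addr. Below is a minimal, self-contained sketch of that range-based lookup, assuming a simplified guest_page layout (host_user_addr omitted) and a hypothetical two-page memory map; it is an illustration, not the DPDK code path.

#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>

/* Simplified mirror of the guest_page layout from the diff above. */
struct guest_page {
	uint64_t guest_phys_addr;
	uint64_t host_iova;
	uint64_t size;
};

/*
 * Range comparator equivalent to guest_page_rangecmp(): return 0 when
 * the key GPA falls anywhere inside the candidate page, so the bsearch()
 * key no longer has to be aligned to the page size.
 */
static int
guest_page_rangecmp(const void *p1, const void *p2)
{
	const struct guest_page *key = p1;
	const struct guest_page *page = p2;

	if (key->guest_phys_addr >= page->guest_phys_addr) {
		if (key->guest_phys_addr < page->guest_phys_addr + page->size)
			return 0;
		return 1;
	}
	return -1;
}

int
main(void)
{
	/* Hypothetical guest memory map, sorted by guest_phys_addr. */
	struct guest_page pages[] = {
		{ .guest_phys_addr = 0x0,         .host_iova = 0x7f0000000000, .size = 0x40000000 },
		{ .guest_phys_addr = 0x100000000, .host_iova = 0x7f8000000000, .size = 0x40000000 },
	};
	struct guest_page key = { .guest_phys_addr = 0x100200000 };
	struct guest_page *page;

	page = bsearch(&key, pages, 2, sizeof(pages[0]), guest_page_rangecmp);
	if (page != NULL)
		printf("gpa 0x%" PRIx64 " -> host iova 0x%" PRIx64 "\n",
		       key.guest_phys_addr,
		       key.guest_phys_addr - page->guest_phys_addr + page->host_iova);
	return 0;
}

Returning 0 whenever the key lands inside the candidate page is what allows the lookup to work for guest memory maps whose pages are not all the same size, which the old aligned-key comparator could not handle.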
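
The vhost_async comment above argues that pkts_cmpl_flag can be a lock-free, per-packet reorder array: DMA copies may complete out of order and may even be observed from another thread, but each flag is written by exactly one thread and a single-byte write is atomic. The sketch below shows only that reorder/harvest pattern, using a power-of-two mask in the spirit of async_dma_vchan_info's ring_mask; the names (reorder_ring, mark_complete, poll_completed) are hypothetical and this is not the DPDK implementation, which additionally tracks pkts_info and the split/packed ring indexes.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define RING_SIZE 8                   /* must be a power of two */
#define RING_MASK (RING_SIZE - 1)     /* index mask, like ring_mask above */

/* Hypothetical reorder state modelled on vhost_async. */
struct reorder_ring {
	bool pkts_cmpl_flag[RING_SIZE]; /* true once the copy for that packet finished */
	uint16_t head;                  /* next packet to hand back in submission order */
};

/* Completion path: mark one slot done (a single-byte write). */
static void
mark_complete(struct reorder_ring *r, uint16_t pkt_idx)
{
	r->pkts_cmpl_flag[pkt_idx & RING_MASK] = true;
}

/* Poll path: count packets that are complete in submission order. */
static uint16_t
poll_completed(struct reorder_ring *r, uint16_t max)
{
	uint16_t n = 0;

	while (n < max && r->pkts_cmpl_flag[r->head & RING_MASK]) {
		r->pkts_cmpl_flag[r->head & RING_MASK] = false;
		r->head++;
		n++;
	}
	return n;
}

int
main(void)
{
	struct reorder_ring r;

	memset(&r, 0, sizeof(r));

	/* Copies finish out of order: packet 1 before packet 0. */
	mark_complete(&r, 1);
	printf("completed: %u\n", (unsigned)poll_completed(&r, 4)); /* 0: packet 0 still pending */
	mark_complete(&r, 0);
	printf("completed: %u\n", (unsigned)poll_completed(&r, 4)); /* 2: packets 0 and 1 */
	return 0;
}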