X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=lib%2Fvhost%2Fvhost.h;h=a9edc271aa7f6fd4457b5c1f0f1839b00d6d259c;hb=a2dfcd1ff609f5a4fd3b65774618a35c5c9f73c6;hp=f628714c245ec6866262dfbd0f280b4313183b0e;hpb=99a2dd955fba6e4cc23b77d590a033650ced9c45;p=dpdk.git

diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h
index f628714c24..a9edc271aa 100644
--- a/lib/vhost/vhost.h
+++ b/lib/vhost/vhost.h
@@ -17,25 +17,26 @@
 #include
 #include
-#include
 #include
+#include
 #include "rte_vhost.h"
-#include "rte_vdpa.h"
-#include "rte_vdpa_dev.h"
+#include "vdpa_driver.h"
 #include "rte_vhost_async.h"
 /* Used to indicate that the device is running on a data core */
-#define VIRTIO_DEV_RUNNING 1
+#define VIRTIO_DEV_RUNNING ((uint32_t)1 << 0)
 /* Used to indicate that the device is ready to operate */
-#define VIRTIO_DEV_READY 2
+#define VIRTIO_DEV_READY ((uint32_t)1 << 1)
 /* Used to indicate that the built-in vhost net device backend is enabled */
-#define VIRTIO_DEV_BUILTIN_VIRTIO_NET 4
+#define VIRTIO_DEV_BUILTIN_VIRTIO_NET ((uint32_t)1 << 2)
 /* Used to indicate that the device has its own data path and configured */
-#define VIRTIO_DEV_VDPA_CONFIGURED 8
+#define VIRTIO_DEV_VDPA_CONFIGURED ((uint32_t)1 << 3)
 /* Used to indicate that the feature negotiation failed */
-#define VIRTIO_DEV_FEATURES_FAILED 16
+#define VIRTIO_DEV_FEATURES_FAILED ((uint32_t)1 << 4)
+/* Used to indicate that the virtio_net tx code should fill TX ol_flags */
+#define VIRTIO_DEV_LEGACY_OL_FLAGS ((uint32_t)1 << 5)
 /* Backend value set by guest. */
 #define VIRTIO_DEV_STOPPED -1
@@ -46,8 +47,11 @@
 #define MAX_PKT_BURST 32
-#define VHOST_MAX_ASYNC_IT (MAX_PKT_BURST * 2)
-#define VHOST_MAX_ASYNC_VEC (BUF_VECTOR_MAX * 4)
+#define VHOST_MAX_ASYNC_IT (MAX_PKT_BURST)
+#define VHOST_MAX_ASYNC_VEC 2048
+#define VIRTIO_MAX_RX_PKTLEN 9728U
+#define VHOST_DMA_MAX_COPY_COMPLETE ((VIRTIO_MAX_RX_PKTLEN / RTE_MBUF_DEFAULT_DATAROOM) \
+		* MAX_PKT_BURST)
 #define PACKED_DESC_ENQUEUE_USED_FLAG(w) \
 	((w) ? (VRING_DESC_F_AVAIL | VRING_DESC_F_USED | VRING_DESC_F_WRITE) : \
@@ -117,6 +121,110 @@ struct vring_used_elem_packed {
 	uint32_t count;
 };
+/**
+ * iovec
+ */
+struct vhost_iovec {
+	void *src_addr;
+	void *dst_addr;
+	size_t len;
+};
+
+/**
+ * iovec iterator
+ */
+struct vhost_iov_iter {
+	/** pointer to the iovec array */
+	struct vhost_iovec *iov;
+	/** number of iovec in this iterator */
+	unsigned long nr_segs;
+};
+
+struct async_dma_vchan_info {
+	/* circular array to track if packet copy completes */
+	bool **pkts_cmpl_flag_addr;
+
+	/* max elements in 'pkts_cmpl_flag_addr' */
+	uint16_t ring_size;
+	/* ring index mask for 'pkts_cmpl_flag_addr' */
+	uint16_t ring_mask;
+
+	/**
+	 * DMA virtual channel lock. Although it is possible to bind DMA
+	 * virtual channels to data plane threads, the vhost control plane
+	 * thread could call data plane functions too, thus causing
+	 * DMA device contention.
+	 *
+	 * For example, in the VM exit case, the vhost control plane thread
+	 * needs to clear in-flight packets before disabling a vring, but
+	 * another data plane thread could be enqueuing packets to the same
+	 * vring with the same DMA virtual channel. As dmadev PMD functions
+	 * are lock-free, the control plane and data plane threads could
+	 * operate on the same DMA virtual channel at the same time.
+	 */
+	rte_spinlock_t dma_lock;
+};
+
+struct async_dma_info {
+	struct async_dma_vchan_info *vchans;
+	/* number of registered virtual channels */
+	uint16_t nr_vchans;
+};
+
+extern struct async_dma_info dma_copy_track[RTE_DMADEV_DEFAULT_MAX];
+
+/**
+ * inflight async packet information
+ */
+struct async_inflight_info {
+	struct rte_mbuf *mbuf;
+	uint16_t descs; /* num of descs inflight */
+	uint16_t nr_buffers; /* num of buffers inflight for packed ring */
+};
+
+struct vhost_async {
+	struct vhost_iov_iter iov_iter[VHOST_MAX_ASYNC_IT];
+	struct vhost_iovec iovec[VHOST_MAX_ASYNC_VEC];
+	uint16_t iter_idx;
+	uint16_t iovec_idx;
+
+	/* data transfer status */
+	struct async_inflight_info *pkts_info;
+	/**
+	 * Packet reorder array. "true" indicates that the DMA device has
+	 * completed all copies for the packet.
+	 *
+	 * Note that this array could be written by multiple threads
+	 * simultaneously. For example, in the case where thread0 and
+	 * thread1 receive packets from the NIC and then enqueue them to
+	 * vring0 and vring1 with their own DMA devices DMA0 and DMA1, it's
+	 * possible for thread0 to get completed copies belonging to
+	 * vring1 from DMA0, while thread0 is calling
+	 * rte_vhost_poll_enqueue_completed() for vring0 and thread1 is
+	 * calling rte_vhost_submit_enqueue_burst() for vring1. In this
+	 * case, vq->access_lock cannot protect pkts_cmpl_flag of vring1.
+	 *
+	 * However, since offloading is done on a per-packet basis, each
+	 * packet flag will only be written by one thread. And a single-byte
+	 * write is atomic, so no lock for pkts_cmpl_flag is needed.
+	 */
+	bool *pkts_cmpl_flag;
+	uint16_t pkts_idx;
+	uint16_t pkts_inflight_n;
+	union {
+		struct vring_used_elem *descs_split;
+		struct vring_used_elem_packed *buffers_packed;
+	};
+	union {
+		uint16_t desc_idx_split;
+		uint16_t buffer_idx_packed;
+	};
+	union {
+		uint16_t last_desc_idx_split;
+		uint16_t last_buffer_idx_packed;
+	};
+};
+
 /**
  * Structure contains variables relevant to RX/TX virtqueues.
 */
@@ -162,6 +270,7 @@ struct vhost_virtqueue {
 	uint16_t batch_copy_nb_elems;
 	struct batch_copy_elem *batch_copy_elems;
+	int numa_node;
 	bool used_wrap_counter;
 	bool avail_wrap_counter;
@@ -190,25 +299,7 @@ struct vhost_virtqueue {
 	struct rte_vhost_resubmit_info *resubmit_inflight;
 	uint64_t global_counter;
-	/* operation callbacks for async dma */
-	struct rte_vhost_async_channel_ops async_ops;
-
-	struct rte_vhost_iov_iter *it_pool;
-	struct iovec *vec_pool;
-
-	/* async data transfer status */
-	struct async_inflight_info *async_pkts_info;
-	uint16_t async_pkts_idx;
-	uint16_t async_pkts_inflight_n;
-	uint16_t async_last_pkts_n;
-	struct vring_used_elem *async_descs_split;
-	uint16_t async_desc_idx;
-	uint16_t last_async_desc_idx;
-
-	/* vq async features */
-	bool async_inorder;
-	bool async_registered;
-	uint16_t async_threshold;
+	struct vhost_async *async;
 	int notif_enable;
 #define VIRTIO_UNINITIALIZED_NOTIF (-1)
@@ -333,7 +424,8 @@ struct vring_packed_desc_event {
 struct guest_page {
 	uint64_t guest_phys_addr;
-	uint64_t host_phys_addr;
+	uint64_t host_iova;
+	uint64_t host_user_addr;
 	uint64_t size;
 };
@@ -359,6 +451,7 @@ struct virtio_net {
 	int16_t broadcast_rarp;
 	uint32_t nr_vring;
 	int async_copy;
+	int extbuf;
 	int linearbuf;
 	struct vhost_virtqueue *virtqueue[VHOST_MAX_QUEUE_PAIRS * 2];
@@ -372,7 +465,7 @@ struct virtio_net {
 	uint16_t mtu;
 	uint8_t status;
-	struct vhost_device_ops const *notify_ops;
+	struct rte_vhost_device_ops const *notify_ops;
 	uint32_t nr_guest_pages;
 	uint32_t max_guest_pages;
@@ -546,8 +639,7 @@ extern int vhost_data_log_level;
 #define PRINT_PACKET(device, addr, size, header) do {} while (0)
 #endif
-#define MAX_VHOST_DEVICE 1024
-extern struct virtio_net *vhost_devices[MAX_VHOST_DEVICE];
+extern struct virtio_net *vhost_devices[RTE_MAX_VHOST_DEVICE];
 #define VHOST_BINARY_SEARCH_THRESH 256
@@ -565,6 +657,20 @@ static __rte_always_inline int guest_page_addrcmp(const void *p1,
 	return 0;
 }
+static __rte_always_inline int guest_page_rangecmp(const void *p1, const void *p2)
+{
+	const struct guest_page *page1 = (const struct guest_page *)p1;
+	const struct guest_page *page2 = (const struct guest_page *)p2;
+
+	if (page1->guest_phys_addr >= page2->guest_phys_addr) {
+		if (page1->guest_phys_addr < page2->guest_phys_addr + page2->size)
+			return 0;
+		else
+			return 1;
+	} else
+		return -1;
+}
+
 static __rte_always_inline rte_iova_t
 gpa_to_first_hpa(struct virtio_net *dev, uint64_t gpa,
 	uint64_t gpa_size, uint64_t *hpa_size)
@@ -575,20 +681,20 @@ gpa_to_first_hpa(struct virtio_net *dev, uint64_t gpa,
 	*hpa_size = gpa_size;
 	if (dev->nr_guest_pages >= VHOST_BINARY_SEARCH_THRESH) {
-		key.guest_phys_addr = gpa & ~(dev->guest_pages[0].size - 1);
+		key.guest_phys_addr = gpa;
 		page = bsearch(&key, dev->guest_pages, dev->nr_guest_pages,
-			sizeof(struct guest_page), guest_page_addrcmp);
+			sizeof(struct guest_page), guest_page_rangecmp);
 		if (page) {
 			if (gpa + gpa_size <= page->guest_phys_addr + page->size) {
 				return gpa - page->guest_phys_addr +
-					page->host_phys_addr;
+					page->host_iova;
 			} else if (gpa < page->guest_phys_addr + page->size) {
 				*hpa_size = page->guest_phys_addr +
 					page->size - gpa;
 				return gpa - page->guest_phys_addr +
-					page->host_phys_addr;
+					page->host_iova;
 			}
 		}
 	} else {
@@ -599,13 +705,13 @@ gpa_to_first_hpa(struct virtio_net *dev, uint64_t gpa,
 			if (gpa + gpa_size <= page->guest_phys_addr + page->size) {
 				return gpa - page->guest_phys_addr +
-					page->host_phys_addr;
+					page->host_iova;
 			} else if (gpa < page->guest_phys_addr + page->size) {
 				*hpa_size =
 					page->guest_phys_addr + page->size - gpa;
 				return gpa - page->guest_phys_addr +
-					page->host_phys_addr;
+					page->host_iova;
 			}
 		}
 	}
@@ -674,13 +780,13 @@ int alloc_vring_queue(struct virtio_net *dev, uint32_t vring_idx);
 void vhost_attach_vdpa_device(int vid, struct rte_vdpa_device *dev);
 void vhost_set_ifname(int, const char *if_name, unsigned int if_len);
-void vhost_set_builtin_virtio_net(int vid, bool enable);
+void vhost_setup_virtio_net(int vid, bool enable, bool legacy_ol_flags);
 void vhost_enable_extbuf(int vid);
 void vhost_enable_linearbuf(int vid);
 int vhost_enable_guest_notification(struct virtio_net *dev,
 		struct vhost_virtqueue *vq, int enable);
-struct vhost_device_ops const *vhost_driver_callback_get(const char *path);
+struct rte_vhost_device_ops const *vhost_driver_callback_get(const char *path);
 /*
  * Backend-specific cleanup.
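
The gpa_to_first_hpa() hunks above replace guest_page_addrcmp() with the new guest_page_rangecmp(), so bsearch() can be keyed on the raw guest physical address instead of a page-aligned one, and a hit now returns host_iova rather than host_phys_addr. Below is a minimal, self-contained sketch of that range-based lookup, assuming a simplified guest_page layout (host_user_addr omitted) and a hypothetical two-page memory map; it is an illustration, not the DPDK code path.

#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>

/* Simplified mirror of the guest_page layout from the diff above. */
struct guest_page {
	uint64_t guest_phys_addr;
	uint64_t host_iova;
	uint64_t size;
};

/*
 * Range comparator equivalent to guest_page_rangecmp(): return 0 when
 * the key GPA falls anywhere inside the candidate page, so the bsearch()
 * key no longer has to be aligned to the page size.
 */
static int
guest_page_rangecmp(const void *p1, const void *p2)
{
	const struct guest_page *key = p1;
	const struct guest_page *page = p2;

	if (key->guest_phys_addr >= page->guest_phys_addr) {
		if (key->guest_phys_addr < page->guest_phys_addr + page->size)
			return 0;
		return 1;
	}
	return -1;
}

int
main(void)
{
	/* Hypothetical guest memory map, sorted by guest_phys_addr. */
	struct guest_page pages[] = {
		{ .guest_phys_addr = 0x0,         .host_iova = 0x7f0000000000, .size = 0x40000000 },
		{ .guest_phys_addr = 0x100000000, .host_iova = 0x7f8000000000, .size = 0x40000000 },
	};
	struct guest_page key = { .guest_phys_addr = 0x100200000 };
	struct guest_page *page;

	page = bsearch(&key, pages, 2, sizeof(pages[0]), guest_page_rangecmp);
	if (page != NULL)
		printf("gpa 0x%" PRIx64 " -> host iova 0x%" PRIx64 "\n",
		       key.guest_phys_addr,
		       key.guest_phys_addr - page->guest_phys_addr + page->host_iova);
	return 0;
}

Returning 0 whenever the key lands inside the candidate page is what allows the lookup to work for guest memory maps whose pages are not all the same size, which the old aligned-key comparator could not handle.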
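
The vhost_async comment above argues that pkts_cmpl_flag can be a lock-free, per-packet reorder array: DMA copies may complete out of order and may even be observed from another thread, but each flag is written by exactly one thread and a single-byte write is atomic. The sketch below shows only that reorder/harvest pattern, using a power-of-two mask in the spirit of async_dma_vchan_info's ring_mask; the names (reorder_ring, mark_complete, poll_completed) are hypothetical and this is not the DPDK implementation, which additionally tracks pkts_info and the split/packed ring indexes.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define RING_SIZE 8                   /* must be a power of two */
#define RING_MASK (RING_SIZE - 1)     /* index mask, like ring_mask above */

/* Hypothetical reorder state modelled on vhost_async. */
struct reorder_ring {
	bool pkts_cmpl_flag[RING_SIZE]; /* true once the copy for that packet finished */
	uint16_t head;                  /* next packet to hand back in submission order */
};

/* Completion path: mark one slot done (a single-byte write). */
static void
mark_complete(struct reorder_ring *r, uint16_t pkt_idx)
{
	r->pkts_cmpl_flag[pkt_idx & RING_MASK] = true;
}

/* Poll path: count packets that are complete in submission order. */
static uint16_t
poll_completed(struct reorder_ring *r, uint16_t max)
{
	uint16_t n = 0;

	while (n < max && r->pkts_cmpl_flag[r->head & RING_MASK]) {
		r->pkts_cmpl_flag[r->head & RING_MASK] = false;
		r->head++;
		n++;
	}
	return n;
}

int
main(void)
{
	struct reorder_ring r;

	memset(&r, 0, sizeof(r));

	/* Copies finish out of order: packet 1 before packet 0. */
	mark_complete(&r, 1);
	printf("completed: %u\n", (unsigned)poll_completed(&r, 4)); /* 0: packet 0 still pending */
	mark_complete(&r, 0);
	printf("completed: %u\n", (unsigned)poll_completed(&r, 4)); /* 2: packets 0 and 1 */
	return 0;
}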