replace zero-length arrays with flexible ones
diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h
index b3f0c1d..14235aa 100644
--- a/lib/vhost/vhost.h
+++ b/lib/vhost/vhost.h
 
 #include <rte_log.h>
 #include <rte_ether.h>
-#include <rte_rwlock.h>
 #include <rte_malloc.h>
+#include <rte_dmadev.h>
 
 #include "rte_vhost.h"
-#include "rte_vdpa.h"
 #include "vdpa_driver.h"
 
 #include "rte_vhost_async.h"
@@ -38,6 +37,8 @@
 #define VIRTIO_DEV_FEATURES_FAILED ((uint32_t)1 << 4)
 /* Used to indicate that the virtio_net tx code should fill TX ol_flags */
 #define VIRTIO_DEV_LEGACY_OL_FLAGS ((uint32_t)1 << 5)
+/* Used to indicate the application has requested statistics collection */
+#define VIRTIO_DEV_STATS_ENABLED ((uint32_t)1 << 6)
 
 /* Backend value set by guest. */
 #define VIRTIO_DEV_STOPPED -1
@@ -50,6 +51,9 @@
 
 #define VHOST_MAX_ASYNC_IT (MAX_PKT_BURST)
 #define VHOST_MAX_ASYNC_VEC 2048
+#define VIRTIO_MAX_RX_PKTLEN 9728U
+#define VHOST_DMA_MAX_COPY_COMPLETE ((VIRTIO_MAX_RX_PKTLEN / RTE_MBUF_DEFAULT_DATAROOM) \
+               * MAX_PKT_BURST)
 
 #define PACKED_DESC_ENQUEUE_USED_FLAG(w)       \
        ((w) ? (VRING_DESC_F_AVAIL | VRING_DESC_F_USED | VRING_DESC_F_WRITE) : \
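
For a sense of scale, assuming the usual DPDK values of RTE_MBUF_DEFAULT_DATAROOM (2048 bytes) and MAX_PKT_BURST (32, defined earlier in this header), VHOST_DMA_MAX_COPY_COMPLETE works out to (9728 / 2048) * 32 = 4 * 32 = 128, i.e. up to 128 DMA copy completions are polled at a time, enough to cover a full burst of mbuf-chained jumbo frames.
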
@@ -119,6 +123,75 @@ struct vring_used_elem_packed {
        uint32_t count;
 };
 
+/**
+ * Virtqueue statistics
+ */
+struct virtqueue_stats {
+       uint64_t packets;
+       uint64_t bytes;
+       uint64_t multicast;
+       uint64_t broadcast;
+       /* Size bins in array as in RFC 2819: undersized [0], 64 [1], 65-127 [2],
+        * 128-255 [3], 256-511 [4], 512-1023 [5], 1024-1518 [6], oversized [7] */
+       uint64_t size_bins[8];
+       uint64_t guest_notifications;
+       uint64_t iotlb_hits;
+       uint64_t iotlb_misses;
+       uint64_t inflight_submitted;
+       uint64_t inflight_completed;
+};
+
+/**
+ * iovec
+ */
+struct vhost_iovec {
+       void *src_addr;
+       void *dst_addr;
+       size_t len;
+};
+
+/**
+ * iovec iterator
+ */
+struct vhost_iov_iter {
+       /** pointer to the iovec array */
+       struct vhost_iovec *iov;
+       /** number of iovec in this iterator */
+       unsigned long nr_segs;
+};
+
+struct async_dma_vchan_info {
+       /* circular array to track whether each packet copy has completed */
+       bool **pkts_cmpl_flag_addr;
+
+       /* max elements in 'pkts_cmpl_flag_addr' */
+       uint16_t ring_size;
+       /* ring index mask for 'pkts_cmpl_flag_addr' */
+       uint16_t ring_mask;
+
+       /**
+        * DMA virtual channel lock. Although it is possible to bind DMA
+        * virtual channels to data plane threads, the vhost control plane
+        * thread may call data plane functions too, thus causing DMA
+        * device contention.
+        *
+        * For example, in the VM exit case, the vhost control plane thread
+        * needs to clear in-flight packets before disabling a vring, while
+        * another data plane thread could be enqueuing packets to the same
+        * vring with the same DMA virtual channel. As dmadev PMD functions
+        * are lock-free, the control plane and data plane threads could
+        * operate on the same DMA virtual channel at the same time.
+        */
+       rte_spinlock_t dma_lock;
+};
+
+struct async_dma_info {
+       struct async_dma_vchan_info *vchans;
+       /* number of registered virtual channels */
+       uint16_t nr_vchans;
+};
+
+extern struct async_dma_info dma_copy_track[RTE_DMADEV_DEFAULT_MAX];
+
 /**
  * inflight async packet information
  */
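
As an aside, the size_bins[] array in struct virtqueue_stats above follows the RFC 2819 etherStats size groups. A minimal sketch of how a datapath could bin a packet length, assuming this header is included (the helper name is hypothetical, not part of vhost.h):

/* Hypothetical helper (not part of vhost.h): map a packet length to the
 * RFC 2819 style size_bins[] of struct virtqueue_stats. */
static inline void
vhost_stats_size_bin(struct virtqueue_stats *stats, uint32_t pkt_len)
{
        uint32_t bin;

        if (pkt_len == 64)
                bin = 1;                        /* exactly 64 bytes */
        else if (pkt_len > 64 && pkt_len < 1024)
                /* 65-127, 128-255, 256-511, 512-1023 map to bins 2-5 */
                bin = (sizeof(pkt_len) * 8) - __builtin_clz(pkt_len) - 5;
        else if (pkt_len < 64)
                bin = 0;                        /* undersized */
        else if (pkt_len < 1519)
                bin = 6;                        /* 1024-1518 */
        else
                bin = 7;                        /* oversized */

        stats->size_bins[bin]++;
}
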
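
The dma_copy_track[] table above exists so that whichever thread happens to poll a DMA virtual channel can flag completed copies for the virtqueue that owns them. A simplified, hypothetical sketch of that marking step, assuming this header and <rte_dmadev.h> are included (rte_dma_completed() is the standard dmadev completion API; the loop is not the actual implementation):

/* Illustrative sketch only: poll one DMA virtual channel and set the
 * per-packet completion flags registered in its pkts_cmpl_flag_addr ring. */
static inline uint16_t
vhost_async_dma_mark_completed(int16_t dma_id, uint16_t vchan_id, uint16_t max_pkts)
{
        struct async_dma_vchan_info *info =
                &dma_copy_track[dma_id].vchans[vchan_id];
        uint16_t last_idx = 0, nr_copies, i;
        bool has_error = false;

        rte_spinlock_lock(&info->dma_lock);

        /* number of copy jobs this vchan finished since the last poll */
        nr_copies = rte_dma_completed(dma_id, vchan_id, max_pkts,
                        &last_idx, &has_error);

        /* the finished jobs are the nr_copies ring slots ending at last_idx */
        for (i = 0; i < nr_copies; i++) {
                uint16_t slot = (last_idx - nr_copies + 1 + i) & info->ring_mask;
                bool *flag = info->pkts_cmpl_flag_addr[slot];

                /* single-byte store; the virtqueue owner reads it later */
                if (flag != NULL)
                        *flag = true;
        }

        rte_spinlock_unlock(&info->dma_lock);

        return nr_copies;
}
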
@@ -126,19 +199,36 @@ struct async_inflight_info {
        struct rte_mbuf *mbuf;
        uint16_t descs; /* num of descs inflight */
        uint16_t nr_buffers; /* num of buffers inflight for packed ring */
+       struct virtio_net_hdr nethdr;
 };
 
 struct vhost_async {
-       /* operation callbacks for DMA */
-       struct rte_vhost_async_channel_ops ops;
-
-       struct rte_vhost_iov_iter iov_iter[VHOST_MAX_ASYNC_IT];
-       struct rte_vhost_iovec iovec[VHOST_MAX_ASYNC_VEC];
+       struct vhost_iov_iter iov_iter[VHOST_MAX_ASYNC_IT];
+       struct vhost_iovec iovec[VHOST_MAX_ASYNC_VEC];
        uint16_t iter_idx;
        uint16_t iovec_idx;
 
        /* data transfer status */
        struct async_inflight_info *pkts_info;
+       /**
+        * Packet reorder array. "true" indicates that the DMA device
+        * has completed all copies for the packet.
+        *
+        * Note that this array could be written by multiple threads
+        * simultaneously. For example, in the case where thread0 and
+        * thread1 receive packets from the NIC and then enqueue them to
+        * vring0 and vring1 with their own DMA devices DMA0 and DMA1,
+        * it is possible for thread0 to get completed copies belonging
+        * to vring1 from DMA0 while thread0 is calling
+        * rte_vhost_poll_enqueue_completed() for vring0 and thread1 is
+        * calling rte_vhost_submit_enqueue_burst() for vring1. In this
+        * case, vq->access_lock cannot protect pkts_cmpl_flag of vring1.
+        *
+        * However, since offloading is done on a per-packet basis, each
+        * packet flag will only be written by one thread. And since a
+        * single-byte write is atomic, no lock on pkts_cmpl_flag is needed.
+        */
+       bool *pkts_cmpl_flag;
        uint16_t pkts_idx;
        uint16_t pkts_inflight_n;
        union {
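
On the virtqueue owner's side, pkts_cmpl_flag is scanned from the oldest in-flight packet forward so that completions are reported in submission order. A simplified, hypothetical sketch of that scan, assuming vq_size is a power of two:

/* Hypothetical sketch: count how many of the oldest in-flight packets have
 * all their DMA copies done, stopping at the first unfinished one so that
 * packets are completed in the order they were submitted. */
static inline uint16_t
vhost_async_drain_window(struct vhost_async *async, uint16_t vq_size,
                uint16_t max_pkts)
{
        /* oldest in-flight slot */
        uint16_t start = (async->pkts_idx - async->pkts_inflight_n) & (vq_size - 1);
        uint16_t nr_cpl = 0;

        while (nr_cpl < async->pkts_inflight_n && nr_cpl < max_pkts) {
                uint16_t slot = (start + nr_cpl) & (vq_size - 1);

                /* single-byte read; see the pkts_cmpl_flag comment above */
                if (!async->pkts_cmpl_flag[slot])
                        break;

                async->pkts_cmpl_flag[slot] = false;
                nr_cpl++;
        }

        return nr_cpl;
}
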
@@ -235,6 +325,7 @@ struct vhost_virtqueue {
 #define VIRTIO_UNINITIALIZED_NOTIF     (-1)
 
        struct vhost_vring_addr ring_addrs;
+       struct virtqueue_stats  stats;
 } __rte_cache_aligned;
 
 /* Virtio device status as per Virtio specification */
@@ -354,7 +445,8 @@ struct vring_packed_desc_event {
 
 struct guest_page {
        uint64_t guest_phys_addr;
-       uint64_t host_phys_addr;
+       uint64_t host_iova;
+       uint64_t host_user_addr;
        uint64_t size;
 };
 
@@ -568,8 +660,7 @@ extern int vhost_data_log_level;
 #define PRINT_PACKET(device, addr, size, header) do {} while (0)
 #endif
 
-#define MAX_VHOST_DEVICE       1024
-extern struct virtio_net *vhost_devices[MAX_VHOST_DEVICE];
+extern struct virtio_net *vhost_devices[RTE_MAX_VHOST_DEVICE];
 
 #define VHOST_BINARY_SEARCH_THRESH 256
 
@@ -618,13 +709,13 @@ gpa_to_first_hpa(struct virtio_net *dev, uint64_t gpa,
                        if (gpa + gpa_size <=
                                        page->guest_phys_addr + page->size) {
                                return gpa - page->guest_phys_addr +
-                                       page->host_phys_addr;
+                                       page->host_iova;
                        } else if (gpa < page->guest_phys_addr +
                                                page->size) {
                                *hpa_size = page->guest_phys_addr +
                                        page->size - gpa;
                                return gpa - page->guest_phys_addr +
-                                       page->host_phys_addr;
+                                       page->host_iova;
                        }
                }
        } else {
@@ -635,13 +726,13 @@ gpa_to_first_hpa(struct virtio_net *dev, uint64_t gpa,
                                if (gpa + gpa_size <=
                                        page->guest_phys_addr + page->size) {
                                        return gpa - page->guest_phys_addr +
-                                               page->host_phys_addr;
+                                               page->host_iova;
                                } else if (gpa < page->guest_phys_addr +
                                                        page->size) {
                                        *hpa_size = page->guest_phys_addr +
                                                page->size - gpa;
                                        return gpa - page->guest_phys_addr +
-                                               page->host_phys_addr;
+                                               page->host_iova;
                                }
                        }
                }
@@ -710,7 +801,7 @@ int alloc_vring_queue(struct virtio_net *dev, uint32_t vring_idx);
 void vhost_attach_vdpa_device(int vid, struct rte_vdpa_device *dev);
 
 void vhost_set_ifname(int, const char *if_name, unsigned int if_len);
-void vhost_setup_virtio_net(int vid, bool enable, bool legacy_ol_flags);
+void vhost_setup_virtio_net(int vid, bool enable, bool legacy_ol_flags, bool stats_enabled);
 void vhost_enable_extbuf(int vid);
 void vhost_enable_linearbuf(int vid);
 int vhost_enable_guest_notification(struct virtio_net *dev,
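
The new stats_enabled argument presumably just toggles VIRTIO_DEV_STATS_ENABLED on the device. A plausible sketch of the vhost.c side, which this diff does not show, so treat the body as an assumption:

/* Assumed sketch of the vhost.c side; the stats_enabled handling is the
 * point, the other flags mirror the existing parameters. */
void
vhost_setup_virtio_net(int vid, bool enable, bool legacy_ol_flags, bool stats_enabled)
{
        struct virtio_net *dev = get_device(vid);

        if (dev == NULL)
                return;

        if (enable)
                dev->flags |= VIRTIO_DEV_BUILTIN_VIRTIO_NET;
        else
                dev->flags &= ~VIRTIO_DEV_BUILTIN_VIRTIO_NET;

        if (legacy_ol_flags)
                dev->flags |= VIRTIO_DEV_LEGACY_OL_FLAGS;

        if (stats_enabled)
                dev->flags |= VIRTIO_DEV_STATS_ENABLED;
        else
                dev->flags &= ~VIRTIO_DEV_STATS_ENABLED;
}
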
@@ -786,6 +877,8 @@ vhost_vring_call_split(struct virtio_net *dev, struct vhost_virtqueue *vq)
                                        (vq->callfd >= 0)) ||
                                unlikely(!signalled_used_valid)) {
                        eventfd_write(vq->callfd, (eventfd_t) 1);
+                       if (dev->flags & VIRTIO_DEV_STATS_ENABLED)
+                               vq->stats.guest_notifications++;
                        if (dev->notify_ops->guest_notified)
                                dev->notify_ops->guest_notified(dev->vid);
                }
@@ -794,6 +887,8 @@ vhost_vring_call_split(struct virtio_net *dev, struct vhost_virtqueue *vq)
                if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)
                                && (vq->callfd >= 0)) {
                        eventfd_write(vq->callfd, (eventfd_t)1);
+                       if (dev->flags & VIRTIO_DEV_STATS_ENABLED)
+                               vq->stats.guest_notifications++;
                        if (dev->notify_ops->guest_notified)
                                dev->notify_ops->guest_notified(dev->vid);
                }
@@ -887,5 +982,4 @@ mbuf_is_consumed(struct rte_mbuf *m)
 
        return true;
 }
-
 #endif /* _VHOST_NET_CDEV_H_ */