+/**
+ * iovec describing a single copy (source address, destination address and length)
+ */
+struct vhost_iovec {
+ void *src_addr;
+ void *dst_addr;
+ size_t len;
+};
+
+/**
+ * iovec iterator
+ */
+struct vhost_iov_iter {
+ /** pointer to the iovec array */
+ struct vhost_iovec *iov;
+ /** number of iovecs in this iterator */
+ unsigned long nr_segs;
+};
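
For illustration, a copy spanning a single guest buffer would be described by one iterator holding one iovec. A minimal sketch of filling these structures; the helper name is hypothetical, not part of this patch:

/* Hypothetical helper: describe a one-segment copy job. */
static inline void
fill_one_segment(struct vhost_iov_iter *iter, struct vhost_iovec *vec,
		void *src, void *dst, size_t len)
{
	vec->src_addr = src;
	vec->dst_addr = dst;
	vec->len = len;

	iter->iov = vec;
	iter->nr_segs = 1;
}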
+
+struct async_dma_vchan_info {
+ /* circular array tracking whether each packet's copies have completed */
+ bool **pkts_cmpl_flag_addr;
+
+ /* max elements in 'pkts_cmpl_flag_addr' */
+ uint16_t ring_size;
+ /* ring index mask for 'pkts_cmpl_flag_addr' */
+ uint16_t ring_mask;
+
+ /**
+ * DMA virtual channel lock. Although DMA virtual channels can be
+ * bound to data plane threads, the vhost control plane thread may
+ * call data plane functions too, causing DMA device contention.
+ *
+ * For example, on VM exit, the vhost control plane thread needs to
+ * clear in-flight packets before disabling a vring, while another
+ * data plane thread may be enqueuing packets to the same vring with
+ * the same DMA virtual channel. As dmadev PMD functions are
+ * lock-free, the control plane and data plane threads could operate
+ * on the same DMA virtual channel at the same time. (A locking
+ * sketch follows this structure.)
+ */
+ rte_spinlock_t dma_lock;
+};
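
As the comment above notes, dmadev PMD functions are lock-free, so every path that touches a shared virtual channel has to take dma_lock around the enqueue calls. A minimal sketch assuming DPDK's rte_dma_copy()/rte_dma_submit(); the wrapper itself is illustrative, not this patch's code:

#include <rte_dmadev.h>
#include <rte_spinlock.h>

/* Illustrative: serialize lock-free dmadev calls on one virtual channel. */
static int
submit_one_copy(struct async_dma_vchan_info *info, int16_t dma_id,
		uint16_t vchan, rte_iova_t src, rte_iova_t dst, uint32_t len)
{
	int ret;

	rte_spinlock_lock(&info->dma_lock);
	ret = rte_dma_copy(dma_id, vchan, src, dst, len, 0);
	if (ret >= 0)
		ret = rte_dma_submit(dma_id, vchan);
	rte_spinlock_unlock(&info->dma_lock);

	return ret;
}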
+
+struct async_dma_info {
+ struct async_dma_vchan_info *vchans;
+ /* number of registered virtual channels */
+ uint16_t nr_vchans;
+};
+
+extern struct async_dma_info dma_copy_track[RTE_DMADEV_DEFAULT_MAX];
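
pkts_cmpl_flag_addr maps dmadev ring indexes back to per-packet completion flags. A hedged sketch of draining completions with dmadev's rte_dma_completed() (same headers as the previous sketch); it assumes one DMA copy per packet, which real transfers need not satisfy:

/* Illustrative: mark packets whose DMA copies have completed. */
static uint16_t
drain_dma_completions(int16_t dma_id, uint16_t vchan, uint16_t max_pkts)
{
	struct async_dma_vchan_info *info =
		&dma_copy_track[dma_id].vchans[vchan];
	uint16_t last_idx = 0, nr_cpl, i;
	bool error = false;

	rte_spinlock_lock(&info->dma_lock);
	nr_cpl = rte_dma_completed(dma_id, vchan, max_pkts, &last_idx, &error);
	/* completed ring indexes are (last_idx - nr_cpl + 1) .. last_idx */
	for (i = 0; i < nr_cpl; i++) {
		uint16_t idx = (uint16_t)(last_idx - nr_cpl + 1 + i) &
			info->ring_mask;

		*info->pkts_cmpl_flag_addr[idx] = true;
	}
	rte_spinlock_unlock(&info->dma_lock);

	return nr_cpl;
}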
+
+/**
+ * inflight async packet information
+ */
+struct async_inflight_info {
+ struct rte_mbuf *mbuf;
+ uint16_t descs; /* num of descs inflight */
+ uint16_t nr_buffers; /* num of buffers inflight for packed ring */
+};
+
+struct vhost_async {
+ struct vhost_iov_iter iov_iter[VHOST_MAX_ASYNC_IT];
+ struct vhost_iovec iovec[VHOST_MAX_ASYNC_VEC];
+ uint16_t iter_idx;
+ uint16_t iovec_idx;
+
+ /* data transfer status */
+ struct async_inflight_info *pkts_info;
+ /**
+ * Packet reorder array. "true" indicates that the DMA device has
+ * completed all copies for the packet.
+ *
+ * Note that this array could be written by multiple threads
+ * simultaneously. For example, suppose thread0 and thread1 receive
+ * packets from the NIC and enqueue them to vring0 and vring1 with
+ * their own DMA devices, DMA0 and DMA1. It is then possible for
+ * thread0 to get completed copies belonging to vring1 from DMA0
+ * while thread0 is calling rte_vhost_poll_enqueue_completed() for
+ * vring0 and thread1 is calling rte_vhost_submit_enqueue_burst()
+ * for vring1. In this case, vq->access_lock cannot protect
+ * pkts_cmpl_flag of vring1.
+ *
+ * However, since offloading is done on a per-packet basis, each
+ * packet's flag is only written by one thread, and a single-byte
+ * write is atomic, so no lock on pkts_cmpl_flag is needed. (A
+ * sketch of the reorder scan follows this structure.)
+ */
+ bool *pkts_cmpl_flag;
+ uint16_t pkts_idx;
+ uint16_t pkts_inflight_n;
+ union {
+ struct vring_used_elem *descs_split;
+ struct vring_used_elem_packed *buffers_packed;
+ };
+ union {
+ uint16_t desc_idx_split;
+ uint16_t buffer_idx_packed;
+ };
+ union {
+ uint16_t last_desc_idx_split;
+ uint16_t last_buffer_idx_packed;
+ };
+};
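
Because completions may land out of order (see the pkts_cmpl_flag comment above), a poll path can only hand back packets whose flags form a contiguous completed run starting at the oldest in-flight entry. A minimal sketch; treating pkts_idx as the next free slot and requiring a power-of-two ring size are assumptions for illustration:

/* Illustrative: count in-order completed packets, oldest first. */
static uint16_t
count_done_in_order(struct vhost_async *async, uint16_t ring_mask)
{
	/* oldest in-flight slot; uint16_t wraparound is safe for 2^n rings */
	uint16_t start = (uint16_t)(async->pkts_idx - async->pkts_inflight_n) &
		ring_mask;
	uint16_t i, nr_done = 0;

	for (i = 0; i < async->pkts_inflight_n; i++) {
		uint16_t idx = (start + i) & ring_mask;

		if (!async->pkts_cmpl_flag[idx])
			break;
		async->pkts_cmpl_flag[idx] = false;
		nr_done++;
	}

	return nr_done;
}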
+