vhost: update packed ring dequeue
author	Marvin Liu <yong.liu@intel.com>
Thu, 24 Oct 2019 16:08:28 +0000 (00:08 +0800)
committer	Ferruh Yigit <ferruh.yigit@intel.com>
Fri, 25 Oct 2019 17:20:47 +0000 (19:20 +0200)
Buffer used ring updates in the vhost dequeue function for as long as
possible, to reduce coordination overhead with the virtio driver. To
support this buffering, the shadow used ring element now also stores the
descriptor's flags, and the index of the first shadowed entry is recorded
so the number of buffered descriptors can be calculated.
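
For illustration, below is a minimal, self-contained sketch of the flag
scheme this patch relies on: a packed ring descriptor is marked used by
setting its AVAIL and USED flag bits to match the device's used wrap
counter, and the flags word must be stored after the id/len so the driver
never observes a half-updated entry. The descriptor layout here is
simplified (the real struct vring_packed_desc also carries a 64-bit addr
field) and the helper names are illustrative, not part of the vhost API.

#include <stdint.h>
#include <stdio.h>

#define VRING_DESC_F_AVAIL (1u << 7)
#define VRING_DESC_F_USED  (1u << 15)

/* Simplified model of a packed ring descriptor (addr omitted). */
struct packed_desc {
	uint16_t id;
	uint32_t len;
	uint16_t flags;
};

/* Same logic as PACKED_DESC_DEQUEUE_USED_FLAG(w) in the patch: with
 * the wrap counter set, both AVAIL and USED are set; with it clear,
 * both are clear. AVAIL == USED is what signals "used" to the driver,
 * whichever value the counter currently has. */
static uint16_t
dequeue_used_flags(int used_wrap_counter)
{
	return used_wrap_counter ?
	       (VRING_DESC_F_AVAIL | VRING_DESC_F_USED) : 0;
}

int
main(void)
{
	struct packed_desc ring[4] = {0};
	uint16_t last_used = 0;
	int wrap = 1;

	/* Write id/len first and flags last; the patch enforces the
	 * same ordering with rte_smp_wmb() before the flags store. */
	ring[last_used].id = 0;
	ring[last_used].len = 0;
	ring[last_used].flags = dequeue_used_flags(wrap);

	printf("flags = 0x%04x\n", (unsigned int)ring[last_used].flags);
	return 0;
}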

Signed-off-by: Marvin Liu <yong.liu@intel.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
lib/librte_vhost/vhost.h
lib/librte_vhost/virtio_net.c

diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index f8dbe84..9f11b28 100644
@@ -42,6 +42,8 @@
 #define PACKED_DESC_ENQUEUE_USED_FLAG(w)       \
        ((w) ? (VRING_DESC_F_AVAIL | VRING_DESC_F_USED | VRING_DESC_F_WRITE) : \
                VRING_DESC_F_WRITE)
+#define PACKED_DESC_DEQUEUE_USED_FLAG(w)       \
+       ((w) ? (VRING_DESC_F_AVAIL | VRING_DESC_F_USED) : 0x0)
 #define PACKED_DESC_SINGLE_DEQUEUE_FLAG (VRING_DESC_F_NEXT | \
                                         VRING_DESC_F_INDIRECT)
 
@@ -114,6 +116,7 @@ struct log_cache_entry {
 
 struct vring_used_elem_packed {
        uint16_t id;
+       uint16_t flags;
        uint32_t len;
        uint32_t count;
 };
@@ -179,6 +182,8 @@ struct vhost_virtqueue {
        uint16_t                shadow_used_idx;
        /* Record packed ring enqueue latest desc cache aligned index */
        uint16_t                shadow_aligned_idx;
+       /* Record packed ring first dequeue desc index */
+       uint16_t                shadow_last_used_idx;
        struct vhost_vring_addr ring_addrs;
 
        struct batch_copy_elem  *batch_copy_elems;
diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index 51ce320..b09e03f 100644
@@ -154,6 +154,23 @@ vhost_flush_enqueue_shadow_packed(struct virtio_net *dev,
        vhost_log_cache_sync(dev, vq);
 }
 
+static __rte_always_inline void
+vhost_flush_dequeue_shadow_packed(struct virtio_net *dev,
+                                 struct vhost_virtqueue *vq)
+{
+       struct vring_used_elem_packed *used_elem = &vq->shadow_used_packed[0];
+
+       vq->desc_packed[vq->shadow_last_used_idx].id = used_elem->id;
+       rte_smp_wmb();
+       vq->desc_packed[vq->shadow_last_used_idx].flags = used_elem->flags;
+
+       vhost_log_cache_used_vring(dev, vq, vq->shadow_last_used_idx *
+                                  sizeof(struct vring_packed_desc),
+                                  sizeof(struct vring_packed_desc));
+       vq->shadow_used_idx = 0;
+       vhost_log_cache_sync(dev, vq);
+}
+
 static __rte_always_inline void
 vhost_flush_enqueue_batch_packed(struct virtio_net *dev,
                                 struct vhost_virtqueue *vq,
@@ -246,6 +263,78 @@ flush_shadow_used_ring_packed(struct virtio_net *dev,
        vhost_log_cache_sync(dev, vq);
 }
 
+static __rte_always_inline void
+vhost_shadow_dequeue_batch_packed(struct virtio_net *dev,
+                                 struct vhost_virtqueue *vq,
+                                 uint16_t *ids)
+{
+       uint16_t flags;
+       uint16_t i;
+       uint16_t begin;
+
+       flags = PACKED_DESC_DEQUEUE_USED_FLAG(vq->used_wrap_counter);
+
+       if (!vq->shadow_used_idx) {
+               vq->shadow_last_used_idx = vq->last_used_idx;
+               vq->shadow_used_packed[0].id  = ids[0];
+               vq->shadow_used_packed[0].len = 0;
+               vq->shadow_used_packed[0].count = 1;
+               vq->shadow_used_packed[0].flags = flags;
+               vq->shadow_used_idx++;
+               begin = 1;
+       } else
+               begin = 0;
+
+       vhost_for_each_try_unroll(i, begin, PACKED_BATCH_SIZE) {
+               vq->desc_packed[vq->last_used_idx + i].id = ids[i];
+               vq->desc_packed[vq->last_used_idx + i].len = 0;
+       }
+
+       rte_smp_wmb();
+       vhost_for_each_try_unroll(i, begin, PACKED_BATCH_SIZE)
+               vq->desc_packed[vq->last_used_idx + i].flags = flags;
+
+       vhost_log_cache_used_vring(dev, vq, vq->last_used_idx *
+                                  sizeof(struct vring_packed_desc),
+                                  sizeof(struct vring_packed_desc) *
+                                  PACKED_BATCH_SIZE);
+       vhost_log_cache_sync(dev, vq);
+
+       vq_inc_last_used_packed(vq, PACKED_BATCH_SIZE);
+}
+
+static __rte_always_inline void
+vhost_shadow_dequeue_single_packed(struct vhost_virtqueue *vq,
+                                  uint16_t buf_id,
+                                  uint16_t count)
+{
+       uint16_t flags;
+
+       flags = vq->desc_packed[vq->last_used_idx].flags;
+       if (vq->used_wrap_counter) {
+               flags |= VRING_DESC_F_USED;
+               flags |= VRING_DESC_F_AVAIL;
+       } else {
+               flags &= ~VRING_DESC_F_USED;
+               flags &= ~VRING_DESC_F_AVAIL;
+       }
+
+       if (!vq->shadow_used_idx) {
+               vq->shadow_last_used_idx = vq->last_used_idx;
+
+               vq->shadow_used_packed[0].id  = buf_id;
+               vq->shadow_used_packed[0].len = 0;
+               vq->shadow_used_packed[0].flags = flags;
+               vq->shadow_used_idx++;
+       } else {
+               vq->desc_packed[vq->last_used_idx].id = buf_id;
+               vq->desc_packed[vq->last_used_idx].len = 0;
+               vq->desc_packed[vq->last_used_idx].flags = flags;
+       }
+
+       vq_inc_last_used_packed(vq, count);
+}
+
 static __rte_always_inline void
 update_shadow_used_ring_packed(struct vhost_virtqueue *vq,
                         uint16_t desc_idx, uint32_t len, uint16_t count)
@@ -314,6 +403,25 @@ vhost_shadow_enqueue_single_packed(struct virtio_net *dev,
        }
 }
 
+static __rte_unused void
+vhost_flush_dequeue_packed(struct virtio_net *dev,
+                          struct vhost_virtqueue *vq)
+{
+       int shadow_count;
+       if (!vq->shadow_used_idx)
+               return;
+
+       shadow_count = vq->last_used_idx - vq->shadow_last_used_idx;
+       if (shadow_count <= 0)
+               shadow_count += vq->size;
+
+       if ((uint32_t)shadow_count >= (vq->size - MAX_PKT_BURST)) {
+               do_data_copy_dequeue(vq);
+               vhost_flush_dequeue_shadow_packed(dev, vq);
+               vhost_vring_call_packed(dev, vq);
+       }
+}
+
 /* avoid write operation when necessary, to lessen cache issues */
 #define ASSIGN_UNLESS_EQUAL(var, val) do {     \
        if ((var) != (val))                     \
@@ -1876,6 +1984,8 @@ virtio_dev_tx_batch_packed(struct virtio_net *dev,
                           (void *)(uintptr_t)(desc_addrs[i] + buf_offset),
                           pkts[i]->pkt_len);
 
+       vhost_shadow_dequeue_batch_packed(dev, vq, ids);
+
        vq_inc_last_avail_packed(vq, PACKED_BATCH_SIZE);
 
        return 0;
@@ -1931,6 +2041,8 @@ virtio_dev_tx_single_packed(struct virtio_net *dev,
                                        &desc_count))
                return -1;
 
+       vhost_shadow_dequeue_single_packed(vq, buf_id, desc_count);
+
        vq_inc_last_avail_packed(vq, desc_count);
 
        return 0;
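
As a usage note on vhost_flush_dequeue_packed() above: the buffered count
is the ring distance from the first shadowed entry to the current last
used index, with a wraparound correction, and the shadow is only flushed
once fewer than MAX_PKT_BURST free slots remain in the ring. A minimal
standalone sketch of that distance calculation follows; the ring size and
index values are illustrative assumptions, not taken from the patch.

#include <stdint.h>
#include <stdio.h>

/* Ring distance from the first shadowed used entry to the current
 * last used index, mirroring the shadow_count computation in
 * vhost_flush_dequeue_packed(). */
static uint32_t
buffered_count(uint16_t last_used_idx, uint16_t shadow_last_used_idx,
	       uint16_t ring_size)
{
	int count = last_used_idx - shadow_last_used_idx;

	if (count <= 0)	/* last_used_idx wrapped past the ring end */
		count += ring_size;
	return (uint32_t)count;
}

int
main(void)
{
	/* No wrap: 40 descriptors buffered since the first shadowed one. */
	printf("%u\n", buffered_count(50, 10, 256));	/* prints 40 */
	/* Wrapped: last_used_idx restarted from the ring start. */
	printf("%u\n", buffered_count(5, 250, 256));	/* prints 11 */
	return 0;
}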