vhost: relax full barriers for desc flags
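
Replace the full barriers around the packed ring descriptor flags with
C11 one-way barriers. The store of the used descriptor flags becomes an
__ATOMIC_RELEASE store instead of a plain store preceded by
rte_smp_wmb(), and the rte_smp_rmb() calls in the enqueue paths are
dropped: the desc flags (packed ring) and the avail index (split ring,
already read with __ATOMIC_ACQUIRE) are the synchronization points, so
acquire/release ordering on those fields is enough to order the other
descriptor accesses, and weakly ordered architectures no longer need
full barrier instructions there.

A minimal sketch of the release/acquire pairing the flags store relies
on (the struct, the helper names and the simplified flags check below
are illustrative assumptions, not code from this patch):

    #include <stdint.h>

    struct pkd_desc {            /* stand-in for struct vring_packed_desc */
            uint64_t addr;
            uint32_t len;
            uint16_t id;
            uint16_t flags;
    };

    /* device side: make the descriptor body visible before the flags */
    static inline void publish_desc(struct pkd_desc *d, uint16_t id,
                                    uint16_t flags)
    {
            d->id = id;
            /* release: the store to id cannot be reordered after flags */
            __atomic_store_n(&d->flags, flags, __ATOMIC_RELEASE);
    }

    /* driver side: read the descriptor body only after acquiring flags */
    static inline int poll_desc(struct pkd_desc *d, uint16_t used_mask,
                                uint16_t *id)
    {
            uint16_t flags = __atomic_load_n(&d->flags, __ATOMIC_ACQUIRE);

            if ((flags & used_mask) != used_mask)   /* simplified check */
                    return 0;
            *id = d->id;    /* safe: ordered after the acquire load */
            return 1;
    }

On x86 both the release store and the acquire load compile down to plain
MOV instructions plus a compiler barrier, so nothing is lost there; the
gain is on weakly ordered ISAs, which can use one-way barrier
instructions instead of full barriers.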
diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index a05d44f..b779034 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -222,8 +222,9 @@ vhost_flush_dequeue_shadow_packed(struct virtio_net *dev,
        struct vring_used_elem_packed *used_elem = &vq->shadow_used_packed[0];
 
        vq->desc_packed[vq->shadow_last_used_idx].id = used_elem->id;
-       rte_smp_wmb();
-       vq->desc_packed[vq->shadow_last_used_idx].flags = used_elem->flags;
+       /* desc flags are the synchronization point for virtio packed vring */
+       __atomic_store_n(&vq->desc_packed[vq->shadow_last_used_idx].flags,
+                        used_elem->flags, __ATOMIC_RELEASE);
 
        vhost_log_cache_used_vring(dev, vq, vq->shadow_last_used_idx *
                                   sizeof(struct vring_packed_desc),
@@ -1281,8 +1282,6 @@ virtio_dev_rx_batch_packed(struct virtio_net *dev,
                        return -1;
        }
 
-       rte_smp_rmb();
-
        vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
                lens[i] = descs[avail_idx + i].len;
 
@@ -1343,7 +1342,6 @@ virtio_dev_rx_single_packed(struct virtio_net *dev,
        struct buf_vector buf_vec[BUF_VECTOR_MAX];
        uint16_t nr_descs = 0;
 
-       rte_smp_rmb();
        if (unlikely(vhost_enqueue_single_packed(dev, vq, pkt, buf_vec,
                                                 &nr_descs) < 0)) {
                VHOST_LOG_DATA(DEBUG,
@@ -1491,18 +1489,16 @@ virtio_dev_rx_async_submit_split(struct virtio_net *dev,
        struct iovec *dst_iovec = vec_pool + (VHOST_MAX_ASYNC_VEC >> 1);
        struct rte_vhost_iov_iter *src_it = it_pool;
        struct rte_vhost_iov_iter *dst_it = it_pool + 1;
-       uint16_t n_free_slot, slot_idx;
+       uint16_t n_free_slot, slot_idx = 0;
        uint16_t pkt_err = 0;
+       uint16_t segs_await = 0;
        struct async_inflight_info *pkts_info = vq->async_pkts_info;
        int n_pkts = 0;
 
-       avail_head = __atomic_load_n(&vq->avail->idx, __ATOMIC_ACQUIRE);
-
        /*
-        * The ordering between avail index and
-        * desc reads needs to be enforced.
+        * The ordering between avail index and desc reads needs to be enforced.
         */
-       rte_smp_rmb();
+       avail_head = __atomic_load_n(&vq->avail->idx, __ATOMIC_ACQUIRE);
 
        rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]);
 
@@ -1541,6 +1537,7 @@ virtio_dev_rx_async_submit_split(struct virtio_net *dev,
                        dst_iovec += dst_it->nr_segs;
+                       segs_await += src_it->nr_segs;
                        src_it += 2;
                        dst_it += 2;
                } else {
                        pkts_info[slot_idx].info = num_buffers;
                        vq->async_pkts_inflight_n++;
@@ -1548,15 +1545,24 @@ virtio_dev_rx_async_submit_split(struct virtio_net *dev,
 
                vq->last_avail_idx += num_buffers;
 
+               /*
+                * Conditions to trigger the async device transfer:
+                * - the buffered packet number reaches the transfer threshold
+                * - this is the last packet in the burst enqueue
+                * - fewer than BUF_VECTOR_MAX async iovecs remain unused
+                */
                if (pkt_burst_idx >= VHOST_ASYNC_BATCH_THRESHOLD ||
-                               (pkt_idx == count - 1 && pkt_burst_idx)) {
+                       (pkt_idx == count - 1 && pkt_burst_idx) ||
+                       (VHOST_MAX_ASYNC_VEC / 2 - segs_await <
+                       BUF_VECTOR_MAX)) {
                        n_pkts = vq->async_ops.transfer_data(dev->vid,
                                        queue_id, tdes, 0, pkt_burst_idx);
                        src_iovec = vec_pool;
                        dst_iovec = vec_pool + (VHOST_MAX_ASYNC_VEC >> 1);
                        src_it = it_pool;
                        dst_it = it_pool + 1;
-                       vq->async_pkts_inflight_n += n_pkts;
+                       segs_await = 0;
+                       vq->async_pkts_inflight_n += pkt_burst_idx;
 
                        if (unlikely(n_pkts < (int)pkt_burst_idx)) {
                                /*
@@ -1576,7 +1582,7 @@ virtio_dev_rx_async_submit_split(struct virtio_net *dev,
        if (pkt_burst_idx) {
                n_pkts = vq->async_ops.transfer_data(dev->vid,
                                queue_id, tdes, 0, pkt_burst_idx);
-               vq->async_pkts_inflight_n += n_pkts;
+               vq->async_pkts_inflight_n += pkt_burst_idx;
 
                if (unlikely(n_pkts < (int)pkt_burst_idx))
                        pkt_err = pkt_burst_idx - n_pkts;
@@ -1703,7 +1709,6 @@ virtio_dev_rx_async_submit(struct virtio_net *dev, uint16_t queue_id,
 {
        struct vhost_virtqueue *vq;
        uint32_t nb_tx = 0;
-       bool drawback = false;
 
        VHOST_LOG_DATA(DEBUG, "(%d) %s\n", dev->vid, __func__);
        if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) {
@@ -1716,14 +1721,9 @@ virtio_dev_rx_async_submit(struct virtio_net *dev, uint16_t queue_id,
 
        rte_spinlock_lock(&vq->access_lock);
 
-       if (unlikely(vq->enabled == 0))
+       if (unlikely(vq->enabled == 0 || !vq->async_registered))
                goto out_access_unlock;
 
-       if (unlikely(!vq->async_registered)) {
-               drawback = true;
-               goto out_access_unlock;
-       }
-
        if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
                vhost_user_iotlb_rd_lock(vq);
 
@@ -1749,9 +1749,6 @@ out:
 out_access_unlock:
        rte_spinlock_unlock(&vq->access_lock);
 
-       if (drawback)
-               return rte_vhost_enqueue_burst(dev->vid, queue_id, pkts, count);
-
        return nb_tx;
 }