doc: add Meson coding style to contributors guide

[dpdk.git] / lib / librte_vhost / virtio_net.c
diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c

index 0307956457ae50ed732a38ab41d5dd57a7ab7eda..ff39878609a603f14f3ad0bb1133067b9a5f06a7 100644 (file)
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -117,31 +117,6 @@ flush_shadow_used_ring_split(struct virtio_net *dev, struct vhost_virtqueue *vq)
                 sizeof(vq->used->idx));
  }
  
-static __rte_always_inline void
-async_flush_shadow_used_ring_split(struct virtio_net *dev,
-       struct vhost_virtqueue *vq)
-{
-       uint16_t used_idx = vq->last_used_idx & (vq->size - 1);
-
-       if (used_idx + vq->shadow_used_idx <= vq->size) {
-               do_flush_shadow_used_ring_split(dev, vq, used_idx, 0,
-                                         vq->shadow_used_idx);
-       } else {
-               uint16_t size;
-
-               /* update used ring interval [used_idx, vq->size] */
-               size = vq->size - used_idx;
-               do_flush_shadow_used_ring_split(dev, vq, used_idx, 0, size);
-
-               /* update the left half used ring interval [0, left_size] */
-               do_flush_shadow_used_ring_split(dev, vq, 0, size,
-                                         vq->shadow_used_idx - size);
-       }
-
-       vq->last_used_idx += vq->shadow_used_idx;
-       vq->shadow_used_idx = 0;
-}
-
  static __rte_always_inline void
  update_shadow_used_ring_split(struct vhost_virtqueue *vq,
                          uint16_t desc_idx, uint32_t len)
@@ -171,7 +146,8 @@ vhost_flush_enqueue_shadow_packed(struct virtio_net *dev,
                         used_idx -= vq->size;
         }
  
-       rte_smp_wmb();
+       /* The ordering for storing desc flags needs to be enforced. */
+       rte_atomic_thread_fence(__ATOMIC_RELEASE);
  
         for (i = 0; i < vq->shadow_used_idx; i++) {
                 uint16_t flags;
@@ -222,8 +198,9 @@ vhost_flush_dequeue_shadow_packed(struct virtio_net *dev,
         struct vring_used_elem_packed *used_elem = &vq->shadow_used_packed[0];
  
         vq->desc_packed[vq->shadow_last_used_idx].id = used_elem->id;
-       rte_smp_wmb();
-       vq->desc_packed[vq->shadow_last_used_idx].flags = used_elem->flags;
+       /* desc flags is the synchronization point for virtio packed vring */
+       __atomic_store_n(&vq->desc_packed[vq->shadow_last_used_idx].flags,
+                        used_elem->flags, __ATOMIC_RELEASE);
  
         vhost_log_cache_used_vring(dev, vq, vq->shadow_last_used_idx *
                                    sizeof(struct vring_packed_desc),
@@ -253,7 +230,7 @@ vhost_flush_enqueue_batch_packed(struct virtio_net *dev,
                 vq->desc_packed[vq->last_used_idx + i].len = lens[i];
         }
  
-       rte_smp_wmb();
+       rte_atomic_thread_fence(__ATOMIC_RELEASE);
  
         vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
                 vq->desc_packed[vq->last_used_idx + i].flags = flags;
@@ -312,7 +289,7 @@ vhost_shadow_dequeue_batch_packed(struct virtio_net *dev,
                 vq->desc_packed[vq->last_used_idx + i].len = 0;
         }
  
-       rte_smp_wmb();
+       rte_atomic_thread_fence(__ATOMIC_RELEASE);
         vhost_for_each_try_unroll(i, begin, PACKED_BATCH_SIZE)
                 vq->desc_packed[vq->last_used_idx + i].flags = flags;
  
@@ -571,10 +548,11 @@ fill_vec_buf_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
                         return -1;
                 }
  
-               len += descs[idx].len;
+               dlen = descs[idx].len;
+               len += dlen;
  
                 if (unlikely(map_one_desc(dev, vq, buf_vec, &vec_id,
-                                               descs[idx].addr, descs[idx].len,
+                                               descs[idx].addr, dlen,
                                                 perm))) {
                         free_ind_table(idesc);
                         return -1;
@@ -691,9 +669,10 @@ fill_vec_buf_packed_indirect(struct virtio_net *dev,
                         return -1;
                 }
  
-               *len += descs[i].len;
+               dlen = descs[i].len;
+               *len += dlen;
                 if (unlikely(map_one_desc(dev, vq, buf_vec, &vec_id,
-                                               descs[i].addr, descs[i].len,
+                                               descs[i].addr, dlen,
                                                 perm)))
                         return -1;
         }
@@ -714,6 +693,7 @@ fill_vec_buf_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
         bool wrap_counter = vq->avail_wrap_counter;
         struct vring_packed_desc *descs = vq->desc_packed;
         uint16_t vec_id = *vec_idx;
+       uint64_t dlen;
  
         if (avail_idx < vq->last_avail_idx)
                 wrap_counter ^= 1;
@@ -746,11 +726,12 @@ fill_vec_buf_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
                                                         len, perm) < 0))
                                 return -1;
                 } else {
-                       *len += descs[avail_idx].len;
+                       dlen = descs[avail_idx].len;
+                       *len += dlen;
  
                         if (unlikely(map_one_desc(dev, vq, buf_vec, &vec_id,
                                                         descs[avail_idx].addr,
-                                                       descs[avail_idx].len,
+                                                       dlen,
                                                         perm)))
                                 return -1;
                 }
@@ -831,9 +812,10 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
  
         hdr_mbuf = m;
         hdr_addr = buf_addr;
-       if (unlikely(buf_len < dev->vhost_hlen))
+       if (unlikely(buf_len < dev->vhost_hlen)) {
+               memset(&tmp_hdr, 0, sizeof(struct virtio_net_hdr_mrg_rxbuf));
                 hdr = &tmp_hdr;
-       else
+       } else
                 hdr = (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)hdr_addr;
  
         VHOST_LOG_DATA(DEBUG, "(%d) RX: num merge buffers %d\n",
@@ -1004,9 +986,10 @@ async_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
  
         hdr_mbuf = m;
         hdr_addr = buf_addr;
-       if (unlikely(buf_len < dev->vhost_hlen))
+       if (unlikely(buf_len < dev->vhost_hlen)) {
+               memset(&tmp_hdr, 0, sizeof(struct virtio_net_hdr_mrg_rxbuf));
                 hdr = &tmp_hdr;
-       else
+       } else
                 hdr = (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)hdr_addr;
  
         VHOST_LOG_DATA(DEBUG, "(%d) RX: num merge buffers %d\n",
@@ -1128,8 +1111,12 @@ async_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
         }
  
  out:
-       async_fill_iter(src_it, tlen, src_iovec, tvec_idx);
-       async_fill_iter(dst_it, tlen, dst_iovec, tvec_idx);
+       if (tlen) {
+               async_fill_iter(src_it, tlen, src_iovec, tvec_idx);
+               async_fill_iter(dst_it, tlen, dst_iovec, tvec_idx);
+       } else {
+               src_it->count = 0;
+       }
  
         return error;
  }
@@ -1281,8 +1268,6 @@ virtio_dev_rx_batch_packed(struct virtio_net *dev,
                         return -1;
         }
  
-       rte_smp_rmb();
-
         vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
                 lens[i] = descs[avail_idx + i].len;
  
@@ -1343,7 +1328,6 @@ virtio_dev_rx_single_packed(struct virtio_net *dev,
         struct buf_vector buf_vec[BUF_VECTOR_MAX];
         uint16_t nr_descs = 0;
  
-       rte_smp_rmb();
         if (unlikely(vhost_enqueue_single_packed(dev, vq, pkt, buf_vec,
                                                  &nr_descs) < 0)) {
                 VHOST_LOG_DATA(DEBUG,
@@ -1418,13 +1402,13 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
  
         rte_spinlock_lock(&vq->access_lock);
  
-       if (unlikely(vq->enabled == 0))
+       if (unlikely(!vq->enabled))
                 goto out_access_unlock;
  
         if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
                 vhost_user_iotlb_rd_lock(vq);
  
-       if (unlikely(vq->access_ok == 0))
+       if (unlikely(!vq->access_ok))
                 if (unlikely(vring_translate(dev, vq) < 0))
                         goto out;
  
@@ -1477,7 +1461,8 @@ virtio_dev_rx_async_get_info_idx(uint16_t pkts_idx,
  static __rte_noinline uint32_t
  virtio_dev_rx_async_submit_split(struct virtio_net *dev,
         struct vhost_virtqueue *vq, uint16_t queue_id,
-       struct rte_mbuf **pkts, uint32_t count)
+       struct rte_mbuf **pkts, uint32_t count,
+       struct rte_mbuf **comp_pkts, uint32_t *comp_count)
  {
         uint32_t pkt_idx = 0, pkt_burst_idx = 0;
         uint16_t num_buffers;
@@ -1491,19 +1476,20 @@ virtio_dev_rx_async_submit_split(struct virtio_net *dev,
         struct iovec *dst_iovec = vec_pool + (VHOST_MAX_ASYNC_VEC >> 1);
         struct rte_vhost_iov_iter *src_it = it_pool;
         struct rte_vhost_iov_iter *dst_it = it_pool + 1;
-       uint16_t n_free_slot, slot_idx;
-       uint16_t pkt_err = 0;
+       uint16_t slot_idx = 0;
         uint16_t segs_await = 0;
         struct async_inflight_info *pkts_info = vq->async_pkts_info;
-       int n_pkts = 0;
-
-       avail_head = __atomic_load_n(&vq->avail->idx, __ATOMIC_ACQUIRE);
+       uint32_t n_pkts = 0, pkt_err = 0;
+       uint32_t num_async_pkts = 0, num_done_pkts = 0;
+       struct {
+               uint16_t pkt_idx;
+               uint16_t last_avail_idx;
+       } async_pkts_log[MAX_PKT_BURST];
  
         /*
-        * The ordering between avail index and
-        * desc reads needs to be enforced.
+        * The ordering between avail index and desc reads needs to be enforced.
          */
-       rte_smp_rmb();
+       avail_head = __atomic_load_n(&vq->avail->idx, __ATOMIC_ACQUIRE);
  
         rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]);
  
@@ -1532,34 +1518,61 @@ virtio_dev_rx_async_submit_split(struct virtio_net *dev,
                         break;
                 }
  
-               slot_idx = (vq->async_pkts_idx + pkt_idx) & (vq->size - 1);
+               slot_idx = (vq->async_pkts_idx + num_async_pkts) &
+                       (vq->size - 1);
                 if (src_it->count) {
-                       async_fill_desc(&tdes[pkt_burst_idx], src_it, dst_it);
-                       pkt_burst_idx++;
+                       uint16_t from, to;
+
+                       async_fill_desc(&tdes[pkt_burst_idx++], src_it, dst_it);
                         pkts_info[slot_idx].descs = num_buffers;
-                       pkts_info[slot_idx].segs = src_it->nr_segs;
+                       pkts_info[slot_idx].mbuf = pkts[pkt_idx];
+                       async_pkts_log[num_async_pkts].pkt_idx = pkt_idx;
+                       async_pkts_log[num_async_pkts++].last_avail_idx =
+                               vq->last_avail_idx;
                         src_iovec += src_it->nr_segs;
                         dst_iovec += dst_it->nr_segs;
                         src_it += 2;
                         dst_it += 2;
                         segs_await += src_it->nr_segs;
-               } else {
-                       pkts_info[slot_idx].info = num_buffers;
-                       vq->async_pkts_inflight_n++;
-               }
+
+                       /**
+                        * recover shadow used ring and keep DMA-occupied
+                        * descriptors.
+                        */
+                       from = vq->shadow_used_idx - num_buffers;
+                       to = vq->async_desc_idx & (vq->size - 1);
+                       if (num_buffers + to <= vq->size) {
+                               rte_memcpy(&vq->async_descs_split[to],
+                                               &vq->shadow_used_split[from],
+                                               num_buffers *
+                                               sizeof(struct vring_used_elem));
+                       } else {
+                               int size = vq->size - to;
+
+                               rte_memcpy(&vq->async_descs_split[to],
+                                               &vq->shadow_used_split[from],
+                                               size *
+                                               sizeof(struct vring_used_elem));
+                               rte_memcpy(vq->async_descs_split,
+                                               &vq->shadow_used_split[from +
+                                               size], (num_buffers - size) *
+                                          sizeof(struct vring_used_elem));
+                       }
+                       vq->async_desc_idx += num_buffers;
+                       vq->shadow_used_idx -= num_buffers;
+               } else
+                       comp_pkts[num_done_pkts++] = pkts[pkt_idx];
  
                 vq->last_avail_idx += num_buffers;
  
                 /*
                  * conditions to trigger async device transfer:
                  * - buffered packet number reaches transfer threshold
-                * - this is the last packet in the burst enqueue
                  * - unused async iov number is less than max vhost vector
                  */
-               if (pkt_burst_idx >= VHOST_ASYNC_BATCH_THRESHOLD ||
-                       (pkt_idx == count - 1 && pkt_burst_idx) ||
-                       (VHOST_MAX_ASYNC_VEC / 2 - segs_await <
-                       BUF_VECTOR_MAX)) {
+               if (unlikely(pkt_burst_idx >= VHOST_ASYNC_BATCH_THRESHOLD ||
+                       ((VHOST_MAX_ASYNC_VEC >> 1) - segs_await <
+                       BUF_VECTOR_MAX))) {
                         n_pkts = vq->async_ops.transfer_data(dev->vid,
                                         queue_id, tdes, 0, pkt_burst_idx);
                         src_iovec = vec_pool;
@@ -1569,7 +1582,7 @@ virtio_dev_rx_async_submit_split(struct virtio_net *dev,
                         segs_await = 0;
                         vq->async_pkts_inflight_n += n_pkts;
  
-                       if (unlikely(n_pkts < (int)pkt_burst_idx)) {
+                       if (unlikely(n_pkts < pkt_burst_idx)) {
                                 /*
                                  * log error packets number here and do actual
                                  * error processing when applications poll
@@ -1589,38 +1602,39 @@ virtio_dev_rx_async_submit_split(struct virtio_net *dev,
                                 queue_id, tdes, 0, pkt_burst_idx);
                 vq->async_pkts_inflight_n += n_pkts;
  
-               if (unlikely(n_pkts < (int)pkt_burst_idx))
+               if (unlikely(n_pkts < pkt_burst_idx))
                         pkt_err = pkt_burst_idx - n_pkts;
         }
  
         do_data_copy_enqueue(dev, vq);
  
-       while (unlikely(pkt_err && pkt_idx)) {
-               if (pkts_info[slot_idx].segs)
-                       pkt_err--;
-               vq->last_avail_idx -= pkts_info[slot_idx].descs;
-               vq->shadow_used_idx -= pkts_info[slot_idx].descs;
-               vq->async_pkts_inflight_n--;
-               slot_idx = (slot_idx - 1) & (vq->size - 1);
-               pkt_idx--;
-       }
-
-       n_free_slot = vq->size - vq->async_pkts_idx;
-       if (n_free_slot > pkt_idx) {
-               rte_memcpy(&vq->async_pkts_pending[vq->async_pkts_idx],
-                       pkts, pkt_idx * sizeof(uintptr_t));
-               vq->async_pkts_idx += pkt_idx;
-       } else {
-               rte_memcpy(&vq->async_pkts_pending[vq->async_pkts_idx],
-                       pkts, n_free_slot * sizeof(uintptr_t));
-               rte_memcpy(&vq->async_pkts_pending[0],
-                       &pkts[n_free_slot],
-                       (pkt_idx - n_free_slot) * sizeof(uintptr_t));
-               vq->async_pkts_idx = pkt_idx - n_free_slot;
+       if (unlikely(pkt_err)) {
+               uint16_t num_descs = 0;
+
+               num_async_pkts -= pkt_err;
+               /* calculate the sum of descriptors of DMA-error packets. */
+               while (pkt_err-- > 0) {
+                       num_descs += pkts_info[slot_idx & (vq->size - 1)].descs;
+                       slot_idx--;
+               }
+               vq->async_desc_idx -= num_descs;
+               /* recover shadow used ring and available ring */
+               vq->shadow_used_idx -= (vq->last_avail_idx -
+                               async_pkts_log[num_async_pkts].last_avail_idx -
+                               num_descs);
+               vq->last_avail_idx =
+                       async_pkts_log[num_async_pkts].last_avail_idx;
+               pkt_idx = async_pkts_log[num_async_pkts].pkt_idx;
+               num_done_pkts = pkt_idx - num_async_pkts;
         }
  
-       if (likely(vq->shadow_used_idx))
-               async_flush_shadow_used_ring_split(dev, vq);
+       vq->async_pkts_idx += num_async_pkts;
+       *comp_count = num_done_pkts;
+
+       if (likely(vq->shadow_used_idx)) {
+               flush_shadow_used_ring_split(dev, vq);
+               vhost_vring_call_split(dev, vq);
+       }
  
         return pkt_idx;
  }
@@ -1632,8 +1646,8 @@ uint16_t rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id,
         struct vhost_virtqueue *vq;
         uint16_t n_pkts_cpl = 0, n_pkts_put = 0, n_descs = 0;
         uint16_t start_idx, pkts_idx, vq_size;
-       uint16_t n_inflight;
         struct async_inflight_info *pkts_info;
+       uint16_t from, i;
  
         if (!dev)
                 return 0;
@@ -1655,8 +1669,7 @@ uint16_t rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id,
  
         rte_spinlock_lock(&vq->access_lock);
  
-       n_inflight = vq->async_pkts_inflight_n;
-       pkts_idx = vq->async_pkts_idx;
+       pkts_idx = vq->async_pkts_idx & (vq->size - 1);
         pkts_info = vq->async_pkts_info;
         vq_size = vq->size;
         start_idx = virtio_dev_rx_async_get_info_idx(pkts_idx,
@@ -1667,42 +1680,61 @@ uint16_t rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id,
                         queue_id, 0, count - vq->async_last_pkts_n);
         n_pkts_cpl += vq->async_last_pkts_n;
  
-       rte_smp_wmb();
-
-       while (likely((n_pkts_put < count) && n_inflight)) {
-               uint16_t info_idx = (start_idx + n_pkts_put) & (vq_size - 1);
-               if (n_pkts_cpl && pkts_info[info_idx].segs)
-                       n_pkts_cpl--;
-               else if (!n_pkts_cpl && pkts_info[info_idx].segs)
-                       break;
-               n_pkts_put++;
-               n_inflight--;
-               n_descs += pkts_info[info_idx].descs;
-       }
-
-       vq->async_last_pkts_n = n_pkts_cpl;
+       n_pkts_put = RTE_MIN(count, n_pkts_cpl);
+       if (unlikely(n_pkts_put == 0)) {
+               vq->async_last_pkts_n = n_pkts_cpl;
+               goto done;
+       }
+
+       for (i = 0; i < n_pkts_put; i++) {
+               from = (start_idx + i) & (vq_size - 1);
+               n_descs += pkts_info[from].descs;
+               pkts[i] = pkts_info[from].mbuf;
+       }
+       vq->async_last_pkts_n = n_pkts_cpl - n_pkts_put;
+       vq->async_pkts_inflight_n -= n_pkts_put;
+
+       if (likely(vq->enabled && vq->access_ok)) {
+               uint16_t nr_left = n_descs;
+               uint16_t nr_copy;
+               uint16_t to;
+
+               /* write back completed descriptors to used ring */
+               do {
+                       from = vq->last_async_desc_idx & (vq->size - 1);
+                       nr_copy = nr_left + from <= vq->size ? nr_left :
+                               vq->size - from;
+                       to = vq->last_used_idx & (vq->size - 1);
+
+                       if (to + nr_copy <= vq->size) {
+                               rte_memcpy(&vq->used->ring[to],
+                                               &vq->async_descs_split[from],
+                                               nr_copy *
+                                               sizeof(struct vring_used_elem));
+                       } else {
+                               uint16_t size = vq->size - to;
+
+                               rte_memcpy(&vq->used->ring[to],
+                                               &vq->async_descs_split[from],
+                                               size *
+                                               sizeof(struct vring_used_elem));
+                               rte_memcpy(vq->used->ring,
+                                               &vq->async_descs_split[from +
+                                               size], (nr_copy - size) *
+                                               sizeof(struct vring_used_elem));
+                       }
  
-       if (n_pkts_put) {
-               vq->async_pkts_inflight_n = n_inflight;
-               if (likely(vq->enabled && vq->access_ok)) {
-                       __atomic_add_fetch(&vq->used->idx,
-                                       n_descs, __ATOMIC_RELEASE);
-                       vhost_vring_call_split(dev, vq);
-               }
+                       vq->last_async_desc_idx += nr_copy;
+                       vq->last_used_idx += nr_copy;
+                       nr_left -= nr_copy;
+               } while (nr_left > 0);
  
-               if (start_idx + n_pkts_put <= vq_size) {
-                       rte_memcpy(pkts, &vq->async_pkts_pending[start_idx],
-                               n_pkts_put * sizeof(uintptr_t));
-               } else {
-                       rte_memcpy(pkts, &vq->async_pkts_pending[start_idx],
-                               (vq_size - start_idx) * sizeof(uintptr_t));
-                       rte_memcpy(&pkts[vq_size - start_idx],
-                               vq->async_pkts_pending,
-                               (n_pkts_put + start_idx - vq_size) *
-                               sizeof(uintptr_t));
-               }
-       }
+               __atomic_add_fetch(&vq->used->idx, n_descs, __ATOMIC_RELEASE);
+               vhost_vring_call_split(dev, vq);
+       } else
+               vq->last_async_desc_idx += n_descs;
  
+done:
         rte_spinlock_unlock(&vq->access_lock);
  
         return n_pkts_put;
@@ -1710,7 +1742,8 @@ uint16_t rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id,
  
  static __rte_always_inline uint32_t
  virtio_dev_rx_async_submit(struct virtio_net *dev, uint16_t queue_id,
-       struct rte_mbuf **pkts, uint32_t count)
+       struct rte_mbuf **pkts, uint32_t count,
+       struct rte_mbuf **comp_pkts, uint32_t *comp_count)
  {
         struct vhost_virtqueue *vq;
         uint32_t nb_tx = 0;
@@ -1726,13 +1759,13 @@ virtio_dev_rx_async_submit(struct virtio_net *dev, uint16_t queue_id,
  
         rte_spinlock_lock(&vq->access_lock);
  
-       if (unlikely(vq->enabled == 0 || !vq->async_registered))
+       if (unlikely(!vq->enabled || !vq->async_registered))
                 goto out_access_unlock;
  
         if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
                 vhost_user_iotlb_rd_lock(vq);
  
-       if (unlikely(vq->access_ok == 0))
+       if (unlikely(!vq->access_ok))
                 if (unlikely(vring_translate(dev, vq) < 0))
                         goto out;
  
@@ -1745,7 +1778,8 @@ virtio_dev_rx_async_submit(struct virtio_net *dev, uint16_t queue_id,
                 nb_tx = 0;
         else
                 nb_tx = virtio_dev_rx_async_submit_split(dev,
-                               vq, queue_id, pkts, count);
+                               vq, queue_id, pkts, count, comp_pkts,
+                               comp_count);
  
  out:
         if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
@@ -1759,10 +1793,12 @@ out_access_unlock:
  
  uint16_t
  rte_vhost_submit_enqueue_burst(int vid, uint16_t queue_id,
-               struct rte_mbuf **pkts, uint16_t count)
+               struct rte_mbuf **pkts, uint16_t count,
+               struct rte_mbuf **comp_pkts, uint32_t *comp_count)
  {
         struct virtio_net *dev = get_device(vid);
  
+       *comp_count = 0;
         if (!dev)
                 return 0;
  
@@ -1773,7 +1809,8 @@ rte_vhost_submit_enqueue_burst(int vid, uint16_t queue_id,
                 return 0;
         }
  
-       return virtio_dev_rx_async_submit(dev, queue_id, pkts, count);
+       return virtio_dev_rx_async_submit(dev, queue_id, pkts, count, comp_pkts,
+                       comp_count);
  }
  
  static inline bool
@@ -2232,7 +2269,6 @@ vhost_reserve_avail_batch_packed(struct virtio_net *dev,
  {
         bool wrap = vq->avail_wrap_counter;
         struct vring_packed_desc *descs = vq->desc_packed;
-       struct virtio_net_hdr *hdr;
         uint64_t lens[PACKED_BATCH_SIZE];
         uint64_t buf_lens[PACKED_BATCH_SIZE];
         uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf);
@@ -2251,7 +2287,7 @@ vhost_reserve_avail_batch_packed(struct virtio_net *dev,
                         return -1;
         }
  
-       rte_smp_rmb();
+       rte_atomic_thread_fence(__ATOMIC_ACQUIRE);
  
         vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
                 lens[i] = descs[avail_idx + i].len;
@@ -2284,18 +2320,11 @@ vhost_reserve_avail_batch_packed(struct virtio_net *dev,
         }
  
         vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
-               pkts[i]->pkt_len = descs[avail_idx + i].len - buf_offset;
+               pkts[i]->pkt_len = lens[i] - buf_offset;
                 pkts[i]->data_len = pkts[i]->pkt_len;
                 ids[i] = descs[avail_idx + i].id;
         }
  
-       if (virtio_net_with_host_offload(dev)) {
-               vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
-                       hdr = (struct virtio_net_hdr *)(desc_addrs[i]);
-                       vhost_dequeue_offload(hdr, pkts[i]);
-               }
-       }
-
         return 0;
  
  free_buf:
@@ -2313,6 +2342,7 @@ virtio_dev_tx_batch_packed(struct virtio_net *dev,
  {
         uint16_t avail_idx = vq->last_avail_idx;
         uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf);
+       struct virtio_net_hdr *hdr;
         uintptr_t desc_addrs[PACKED_BATCH_SIZE];
         uint16_t ids[PACKED_BATCH_SIZE];
         uint16_t i;
@@ -2329,6 +2359,13 @@ virtio_dev_tx_batch_packed(struct virtio_net *dev,
                            (void *)(uintptr_t)(desc_addrs[i] + buf_offset),
                            pkts[i]->pkt_len);
  
+       if (virtio_net_with_host_offload(dev)) {
+               vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
+                       hdr = (struct virtio_net_hdr *)(desc_addrs[i]);
+                       vhost_dequeue_offload(hdr, pkts[i]);
+               }
+       }
+
         if (virtio_net_is_inorder(dev))
                 vhost_shadow_dequeue_batch_packed_inorder(vq,
                         ids[PACKED_BATCH_SIZE - 1]);
@@ -2487,7 +2524,7 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
         if (unlikely(rte_spinlock_trylock(&vq->access_lock) == 0))
                 return 0;
  
-       if (unlikely(vq->enabled == 0)) {
+       if (unlikely(!vq->enabled)) {
                 count = 0;
                 goto out_access_unlock;
         }
@@ -2495,7 +2532,7 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
         if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
                 vhost_user_iotlb_rd_lock(vq);
  
-       if (unlikely(vq->access_ok == 0))
+       if (unlikely(!vq->access_ok))
                 if (unlikely(vring_translate(dev, vq) < 0)) {
                         count = 0;
                         goto out;