+ return i;
+}
+
+/*
+ * Check that PACKED_BATCH_SIZE descriptors starting at avail_idx can be
+ * dequeued as one batch and allocate one mbuf per descriptor.
+ *
+ * On success (return 0):
+ *   - desc_addrs[i] holds the address returned by vhost_iova_to_vva() for
+ *     descriptor avail_idx + i,
+ *   - ids[i] holds that descriptor's id field,
+ *   - pkts[i] is a freshly allocated mbuf whose pkt_len/data_len equal the
+ *     descriptor length minus dev->vhost_hlen,
+ *   - if virtio_net_with_host_offload(dev) is true, vhost_dequeue_offload()
+ *     has been called with the header located at desc_addrs[i].
+ *
+ * On failure (return -1) every mbuf allocated by this call has been freed;
+ * the contents of pkts[] are then unspecified.  The virtqueue itself is
+ * only read, never modified, by this function.
+ */
+static __rte_always_inline int
+vhost_reserve_avail_batch_packed(struct virtio_net *dev,
+				 struct vhost_virtqueue *vq,
+				 struct rte_mempool *mbuf_pool,
+				 struct rte_mbuf **pkts,
+				 uint16_t avail_idx,
+				 uintptr_t *desc_addrs,
+				 uint16_t *ids)
+{
+	bool wrap = vq->avail_wrap_counter;
+	struct vring_packed_desc *descs = vq->desc_packed;
+	struct virtio_net_hdr *hdr;
+	uint64_t lens[PACKED_BATCH_SIZE];
+	uint64_t buf_lens[PACKED_BATCH_SIZE];
+	uint32_t buf_offset = dev->vhost_hlen;
+	uint16_t flags, i;
+
+	/* Only batch-aligned start indexes that do not run past the ring. */
+	if (unlikely(avail_idx & PACKED_BATCH_MASK))
+		return -1;
+	if (unlikely((avail_idx + PACKED_BATCH_SIZE) > vq->size))
+		return -1;
+
+	/*
+	 * Every descriptor must be available for the current wrap counter
+	 * and must not carry any flag in PACKED_DESC_SINGLE_DEQUEUE_FLAG.
+	 */
+	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
+		flags = descs[avail_idx + i].flags;
+		if (unlikely((wrap != !!(flags & VRING_DESC_F_AVAIL)) ||
+			     (wrap == !!(flags & VRING_DESC_F_USED))  ||
+			     (flags & PACKED_DESC_SINGLE_DEQUEUE_FLAG)))
+			return -1;
+	}
+
+	/* Flags are read before the remaining descriptor fields. */
+	rte_smp_rmb();
+
+	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
+		lens[i] = descs[avail_idx + i].len;
+
+	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
+		desc_addrs[i] = vhost_iova_to_vva(dev, vq,
+						  descs[avail_idx + i].addr,
+						  &lens[i], VHOST_ACCESS_RW);
+	}
+
+	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
+		/*
+		 * NOTE(review): vhost_iova_to_vva() is assumed to return 0
+		 * when translation fails; such an address must never reach
+		 * the header parsing / copy code that dereferences it.
+		 */
+		if (unlikely(!desc_addrs[i]))
+			return -1;
+		/* A shortened length means the buffer is not contiguous. */
+		if (unlikely((lens[i] != descs[avail_idx + i].len)))
+			return -1;
+	}
+
+	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
+		pkts[i] = virtio_dev_pktmbuf_alloc(dev, mbuf_pool, lens[i]);
+		if (!pkts[i])
+			goto free_allocated;
+	}
+
+	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
+		buf_lens[i] = pkts[i]->buf_len - pkts[i]->data_off;
+
+	/*
+	 * lens[i] < buf_offset wraps to a huge 64-bit value here, so runt
+	 * descriptors are rejected by the same comparison.
+	 */
+	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
+		if (unlikely(buf_lens[i] < (lens[i] - buf_offset)))
+			goto free_buf;
+	}
+
+	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
+		/*
+		 * Use the validated local copy of the length: the descriptor
+		 * lives in guest-shared memory and could change after the
+		 * checks above.
+		 */
+		pkts[i]->pkt_len = lens[i] - buf_offset;
+		pkts[i]->data_len = pkts[i]->pkt_len;
+		ids[i] = descs[avail_idx + i].id;
+	}
+
+	if (virtio_net_with_host_offload(dev)) {
+		vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
+			hdr = (struct virtio_net_hdr *)(desc_addrs[i]);
+			vhost_dequeue_offload(hdr, pkts[i]);
+		}
+	}
+
+	return 0;
+
+free_buf:
+	/* The whole batch had been allocated when this path is taken. */
+	i = PACKED_BATCH_SIZE;
+free_allocated:
+	/*
+	 * Release only pkts[0 .. i-1].  Entries from i onwards were never
+	 * written by this call and may hold stale caller pointers.
+	 */
+	while (i > 0)
+		rte_pktmbuf_free(pkts[--i]);
+
+	return -1;
+}
+
+/*
+ * Dequeue one batch of PACKED_BATCH_SIZE packets starting at
+ * vq->last_avail_idx, copying each payload (past the vhost header) into
+ * the mbufs placed in pkts[].
+ *
+ * Returns 0 on success, after recording the batch in the shadow used ring
+ * and advancing last_avail_idx; returns -1 if the batch could not be
+ * reserved, in which case nothing is advanced.
+ */
+static __rte_always_inline int
+virtio_dev_tx_batch_packed(struct virtio_net *dev,
+			   struct vhost_virtqueue *vq,
+			   struct rte_mempool *mbuf_pool,
+			   struct rte_mbuf **pkts)
+{
+	uintptr_t desc_addrs[PACKED_BATCH_SIZE];
+	uint16_t ids[PACKED_BATCH_SIZE];
+	const uint16_t start_idx = vq->last_avail_idx;
+	const uint32_t hdr_len = dev->vhost_hlen;
+	uint16_t n;
+
+	if (vhost_reserve_avail_batch_packed(dev, vq, mbuf_pool, pkts,
+					     start_idx, desc_addrs, ids) != 0)
+		return -1;
+
+	/* Issue all prefetches before starting any copy. */
+	vhost_for_each_try_unroll(n, 0, PACKED_BATCH_SIZE) {
+		rte_prefetch0((void *)desc_addrs[n]);
+	}
+
+	vhost_for_each_try_unroll(n, 0, PACKED_BATCH_SIZE) {
+		void *dst = rte_pktmbuf_mtod_offset(pkts[n], void *, 0);
+		void *src = (void *)(desc_addrs[n] + hdr_len);
+
+		rte_memcpy(dst, src, pkts[n]->pkt_len);
+	}
+
+	if (virtio_net_is_inorder(dev)) {
+		/* In-order mode only needs the id of the last buffer. */
+		vhost_shadow_dequeue_batch_packed_inorder(vq,
+			ids[PACKED_BATCH_SIZE - 1]);
+	} else {
+		vhost_shadow_dequeue_batch_packed(dev, vq, ids);
+	}
+
+	vq_inc_last_avail_packed(vq, PACKED_BATCH_SIZE);
+
+	return 0;
+}
+
+/*
+ * Dequeue a single packet starting at vq->last_avail_idx.
+ *
+ * The descriptor chain is gathered with fill_vec_buf_packed(), an mbuf
+ * sized for the chain is allocated into *pkts and the data is copied with
+ * copy_desc_to_mbuf().  *buf_id and *desc_count are filled in by
+ * fill_vec_buf_packed().
+ *
+ * Returns 0 on success, -1 on any failure.  When the copy fails the mbuf
+ * is freed but *pkts is left pointing at it, so callers must not use
+ * *pkts after a failure.
+ */
+static __rte_always_inline int
+vhost_dequeue_single_packed(struct virtio_net *dev,
+			    struct vhost_virtqueue *vq,
+			    struct rte_mempool *mbuf_pool,
+			    struct rte_mbuf **pkts,
+			    uint16_t *buf_id,
+			    uint16_t *desc_count)
+{
+	struct buf_vector buf_vec[BUF_VECTOR_MAX];
+	struct rte_mbuf *mbuf;
+	uint32_t total_len;
+	uint16_t vec_cnt = 0;
+
+	if (unlikely(fill_vec_buf_packed(dev, vq, vq->last_avail_idx,
+					 desc_count, buf_vec, &vec_cnt,
+					 buf_id, &total_len,
+					 VHOST_ACCESS_RO) < 0))
+		return -1;
+
+	mbuf = virtio_dev_pktmbuf_alloc(dev, mbuf_pool, total_len);
+	*pkts = mbuf;
+	if (unlikely(mbuf == NULL)) {
+		VHOST_LOG_DATA(ERR,
+			"Failed to allocate memory for mbuf.\n");
+		return -1;
+	}
+
+	if (unlikely(copy_desc_to_mbuf(dev, vq, buf_vec, vec_cnt, mbuf,
+				       mbuf_pool))) {
+		rte_pktmbuf_free(mbuf);
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Dequeue one packet into *pkts, record it in the shadow used ring (using
+ * the in-order or the regular helper depending on virtio_net_is_inorder())
+ * and advance last_avail_idx by the number of descriptors consumed.
+ *
+ * Returns 0 on success, -1 if no packet could be dequeued.
+ */
+static __rte_always_inline int
+virtio_dev_tx_single_packed(struct virtio_net *dev,
+			    struct vhost_virtqueue *vq,
+			    struct rte_mempool *mbuf_pool,
+			    struct rte_mbuf **pkts)
+{
+	uint16_t id;
+	uint16_t nr_descs;
+
+	if (vhost_dequeue_single_packed(dev, vq, mbuf_pool, pkts, &id,
+					&nr_descs) != 0)
+		return -1;
+
+	if (virtio_net_is_inorder(dev)) {
+		vhost_shadow_dequeue_single_packed_inorder(vq, id, nr_descs);
+	} else {
+		vhost_shadow_dequeue_single_packed(vq, id, nr_descs);
+	}
+
+	vq_inc_last_avail_packed(vq, nr_descs);
+
+	return 0;
+}
+
+/*
+ * Zero-copy variant of the batch dequeue: reserve PACKED_BATCH_SIZE
+ * buffers, bind each resulting mbuf to a zcopy_mbuf tracking entry and
+ * queue the entries on vq->zmbuf_list so free_zmbuf() can return the
+ * buffers to the guest later.
+ *
+ * Returns 0 on success (last_avail_idx advanced by PACKED_BATCH_SIZE),
+ * -1 on failure with all mbufs and zmbuf entries released.
+ *
+ * NOTE(review): unlike virtio_dev_tx_batch_packed() nothing here copies or
+ * attaches the guest payload to the mbufs - confirm this is intended.
+ */
+static __rte_always_inline int
+virtio_dev_tx_batch_packed_zmbuf(struct virtio_net *dev,
+				 struct vhost_virtqueue *vq,
+				 struct rte_mempool *mbuf_pool,
+				 struct rte_mbuf **pkts)
+{
+	struct zcopy_mbuf *zmbufs[PACKED_BATCH_SIZE];
+	uintptr_t desc_addrs[PACKED_BATCH_SIZE];
+	uint16_t ids[PACKED_BATCH_SIZE];
+	uint16_t i;
+
+	uint16_t avail_idx = vq->last_avail_idx;
+
+	if (vhost_reserve_avail_batch_packed(dev, vq, mbuf_pool, pkts,
+					     avail_idx, desc_addrs, ids))
+		return -1;
+
+	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
+		zmbufs[i] = get_zmbuf(vq);
+
+	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
+		if (!zmbufs[i])
+			goto free_pkt;
+	}
+
+	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
+		zmbufs[i]->mbuf = pkts[i];
+		/*
+		 * free_zmbuf() writes desc_idx into the used descriptor's
+		 * id field, so it must be the buffer id read from the
+		 * available descriptor, not the ring position.
+		 */
+		zmbufs[i]->desc_idx = ids[i];
+		zmbufs[i]->desc_count = 1;
+	}
+
+	/* Extra reference keeps the mbuf alive until free_zmbuf(). */
+	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
+		rte_mbuf_refcnt_update(pkts[i], 1);
+
+	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
+		TAILQ_INSERT_TAIL(&vq->zmbuf_list, zmbufs[i], next);
+
+	vq->nr_zmbuf += PACKED_BATCH_SIZE;
+	vq_inc_last_avail_packed(vq, PACKED_BATCH_SIZE);
+
+	return 0;
+
+free_pkt:
+	/*
+	 * Give back the zmbuf entries that were obtained; put_zmbuf() is the
+	 * release counterpart also used by free_zmbuf().
+	 */
+	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
+		if (zmbufs[i])
+			put_zmbuf(zmbufs[i]);
+	}
+
+	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
+		rte_pktmbuf_free(pkts[i]);
+
+	return -1;
+}
+
+/*
+ * Zero-copy variant of the single dequeue: dequeue one packet into *pkts,
+ * bind it to a zcopy_mbuf tracking entry and queue that entry on
+ * vq->zmbuf_list so free_zmbuf() can return the buffer to the guest later.
+ *
+ * Returns 0 on success (last_avail_idx advanced by the descriptor count),
+ * -1 on failure; if no tracking entry is available the mbuf is freed.
+ */
+static __rte_always_inline int
+virtio_dev_tx_single_packed_zmbuf(struct virtio_net *dev,
+				  struct vhost_virtqueue *vq,
+				  struct rte_mempool *mbuf_pool,
+				  struct rte_mbuf **pkts)
+{
+	uint16_t buf_id, desc_count;
+	struct zcopy_mbuf *zmbuf;
+
+	if (vhost_dequeue_single_packed(dev, vq, mbuf_pool, pkts, &buf_id,
+					&desc_count))
+		return -1;
+
+	zmbuf = get_zmbuf(vq);
+	if (!zmbuf) {
+		rte_pktmbuf_free(*pkts);
+		return -1;
+	}
+	zmbuf->mbuf = *pkts;
+	/*
+	 * free_zmbuf() writes desc_idx into the used descriptor's id field,
+	 * so store the buffer id of the dequeued chain rather than the ring
+	 * position it was found at.
+	 */
+	zmbuf->desc_idx = buf_id;
+	zmbuf->desc_count = desc_count;
+
+	/* Extra reference keeps the mbuf alive until free_zmbuf(). */
+	rte_mbuf_refcnt_update(*pkts, 1);
+
+	vq->nr_zmbuf += 1;
+	TAILQ_INSERT_TAIL(&vq->zmbuf_list, zmbuf, next);
+
+	vq_inc_last_avail_packed(vq, desc_count);
+	return 0;
+}
+
+/*
+ * Walk vq->zmbuf_list and, for every entry whose mbuf passes
+ * mbuf_is_consumed(), mark the descriptor at last_used_idx as used
+ * (id = entry's desc_idx, len = 0), advance last_used_idx by the entry's
+ * desc_count, then unlink the entry and release its mbuf and slot.
+ */
+static __rte_always_inline void
+free_zmbuf(struct vhost_virtqueue *vq)
+{
+	struct zcopy_mbuf *cursor = TAILQ_FIRST(&vq->zmbuf_list);
+
+	while (cursor != NULL) {
+		struct zcopy_mbuf *zmbuf = cursor;
+		uint16_t used_idx;
+		uint16_t flags;
+
+		/* Fetch the successor first: zmbuf may be unlinked below. */
+		cursor = TAILQ_NEXT(zmbuf, next);
+
+		used_idx = vq->last_used_idx;
+
+		if (!mbuf_is_consumed(zmbuf->mbuf))
+			continue;
+
+		/* AVAIL and USED both mirror the used wrap counter. */
+		flags = vq->desc_packed[used_idx].flags;
+		if (vq->used_wrap_counter)
+			flags |= (VRING_DESC_F_USED | VRING_DESC_F_AVAIL);
+		else
+			flags &= ~(VRING_DESC_F_USED | VRING_DESC_F_AVAIL);
+
+		vq->desc_packed[used_idx].id = zmbuf->desc_idx;
+		vq->desc_packed[used_idx].len = 0;
+
+		/* id/len must be written before the flags are published. */
+		rte_smp_wmb();
+		vq->desc_packed[used_idx].flags = flags;
+
+		vq_inc_last_used_packed(vq, zmbuf->desc_count);
+
+		TAILQ_REMOVE(&vq->zmbuf_list, zmbuf, next);
+		restore_mbuf(zmbuf->mbuf);
+		rte_pktmbuf_free(zmbuf->mbuf);
+		put_zmbuf(zmbuf);
+		vq->nr_zmbuf -= 1;
+	}
+}
+
+/*
+ * Zero-copy dequeue of up to count packets from a packed ring.
+ *
+ * Completed zero-copy buffers are first returned via free_zmbuf().  Then
+ * packets are dequeued, preferring whole batches and falling back to
+ * single dequeue when a batch is not possible.  The guest is notified via
+ * vhost_vring_call_packed() if at least one packet was dequeued.
+ *
+ * Returns the number of packets stored in pkts[].
+ */
+static __rte_noinline uint16_t
+virtio_dev_tx_packed_zmbuf(struct virtio_net *dev,
+			   struct vhost_virtqueue *vq,
+			   struct rte_mempool *mbuf_pool,
+			   struct rte_mbuf **pkts,
+			   uint32_t count)
+{
+	uint32_t pkt_idx = 0;
+	uint32_t remained = count;
+
+	free_zmbuf(vq);
+
+	/*
+	 * Test before the first iteration: with count == 0 a do/while loop
+	 * would dequeue one packet anyway and wrap 'remained' to UINT32_MAX,
+	 * writing past the end of pkts[].
+	 */
+	while (remained) {
+		if (remained >= PACKED_BATCH_SIZE) {
+			if (!virtio_dev_tx_batch_packed_zmbuf(dev, vq,
+				mbuf_pool, &pkts[pkt_idx])) {
+				pkt_idx += PACKED_BATCH_SIZE;
+				remained -= PACKED_BATCH_SIZE;
+				continue;
+			}
+		}
+
+		/* Batch not possible (or too few left): take one packet. */
+		if (virtio_dev_tx_single_packed_zmbuf(dev, vq, mbuf_pool,
+						      &pkts[pkt_idx]))
+			break;
+		pkt_idx++;
+		remained--;
+	}
+
+	if (pkt_idx)
+		vhost_vring_call_packed(dev, vq);
+
+	return pkt_idx;
+}
+
+/*
+ * Report whether the descriptor following vq->last_used_idx is available,
+ * as decided by desc_is_avail().  The index wraps at vq->size and the
+ * wrap counter (taken from avail_wrap_counter) is inverted when it does.
+ */
+static __rte_always_inline bool
+next_desc_is_avail(const struct vhost_virtqueue *vq)
+{
+	uint16_t idx = vq->last_used_idx + 1;
+	bool wrap = vq->avail_wrap_counter;
+
+	if (idx >= vq->size) {
+		/* Stepped past the end of the ring. */
+		idx -= vq->size;
+		wrap = !wrap;
+	}
+
+	return desc_is_avail(&vq->desc_packed[idx], wrap);
+}
+
+/*
+ * Dequeue up to count packets from a packed ring by copying.
+ *
+ * Whole batches are preferred; when a batch cannot be dequeued a single
+ * packet is tried instead, and the loop stops at the first single-dequeue
+ * failure.  vhost_flush_dequeue_packed() is called after every successful
+ * step.  Pending shadow entries are handled once the loop ends.
+ *
+ * Returns the number of packets stored in pkts[].
+ */
+static __rte_noinline uint16_t
+virtio_dev_tx_packed(struct virtio_net *dev,
+		     struct vhost_virtqueue *vq,
+		     struct rte_mempool *mbuf_pool,
+		     struct rte_mbuf **pkts,
+		     uint32_t count)
+{
+	uint32_t pkt_idx = 0;
+	uint32_t remained = count;
+
+	/*
+	 * Test before the first iteration: with count == 0 a do/while loop
+	 * would dequeue one packet anyway and wrap 'remained' to UINT32_MAX,
+	 * writing past the end of pkts[].
+	 */
+	while (remained) {
+		rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]);
+
+		if (remained >= PACKED_BATCH_SIZE) {
+			if (!virtio_dev_tx_batch_packed(dev, vq, mbuf_pool,
+							&pkts[pkt_idx])) {
+				vhost_flush_dequeue_packed(dev, vq);
+				pkt_idx += PACKED_BATCH_SIZE;
+				remained -= PACKED_BATCH_SIZE;
+				continue;
+			}
+		}
+
+		/* Batch not possible (or too few left): take one packet. */
+		if (virtio_dev_tx_single_packed(dev, vq, mbuf_pool,
+						&pkts[pkt_idx]))
+			break;
+		vhost_flush_dequeue_packed(dev, vq);
+		pkt_idx++;
+		remained--;
+	}
+
+	if (vq->shadow_used_idx) {
+		do_data_copy_dequeue(vq);
+
+		if (remained && !next_desc_is_avail(vq)) {
+			/*
+			 * The guest may be waiting to TX some buffers to
+			 * enqueue more to avoid bufferbloat, so we try to
+			 * reduce latency here.
+			 */
+			vhost_flush_dequeue_shadow_packed(dev, vq);
+			vhost_vring_call_packed(dev, vq);
+		}
+	}
+
+	return pkt_idx;
+}
+
+uint16_t
+rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
+ struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count)
+{
+ struct virtio_net *dev;
+ struct rte_mbuf *rarp_mbuf = NULL;
+ struct vhost_virtqueue *vq;
+
+ dev = get_device(vid);
+ if (!dev)
+ return 0;
+
+ if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) {
+ VHOST_LOG_DATA(ERR,
+ "(%d) %s: built-in vhost net backend is disabled.\n",
+ dev->vid, __func__);
+ return 0;
+ }
+
+ if (unlikely(!is_valid_virt_queue_idx(queue_id, 1, dev->nr_vring))) {
+ VHOST_LOG_DATA(ERR,
+ "(%d) %s: invalid virtqueue idx %d.\n",
+ dev->vid, __func__, queue_id);
+ return 0;
+ }
+
+ vq = dev->virtqueue[queue_id];
+
+ if (unlikely(rte_spinlock_trylock(&vq->access_lock) == 0))
+ return 0;
+
+ if (unlikely(vq->enabled == 0)) {
+ count = 0;
+ goto out_access_unlock;
+ }
+
+ if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
+ vhost_user_iotlb_rd_lock(vq);
+
+ if (unlikely(vq->access_ok == 0))
+ if (unlikely(vring_translate(dev, vq) < 0)) {
+ count = 0;
+ goto out;
+ }
+
+ /*
+ * Construct a RARP broadcast packet, and inject it to the "pkts"
+ * array, to looks like that guest actually send such packet.
+ *
+ * Check user_send_rarp() for more information.
+ *
+ * broadcast_rarp shares a cacheline in the virtio_net structure
+ * with some fields that are accessed during enqueue and
+ * rte_atomic16_cmpset() causes a write if using cmpxchg. This could
+ * result in false sharing between enqueue and dequeue.
+ *
+ * Prevent unnecessary false sharing by reading broadcast_rarp first
+ * and only performing cmpset if the read indicates it is likely to
+ * be set.
+ */
+ if (unlikely(rte_atomic16_read(&dev->broadcast_rarp) &&
+ rte_atomic16_cmpset((volatile uint16_t *)
+ &dev->broadcast_rarp.cnt, 1, 0))) {
+
+ rarp_mbuf = rte_net_make_rarp_packet(mbuf_pool, &dev->mac);
+ if (rarp_mbuf == NULL) {
+ VHOST_LOG_DATA(ERR, "Failed to make RARP packet.\n");
+ count = 0;
+ goto out;
+ }
+ count -= 1;
+ }
+
+ if (vq_is_packed(dev)) {
+ if (unlikely(dev->dequeue_zero_copy))
+ count = virtio_dev_tx_packed_zmbuf(dev, vq, mbuf_pool,
+ pkts, count);
+ else
+ count = virtio_dev_tx_packed(dev, vq, mbuf_pool, pkts,
+ count);
+ } else
+ count = virtio_dev_tx_split(dev, vq, mbuf_pool, pkts, count);
+