+ buf_addr = buf_vec[vec_idx].buf_addr;
+ buf_iova = buf_vec[vec_idx].buf_iova;
+ buf_len = buf_vec[vec_idx].buf_len;
+
+ buf_offset = 0;
+ buf_avail = buf_len;
+ } else {
+ buf_offset = dev->vhost_hlen;
+ buf_avail = buf_vec[vec_idx].buf_len - dev->vhost_hlen;
+ }
+
+ PRINT_PACKET(dev,
+ (uintptr_t)(buf_addr + buf_offset),
+ (uint32_t)buf_avail, 0);
+
+ mbuf_offset = 0;
+ mbuf_avail = m->buf_len - RTE_PKTMBUF_HEADROOM;
+ while (1) {
+ uint64_t hpa;
+
+ cpy_len = RTE_MIN(buf_avail, mbuf_avail);
+
+ /*
+ * A desc buf might across two host physical pages that are
+ * not continuous. In such case (gpa_to_hpa returns 0), data
+ * will be copied even though zero copy is enabled.
+ */
+ if (unlikely(dev->dequeue_zero_copy && (hpa = gpa_to_hpa(dev,
+ buf_iova + buf_offset, cpy_len)))) {
+ cur->data_len = cpy_len;
+ cur->data_off = 0;
+ cur->buf_addr =
+ (void *)(uintptr_t)(buf_addr + buf_offset);
+ cur->buf_iova = hpa;
+
+ /*
+ * In zero copy mode, one mbuf can only reference data
+ * for one or partial of one desc buff.
+ */
+ mbuf_avail = cpy_len;
+ } else {
+ if (likely(cpy_len > MAX_BATCH_LEN ||
+ vq->batch_copy_nb_elems >= vq->size ||
+ (hdr && cur == m))) {
+ rte_memcpy(rte_pktmbuf_mtod_offset(cur, void *,
+ mbuf_offset),
+ (void *)((uintptr_t)(buf_addr +
+ buf_offset)),
+ cpy_len);
+ } else {
+ batch_copy[vq->batch_copy_nb_elems].dst =
+ rte_pktmbuf_mtod_offset(cur, void *,
+ mbuf_offset);
+ batch_copy[vq->batch_copy_nb_elems].src =
+ (void *)((uintptr_t)(buf_addr +
+ buf_offset));
+ batch_copy[vq->batch_copy_nb_elems].len =
+ cpy_len;
+ vq->batch_copy_nb_elems++;
+ }
+ }
+
+ mbuf_avail -= cpy_len;
+ mbuf_offset += cpy_len;
+ buf_avail -= cpy_len;
+ buf_offset += cpy_len;
+
+ /* This buf reaches to its end, get the next one */
+ if (buf_avail == 0) {
+ if (++vec_idx >= nr_vec)
+ break;
+
+ buf_addr = buf_vec[vec_idx].buf_addr;
+ buf_iova = buf_vec[vec_idx].buf_iova;
+ buf_len = buf_vec[vec_idx].buf_len;
+
+ buf_offset = 0;
+ buf_avail = buf_len;
+
+ PRINT_PACKET(dev, (uintptr_t)buf_addr,
+ (uint32_t)buf_avail, 0);
+ }
+
+ /*
+ * This mbuf reaches to its end, get a new one
+ * to hold more data.
+ */
+ if (mbuf_avail == 0) {
+ cur = rte_pktmbuf_alloc(mbuf_pool);
+ if (unlikely(cur == NULL)) {
+ VHOST_LOG_DATA(ERR, "Failed to "
+ "allocate memory for mbuf.\n");
+ error = -1;
+ goto out;
+ }
+ if (unlikely(dev->dequeue_zero_copy))
+ rte_mbuf_refcnt_update(cur, 1);
+
+ prev->next = cur;
+ prev->data_len = mbuf_offset;
+ m->nb_segs += 1;
+ m->pkt_len += mbuf_offset;
+ prev = cur;
+
+ mbuf_offset = 0;
+ mbuf_avail = cur->buf_len - RTE_PKTMBUF_HEADROOM;
+ }
+ }
+
+ prev->data_len = mbuf_offset;
+ m->pkt_len += mbuf_offset;
+
+ if (hdr)
+ vhost_dequeue_offload(hdr, m);
+
+out:
+
+ return error;
+}
+
+static __rte_always_inline struct zcopy_mbuf *
+get_zmbuf(struct vhost_virtqueue *vq)
+{
+ uint16_t i;
+ uint16_t last;
+ int tries = 0;
+
+ /* search [last_zmbuf_idx, zmbuf_size) */
+ i = vq->last_zmbuf_idx;
+ last = vq->zmbuf_size;
+
+again:
+ for (; i < last; i++) {
+ if (vq->zmbufs[i].in_use == 0) {
+ vq->last_zmbuf_idx = i + 1;
+ vq->zmbufs[i].in_use = 1;
+ return &vq->zmbufs[i];
+ }
+ }
+
+ tries++;
+ if (tries == 1) {
+ /* search [0, last_zmbuf_idx) */
+ i = 0;
+ last = vq->last_zmbuf_idx;
+ goto again;
+ }
+
+ return NULL;
+}
+
+static void
+virtio_dev_extbuf_free(void *addr __rte_unused, void *opaque)
+{
+ rte_free(opaque);
+}
+
+static int
+virtio_dev_extbuf_alloc(struct rte_mbuf *pkt, uint32_t size)
+{
+ struct rte_mbuf_ext_shared_info *shinfo = NULL;
+ uint32_t total_len = RTE_PKTMBUF_HEADROOM + size;
+ uint16_t buf_len;
+ rte_iova_t iova;
+ void *buf;
+
+ /* Try to use pkt buffer to store shinfo to reduce the amount of memory
+ * required, otherwise store shinfo in the new buffer.
+ */
+ if (rte_pktmbuf_tailroom(pkt) >= sizeof(*shinfo))
+ shinfo = rte_pktmbuf_mtod(pkt,
+ struct rte_mbuf_ext_shared_info *);
+ else {
+ total_len += sizeof(*shinfo) + sizeof(uintptr_t);
+ total_len = RTE_ALIGN_CEIL(total_len, sizeof(uintptr_t));
+ }
+
+ if (unlikely(total_len > UINT16_MAX))
+ return -ENOSPC;
+
+ buf_len = total_len;
+ buf = rte_malloc(NULL, buf_len, RTE_CACHE_LINE_SIZE);
+ if (unlikely(buf == NULL))
+ return -ENOMEM;
+
+ /* Initialize shinfo */
+ if (shinfo) {
+ shinfo->free_cb = virtio_dev_extbuf_free;
+ shinfo->fcb_opaque = buf;
+ rte_mbuf_ext_refcnt_set(shinfo, 1);
+ } else {
+ shinfo = rte_pktmbuf_ext_shinfo_init_helper(buf, &buf_len,
+ virtio_dev_extbuf_free, buf);
+ if (unlikely(shinfo == NULL)) {
+ rte_free(buf);
+ VHOST_LOG_DATA(ERR, "Failed to init shinfo\n");
+ return -1;
+ }
+ }
+
+ iova = rte_malloc_virt2iova(buf);
+ rte_pktmbuf_attach_extbuf(pkt, buf, iova, buf_len, shinfo);
+ rte_pktmbuf_reset_headroom(pkt);
+
+ return 0;
+}
+
+/*
+ * Allocate a host supported pktmbuf.
+ */
+static __rte_always_inline struct rte_mbuf *
+virtio_dev_pktmbuf_alloc(struct virtio_net *dev, struct rte_mempool *mp,
+ uint32_t data_len)
+{
+ struct rte_mbuf *pkt = rte_pktmbuf_alloc(mp);
+
+ if (unlikely(pkt == NULL)) {
+ VHOST_LOG_DATA(ERR,
+ "Failed to allocate memory for mbuf.\n");
+ return NULL;
+ }
+
+ if (rte_pktmbuf_tailroom(pkt) >= data_len)
+ return pkt;
+
+ /* attach an external buffer if supported */
+ if (dev->extbuf && !virtio_dev_extbuf_alloc(pkt, data_len))
+ return pkt;
+
+ /* check if chained buffers are allowed */
+ if (!dev->linearbuf)
+ return pkt;
+
+ /* Data doesn't fit into the buffer and the host supports
+ * only linear buffers
+ */
+ rte_pktmbuf_free(pkt);
+
+ return NULL;
+}
+
+static __rte_noinline uint16_t
+virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
+ struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count)
+{
+ uint16_t i;
+ uint16_t free_entries;
+ uint16_t dropped = 0;
+ static bool allocerr_warned;
+
+ if (unlikely(dev->dequeue_zero_copy)) {
+ struct zcopy_mbuf *zmbuf, *next;
+
+ for (zmbuf = TAILQ_FIRST(&vq->zmbuf_list);
+ zmbuf != NULL; zmbuf = next) {
+ next = TAILQ_NEXT(zmbuf, next);
+
+ if (mbuf_is_consumed(zmbuf->mbuf)) {
+ update_shadow_used_ring_split(vq,
+ zmbuf->desc_idx, 0);
+ TAILQ_REMOVE(&vq->zmbuf_list, zmbuf, next);
+ restore_mbuf(zmbuf->mbuf);
+ rte_pktmbuf_free(zmbuf->mbuf);
+ put_zmbuf(zmbuf);
+ vq->nr_zmbuf -= 1;
+ }
+ }
+
+ if (likely(vq->shadow_used_idx)) {
+ flush_shadow_used_ring_split(dev, vq);
+ vhost_vring_call_split(dev, vq);
+ }
+ }
+
+ /*
+ * The ordering between avail index and
+ * desc reads needs to be enforced.
+ */
+ free_entries = __atomic_load_n(&vq->avail->idx, __ATOMIC_ACQUIRE) -
+ vq->last_avail_idx;
+ if (free_entries == 0)
+ return 0;
+
+ rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]);
+
+ VHOST_LOG_DATA(DEBUG, "(%d) %s\n", dev->vid, __func__);
+
+ count = RTE_MIN(count, MAX_PKT_BURST);
+ count = RTE_MIN(count, free_entries);
+ VHOST_LOG_DATA(DEBUG, "(%d) about to dequeue %u buffers\n",
+ dev->vid, count);
+
+ for (i = 0; i < count; i++) {
+ struct buf_vector buf_vec[BUF_VECTOR_MAX];
+ uint16_t head_idx;
+ uint32_t buf_len;
+ uint16_t nr_vec = 0;
+ int err;
+
+ if (unlikely(fill_vec_buf_split(dev, vq,
+ vq->last_avail_idx + i,
+ &nr_vec, buf_vec,
+ &head_idx, &buf_len,
+ VHOST_ACCESS_RO) < 0))
+ break;
+
+ if (likely(dev->dequeue_zero_copy == 0))
+ update_shadow_used_ring_split(vq, head_idx, 0);
+
+ pkts[i] = virtio_dev_pktmbuf_alloc(dev, mbuf_pool, buf_len);
+ if (unlikely(pkts[i] == NULL)) {
+ /*
+ * mbuf allocation fails for jumbo packets when external
+ * buffer allocation is not allowed and linear buffer
+ * is required. Drop this packet.
+ */
+ if (!allocerr_warned) {
+ VHOST_LOG_DATA(ERR,
+ "Failed mbuf alloc of size %d from %s on %s.\n",
+ buf_len, mbuf_pool->name, dev->ifname);
+ allocerr_warned = true;
+ }
+ dropped += 1;
+ i++;
+ break;
+ }
+
+ err = copy_desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkts[i],
+ mbuf_pool);
+ if (unlikely(err)) {
+ rte_pktmbuf_free(pkts[i]);
+ if (!allocerr_warned) {
+ VHOST_LOG_DATA(ERR,
+ "Failed to copy desc to mbuf on %s.\n",
+ dev->ifname);
+ allocerr_warned = true;
+ }
+ dropped += 1;
+ i++;
+ break;
+ }
+
+ if (unlikely(dev->dequeue_zero_copy)) {
+ struct zcopy_mbuf *zmbuf;
+
+ zmbuf = get_zmbuf(vq);
+ if (!zmbuf) {
+ rte_pktmbuf_free(pkts[i]);
+ dropped += 1;
+ i++;
+ break;
+ }
+ zmbuf->mbuf = pkts[i];
+ zmbuf->desc_idx = head_idx;
+
+ /*
+ * Pin lock the mbuf; we will check later to see
+ * whether the mbuf is freed (when we are the last
+ * user) or not. If that's the case, we then could
+ * update the used ring safely.
+ */
+ rte_mbuf_refcnt_update(pkts[i], 1);
+
+ vq->nr_zmbuf += 1;
+ TAILQ_INSERT_TAIL(&vq->zmbuf_list, zmbuf, next);
+ }
+ }
+ vq->last_avail_idx += i;
+
+ if (likely(dev->dequeue_zero_copy == 0)) {
+ do_data_copy_dequeue(vq);
+ if (unlikely(i < count))
+ vq->shadow_used_idx = i;
+ if (likely(vq->shadow_used_idx)) {
+ flush_shadow_used_ring_split(dev, vq);
+ vhost_vring_call_split(dev, vq);
+ }
+ }
+
+ return (i - dropped);
+}
+
+static __rte_always_inline int
+vhost_reserve_avail_batch_packed(struct virtio_net *dev,
+ struct vhost_virtqueue *vq,
+ struct rte_mempool *mbuf_pool,
+ struct rte_mbuf **pkts,
+ uint16_t avail_idx,
+ uintptr_t *desc_addrs,
+ uint16_t *ids)
+{
+ bool wrap = vq->avail_wrap_counter;
+ struct vring_packed_desc *descs = vq->desc_packed;
+ struct virtio_net_hdr *hdr;
+ uint64_t lens[PACKED_BATCH_SIZE];
+ uint64_t buf_lens[PACKED_BATCH_SIZE];
+ uint32_t buf_offset = dev->vhost_hlen;
+ uint16_t flags, i;
+
+ if (unlikely(avail_idx & PACKED_BATCH_MASK))
+ return -1;
+ if (unlikely((avail_idx + PACKED_BATCH_SIZE) > vq->size))
+ return -1;
+
+ vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
+ flags = descs[avail_idx + i].flags;
+ if (unlikely((wrap != !!(flags & VRING_DESC_F_AVAIL)) ||
+ (wrap == !!(flags & VRING_DESC_F_USED)) ||
+ (flags & PACKED_DESC_SINGLE_DEQUEUE_FLAG)))
+ return -1;
+ }
+
+ rte_smp_rmb();
+
+ vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
+ lens[i] = descs[avail_idx + i].len;
+
+ vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
+ desc_addrs[i] = vhost_iova_to_vva(dev, vq,
+ descs[avail_idx + i].addr,
+ &lens[i], VHOST_ACCESS_RW);
+ }
+
+ vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
+ if (unlikely(!desc_addrs[i]))
+ return -1;
+ if (unlikely((lens[i] != descs[avail_idx + i].len)))
+ return -1;
+ }
+
+ vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
+ pkts[i] = virtio_dev_pktmbuf_alloc(dev, mbuf_pool, lens[i]);
+ if (!pkts[i])
+ goto free_buf;
+ }
+
+ vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
+ buf_lens[i] = pkts[i]->buf_len - pkts[i]->data_off;
+
+ vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
+ if (unlikely(buf_lens[i] < (lens[i] - buf_offset)))
+ goto free_buf;
+ }
+
+ vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
+ pkts[i]->pkt_len = descs[avail_idx + i].len - buf_offset;
+ pkts[i]->data_len = pkts[i]->pkt_len;
+ ids[i] = descs[avail_idx + i].id;
+ }
+
+ if (virtio_net_with_host_offload(dev)) {
+ vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
+ hdr = (struct virtio_net_hdr *)(desc_addrs[i]);
+ vhost_dequeue_offload(hdr, pkts[i]);
+ }
+ }
+
+ return 0;
+
+free_buf:
+ for (i = 0; i < PACKED_BATCH_SIZE; i++)
+ rte_pktmbuf_free(pkts[i]);
+
+ return -1;
+}
+
+static __rte_always_inline int
+virtio_dev_tx_batch_packed(struct virtio_net *dev,
+ struct vhost_virtqueue *vq,
+ struct rte_mempool *mbuf_pool,
+ struct rte_mbuf **pkts)
+{
+ uint16_t avail_idx = vq->last_avail_idx;
+ uint32_t buf_offset = dev->vhost_hlen;
+ uintptr_t desc_addrs[PACKED_BATCH_SIZE];
+ uint16_t ids[PACKED_BATCH_SIZE];
+ uint16_t i;
+
+ if (vhost_reserve_avail_batch_packed(dev, vq, mbuf_pool, pkts,
+ avail_idx, desc_addrs, ids))
+ return -1;
+
+ vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
+ rte_prefetch0((void *)(uintptr_t)desc_addrs[i]);
+
+ vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
+ rte_memcpy(rte_pktmbuf_mtod_offset(pkts[i], void *, 0),
+ (void *)(uintptr_t)(desc_addrs[i] + buf_offset),
+ pkts[i]->pkt_len);
+
+ if (virtio_net_is_inorder(dev))
+ vhost_shadow_dequeue_batch_packed_inorder(vq,
+ ids[PACKED_BATCH_SIZE - 1]);
+ else
+ vhost_shadow_dequeue_batch_packed(dev, vq, ids);
+
+ vq_inc_last_avail_packed(vq, PACKED_BATCH_SIZE);
+
+ return 0;
+}
+
+static __rte_always_inline int
+vhost_dequeue_single_packed(struct virtio_net *dev,
+ struct vhost_virtqueue *vq,
+ struct rte_mempool *mbuf_pool,
+ struct rte_mbuf **pkts,
+ uint16_t *buf_id,
+ uint16_t *desc_count)
+{
+ struct buf_vector buf_vec[BUF_VECTOR_MAX];
+ uint32_t buf_len;
+ uint16_t nr_vec = 0;
+ int err;
+ static bool allocerr_warned;
+
+ if (unlikely(fill_vec_buf_packed(dev, vq,
+ vq->last_avail_idx, desc_count,
+ buf_vec, &nr_vec,
+ buf_id, &buf_len,
+ VHOST_ACCESS_RO) < 0))
+ return -1;