X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=lib%2Flibrte_vhost%2Fvirtio_net.c;h=21041590472ccde899364d293f3627098f4348c0;hb=7470f845c17ac27ce08b22f3c024169e51ade990;hp=b09e03fbcc74c8dc3fe12266b24ccceb511c411e;hpb=c119edbc2d46c28eea28d2a691c78275e1e6acb6;p=dpdk.git diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c index b09e03fbcc..2104159047 100644 --- a/lib/librte_vhost/virtio_net.c +++ b/lib/librte_vhost/virtio_net.c @@ -31,6 +31,12 @@ rxvq_is_mergeable(struct virtio_net *dev) return dev->features & (1ULL << VIRTIO_NET_F_MRG_RXBUF); } +static __rte_always_inline bool +virtio_net_is_inorder(struct virtio_net *dev) +{ + return dev->features & (1ULL << VIRTIO_F_IN_ORDER); +} + static bool is_valid_virt_queue_idx(uint32_t idx, int is_tx, uint32_t nr_vring) { @@ -202,65 +208,21 @@ vhost_flush_enqueue_batch_packed(struct virtio_net *dev, } static __rte_always_inline void -flush_shadow_used_ring_packed(struct virtio_net *dev, - struct vhost_virtqueue *vq) +vhost_shadow_dequeue_batch_packed_inorder(struct vhost_virtqueue *vq, + uint16_t id) { - int i; - uint16_t used_idx = vq->last_used_idx; - uint16_t head_idx = vq->last_used_idx; - uint16_t head_flags = 0; - - /* Split loop in two to save memory barriers */ - for (i = 0; i < vq->shadow_used_idx; i++) { - vq->desc_packed[used_idx].id = vq->shadow_used_packed[i].id; - vq->desc_packed[used_idx].len = vq->shadow_used_packed[i].len; - - used_idx += vq->shadow_used_packed[i].count; - if (used_idx >= vq->size) - used_idx -= vq->size; - } - - for (i = 0; i < vq->shadow_used_idx; i++) { - uint16_t flags; - - if (vq->shadow_used_packed[i].len) - flags = VRING_DESC_F_WRITE; - else - flags = 0; - - if (vq->used_wrap_counter) { - flags |= VRING_DESC_F_USED; - flags |= VRING_DESC_F_AVAIL; - } else { - flags &= ~VRING_DESC_F_USED; - flags &= ~VRING_DESC_F_AVAIL; - } - - if (i > 0) { - vq->desc_packed[vq->last_used_idx].flags = flags; + vq->shadow_used_packed[0].id = id; - vhost_log_cache_used_vring(dev, vq, - vq->last_used_idx * - sizeof(struct vring_packed_desc), - sizeof(struct vring_packed_desc)); - } else { - head_idx = vq->last_used_idx; - head_flags = flags; - } - - vq_inc_last_used_packed(vq, vq->shadow_used_packed[i].count); + if (!vq->shadow_used_idx) { + vq->shadow_last_used_idx = vq->last_used_idx; + vq->shadow_used_packed[0].flags = + PACKED_DESC_DEQUEUE_USED_FLAG(vq->used_wrap_counter); + vq->shadow_used_packed[0].len = 0; + vq->shadow_used_packed[0].count = 1; + vq->shadow_used_idx++; } - __atomic_store_n(&vq->desc_packed[head_idx].flags, head_flags, - __ATOMIC_RELEASE); - - vhost_log_cache_used_vring(dev, vq, - head_idx * - sizeof(struct vring_packed_desc), - sizeof(struct vring_packed_desc)); - - vq->shadow_used_idx = 0; - vhost_log_cache_sync(dev, vq); + vq_inc_last_used_packed(vq, PACKED_BATCH_SIZE); } static __rte_always_inline void @@ -336,14 +298,31 @@ vhost_shadow_dequeue_single_packed(struct vhost_virtqueue *vq, } static __rte_always_inline void -update_shadow_used_ring_packed(struct vhost_virtqueue *vq, - uint16_t desc_idx, uint32_t len, uint16_t count) +vhost_shadow_dequeue_single_packed_inorder(struct vhost_virtqueue *vq, + uint16_t buf_id, + uint16_t count) { - uint16_t i = vq->shadow_used_idx++; + uint16_t flags; + + vq->shadow_used_packed[0].id = buf_id; + + flags = vq->desc_packed[vq->last_used_idx].flags; + if (vq->used_wrap_counter) { + flags |= VRING_DESC_F_USED; + flags |= VRING_DESC_F_AVAIL; + } else { + flags &= ~VRING_DESC_F_USED; + flags &= ~VRING_DESC_F_AVAIL; + } + + if 
(!vq->shadow_used_idx) { + vq->shadow_last_used_idx = vq->last_used_idx; + vq->shadow_used_packed[0].len = 0; + vq->shadow_used_packed[0].flags = flags; + vq->shadow_used_idx++; + } - vq->shadow_used_packed[i].id = desc_idx; - vq->shadow_used_packed[i].len = len; - vq->shadow_used_packed[i].count = count; + vq_inc_last_used_packed(vq, count); } static inline void @@ -403,7 +382,7 @@ vhost_shadow_enqueue_single_packed(struct virtio_net *dev, } } -static __rte_unused void +static __rte_always_inline void vhost_flush_dequeue_packed(struct virtio_net *dev, struct vhost_virtqueue *vq) { @@ -466,6 +445,7 @@ virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr *net_hdr) ipv4_hdr = rte_pktmbuf_mtod_offset(m_buf, struct rte_ipv4_hdr *, m_buf->l2_len); + ipv4_hdr->hdr_checksum = 0; ipv4_hdr->hdr_checksum = rte_ipv4_cksum(ipv4_hdr); } @@ -778,64 +758,6 @@ fill_vec_buf_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, return 0; } -/* - * Returns -1 on fail, 0 on success - */ -static inline int -reserve_avail_buf_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, - uint32_t size, struct buf_vector *buf_vec, - uint16_t *nr_vec, uint16_t *num_buffers, - uint16_t *nr_descs) -{ - uint16_t avail_idx; - uint16_t vec_idx = 0; - uint16_t max_tries, tries = 0; - - uint16_t buf_id = 0; - uint32_t len = 0; - uint16_t desc_count; - - *num_buffers = 0; - avail_idx = vq->last_avail_idx; - - if (rxvq_is_mergeable(dev)) - max_tries = vq->size - 1; - else - max_tries = 1; - - while (size > 0) { - /* - * if we tried all available ring items, and still - * can't get enough buf, it means something abnormal - * happened. - */ - if (unlikely(++tries > max_tries)) - return -1; - - if (unlikely(fill_vec_buf_packed(dev, vq, - avail_idx, &desc_count, - buf_vec, &vec_idx, - &buf_id, &len, - VHOST_ACCESS_RW) < 0)) - return -1; - - len = RTE_MIN(len, size); - update_shadow_used_ring_packed(vq, buf_id, len, desc_count); - size -= len; - - avail_idx += desc_count; - if (avail_idx >= vq->size) - avail_idx -= vq->size; - - *nr_descs += desc_count; - *num_buffers += 1; - } - - *nr_vec = vec_idx; - - return 0; -} - static __rte_noinline void copy_vnet_hdr_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq, struct buf_vector *buf_vec, @@ -903,7 +825,7 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq, else hdr = (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)hdr_addr; - VHOST_LOG_DEBUG(VHOST_DATA, "(%d) RX: num merge buffers %d\n", + VHOST_LOG_DATA(DEBUG, "(%d) RX: num merge buffers %d\n", dev->vid, num_buffers); if (unlikely(buf_len < dev->vhost_hlen)) { @@ -1087,14 +1009,14 @@ virtio_dev_rx_split(struct virtio_net *dev, struct vhost_virtqueue *vq, if (unlikely(reserve_avail_buf_split(dev, vq, pkt_len, buf_vec, &num_buffers, avail_head, &nr_vec) < 0)) { - VHOST_LOG_DEBUG(VHOST_DATA, + VHOST_LOG_DATA(DEBUG, "(%d) failed to get enough desc from vring\n", dev->vid); vq->shadow_used_idx -= num_buffers; break; } - VHOST_LOG_DEBUG(VHOST_DATA, "(%d) current index %d | end index %d\n", + VHOST_LOG_DATA(DEBUG, "(%d) current index %d | end index %d\n", dev->vid, vq->last_avail_idx, vq->last_avail_idx + num_buffers); @@ -1118,7 +1040,7 @@ virtio_dev_rx_split(struct virtio_net *dev, struct vhost_virtqueue *vq, return pkt_idx; } -static __rte_unused int +static __rte_always_inline int virtio_dev_rx_batch_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, struct rte_mbuf **pkts) @@ -1186,6 +1108,10 @@ virtio_dev_rx_batch_packed(struct virtio_net *dev, pkts[i]->pkt_len); } + 
vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) + vhost_log_cache_write_iova(dev, vq, descs[avail_idx + i].addr, + lens[i]); + vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) ids[i] = descs[avail_idx + i].id; @@ -1194,7 +1120,7 @@ virtio_dev_rx_batch_packed(struct virtio_net *dev, return 0; } -static __rte_unused int16_t +static __rte_always_inline int16_t virtio_dev_rx_single_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, struct rte_mbuf *pkt) @@ -1205,13 +1131,13 @@ virtio_dev_rx_single_packed(struct virtio_net *dev, rte_smp_rmb(); if (unlikely(vhost_enqueue_single_packed(dev, vq, pkt, buf_vec, &nr_descs) < 0)) { - VHOST_LOG_DEBUG(VHOST_DATA, + VHOST_LOG_DATA(DEBUG, "(%d) failed to get enough desc from vring\n", dev->vid); return -1; } - VHOST_LOG_DEBUG(VHOST_DATA, "(%d) current index %d | end index %d\n", + VHOST_LOG_DATA(DEBUG, "(%d) current index %d | end index %d\n", dev->vid, vq->last_avail_idx, vq->last_avail_idx + nr_descs); @@ -1221,49 +1147,41 @@ virtio_dev_rx_single_packed(struct virtio_net *dev, } static __rte_noinline uint32_t -virtio_dev_rx_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, - struct rte_mbuf **pkts, uint32_t count) +virtio_dev_rx_packed(struct virtio_net *dev, + struct vhost_virtqueue *vq, + struct rte_mbuf **pkts, + uint32_t count) { uint32_t pkt_idx = 0; - uint16_t num_buffers; - struct buf_vector buf_vec[BUF_VECTOR_MAX]; + uint32_t remained = count; - for (pkt_idx = 0; pkt_idx < count; pkt_idx++) { - uint32_t pkt_len = pkts[pkt_idx]->pkt_len + dev->vhost_hlen; - uint16_t nr_vec = 0; - uint16_t nr_descs = 0; + do { + rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]); - if (unlikely(reserve_avail_buf_packed(dev, vq, - pkt_len, buf_vec, &nr_vec, - &num_buffers, &nr_descs) < 0)) { - VHOST_LOG_DEBUG(VHOST_DATA, - "(%d) failed to get enough desc from vring\n", - dev->vid); - vq->shadow_used_idx -= num_buffers; - break; + if (remained >= PACKED_BATCH_SIZE) { + if (!virtio_dev_rx_batch_packed(dev, vq, + &pkts[pkt_idx])) { + pkt_idx += PACKED_BATCH_SIZE; + remained -= PACKED_BATCH_SIZE; + continue; + } } - VHOST_LOG_DEBUG(VHOST_DATA, "(%d) current index %d | end index %d\n", - dev->vid, vq->last_avail_idx, - vq->last_avail_idx + num_buffers); - - if (copy_mbuf_to_desc(dev, vq, pkts[pkt_idx], - buf_vec, nr_vec, - num_buffers) < 0) { - vq->shadow_used_idx -= num_buffers; + if (virtio_dev_rx_single_packed(dev, vq, pkts[pkt_idx])) break; - } - - vq_inc_last_avail_packed(vq, nr_descs); - } + pkt_idx++; + remained--; - do_data_copy_enqueue(dev, vq); + } while (pkt_idx < count); - if (likely(vq->shadow_used_idx)) { + if (vq->shadow_used_idx) { + do_data_copy_enqueue(dev, vq); vhost_flush_enqueue_shadow_packed(dev, vq); - vhost_vring_call_packed(dev, vq); } + if (pkt_idx) + vhost_vring_call_packed(dev, vq); + return pkt_idx; } @@ -1274,9 +1192,9 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id, struct vhost_virtqueue *vq; uint32_t nb_tx = 0; - VHOST_LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__); + VHOST_LOG_DATA(DEBUG, "(%d) %s\n", dev->vid, __func__); if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) { - RTE_LOG(ERR, VHOST_DATA, "(%d) %s: invalid virtqueue idx %d.\n", + VHOST_LOG_DATA(ERR, "(%d) %s: invalid virtqueue idx %d.\n", dev->vid, __func__, queue_id); return 0; } @@ -1324,7 +1242,7 @@ rte_vhost_enqueue_burst(int vid, uint16_t queue_id, return 0; if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) { - RTE_LOG(ERR, VHOST_DATA, + VHOST_LOG_DATA(ERR, "(%d) %s: built-in vhost net backend is 
disabled.\n", dev->vid, __func__); return 0; @@ -1441,7 +1359,7 @@ vhost_dequeue_offload(struct virtio_net_hdr *hdr, struct rte_mbuf *m) m->l4_len = sizeof(struct rte_udp_hdr); break; default: - RTE_LOG(WARNING, VHOST_DATA, + VHOST_LOG_DATA(WARNING, "unsupported gso type %u.\n", hdr->gso_type); break; } @@ -1613,7 +1531,7 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq, if (mbuf_avail == 0) { cur = rte_pktmbuf_alloc(mbuf_pool); if (unlikely(cur == NULL)) { - RTE_LOG(ERR, VHOST_DATA, "Failed to " + VHOST_LOG_DATA(ERR, "Failed to " "allocate memory for mbuf.\n"); error = -1; goto out; @@ -1718,7 +1636,7 @@ virtio_dev_extbuf_alloc(struct rte_mbuf *pkt, uint32_t size) virtio_dev_extbuf_free, buf); if (unlikely(shinfo == NULL)) { rte_free(buf); - RTE_LOG(ERR, VHOST_DATA, "Failed to init shinfo\n"); + VHOST_LOG_DATA(ERR, "Failed to init shinfo\n"); return -1; } } @@ -1739,8 +1657,11 @@ virtio_dev_pktmbuf_alloc(struct virtio_net *dev, struct rte_mempool *mp, { struct rte_mbuf *pkt = rte_pktmbuf_alloc(mp); - if (unlikely(pkt == NULL)) + if (unlikely(pkt == NULL)) { + VHOST_LOG_DATA(ERR, + "Failed to allocate memory for mbuf.\n"); return NULL; + } if (rte_pktmbuf_tailroom(pkt) >= data_len) return pkt; @@ -1805,11 +1726,11 @@ virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq, rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]); - VHOST_LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__); + VHOST_LOG_DATA(DEBUG, "(%d) %s\n", dev->vid, __func__); count = RTE_MIN(count, MAX_PKT_BURST); count = RTE_MIN(count, free_entries); - VHOST_LOG_DEBUG(VHOST_DATA, "(%d) about to dequeue %u buffers\n", + VHOST_LOG_DATA(DEBUG, "(%d) about to dequeue %u buffers\n", dev->vid, count); for (i = 0; i < count; i++) { @@ -1960,7 +1881,7 @@ free_buf: return -1; } -static __rte_unused int +static __rte_always_inline int virtio_dev_tx_batch_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, struct rte_mempool *mbuf_pool, @@ -1984,7 +1905,11 @@ virtio_dev_tx_batch_packed(struct virtio_net *dev, (void *)(uintptr_t)(desc_addrs[i] + buf_offset), pkts[i]->pkt_len); - vhost_shadow_dequeue_batch_packed(dev, vq, ids); + if (virtio_net_is_inorder(dev)) + vhost_shadow_dequeue_batch_packed_inorder(vq, + ids[PACKED_BATCH_SIZE - 1]); + else + vhost_shadow_dequeue_batch_packed(dev, vq, ids); vq_inc_last_avail_packed(vq, PACKED_BATCH_SIZE); @@ -2013,7 +1938,7 @@ vhost_dequeue_single_packed(struct virtio_net *dev, *pkts = virtio_dev_pktmbuf_alloc(dev, mbuf_pool, buf_len); if (unlikely(*pkts == NULL)) { - RTE_LOG(ERR, VHOST_DATA, + VHOST_LOG_DATA(ERR, "Failed to allocate memory for mbuf.\n"); return -1; } @@ -2028,7 +1953,7 @@ vhost_dequeue_single_packed(struct virtio_net *dev, return 0; } -static __rte_unused int +static __rte_always_inline int virtio_dev_tx_single_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, struct rte_mempool *mbuf_pool, @@ -2041,119 +1966,234 @@ virtio_dev_tx_single_packed(struct virtio_net *dev, &desc_count)) return -1; - vhost_shadow_dequeue_single_packed(vq, buf_id, desc_count); + if (virtio_net_is_inorder(dev)) + vhost_shadow_dequeue_single_packed_inorder(vq, buf_id, + desc_count); + else + vhost_shadow_dequeue_single_packed(vq, buf_id, desc_count); vq_inc_last_avail_packed(vq, desc_count); return 0; } -static __rte_noinline uint16_t -virtio_dev_tx_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, - struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count) +static __rte_always_inline int 
+virtio_dev_tx_batch_packed_zmbuf(struct virtio_net *dev, + struct vhost_virtqueue *vq, + struct rte_mempool *mbuf_pool, + struct rte_mbuf **pkts) { + struct zcopy_mbuf *zmbufs[PACKED_BATCH_SIZE]; + uintptr_t desc_addrs[PACKED_BATCH_SIZE]; + uint16_t ids[PACKED_BATCH_SIZE]; uint16_t i; - if (unlikely(dev->dequeue_zero_copy)) { - struct zcopy_mbuf *zmbuf, *next; + uint16_t avail_idx = vq->last_avail_idx; - for (zmbuf = TAILQ_FIRST(&vq->zmbuf_list); - zmbuf != NULL; zmbuf = next) { - next = TAILQ_NEXT(zmbuf, next); + if (vhost_reserve_avail_batch_packed(dev, vq, mbuf_pool, pkts, + avail_idx, desc_addrs, ids)) + return -1; - if (mbuf_is_consumed(zmbuf->mbuf)) { - update_shadow_used_ring_packed(vq, - zmbuf->desc_idx, - 0, - zmbuf->desc_count); + vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) + zmbufs[i] = get_zmbuf(vq); - TAILQ_REMOVE(&vq->zmbuf_list, zmbuf, next); - restore_mbuf(zmbuf->mbuf); - rte_pktmbuf_free(zmbuf->mbuf); - put_zmbuf(zmbuf); - vq->nr_zmbuf -= 1; + vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { + if (!zmbufs[i]) + goto free_pkt; + } + + vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { + zmbufs[i]->mbuf = pkts[i]; + zmbufs[i]->desc_idx = ids[i]; + zmbufs[i]->desc_count = 1; + } + + vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) + rte_mbuf_refcnt_update(pkts[i], 1); + + vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) + TAILQ_INSERT_TAIL(&vq->zmbuf_list, zmbufs[i], next); + + vq->nr_zmbuf += PACKED_BATCH_SIZE; + vq_inc_last_avail_packed(vq, PACKED_BATCH_SIZE); + + return 0; + +free_pkt: + vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) + rte_pktmbuf_free(pkts[i]); + + return -1; +} + +static __rte_always_inline int +virtio_dev_tx_single_packed_zmbuf(struct virtio_net *dev, + struct vhost_virtqueue *vq, + struct rte_mempool *mbuf_pool, + struct rte_mbuf **pkts) +{ + uint16_t buf_id, desc_count; + struct zcopy_mbuf *zmbuf; + + if (vhost_dequeue_single_packed(dev, vq, mbuf_pool, pkts, &buf_id, + &desc_count)) + return -1; + + zmbuf = get_zmbuf(vq); + if (!zmbuf) { + rte_pktmbuf_free(*pkts); + return -1; + } + zmbuf->mbuf = *pkts; + zmbuf->desc_idx = buf_id; + zmbuf->desc_count = desc_count; + + rte_mbuf_refcnt_update(*pkts, 1); + + vq->nr_zmbuf += 1; + TAILQ_INSERT_TAIL(&vq->zmbuf_list, zmbuf, next); + + vq_inc_last_avail_packed(vq, desc_count); + return 0; +} + +static __rte_always_inline void +free_zmbuf(struct vhost_virtqueue *vq) +{ + struct zcopy_mbuf *next = NULL; + struct zcopy_mbuf *zmbuf; + + for (zmbuf = TAILQ_FIRST(&vq->zmbuf_list); + zmbuf != NULL; zmbuf = next) { + next = TAILQ_NEXT(zmbuf, next); + + uint16_t last_used_idx = vq->last_used_idx; + + if (mbuf_is_consumed(zmbuf->mbuf)) { + uint16_t flags; + flags = vq->desc_packed[last_used_idx].flags; + if (vq->used_wrap_counter) { + flags |= VRING_DESC_F_USED; + flags |= VRING_DESC_F_AVAIL; + } else { + flags &= ~VRING_DESC_F_USED; + flags &= ~VRING_DESC_F_AVAIL; } - } - if (likely(vq->shadow_used_idx)) { - flush_shadow_used_ring_packed(dev, vq); - vhost_vring_call_packed(dev, vq); + vq->desc_packed[last_used_idx].id = zmbuf->desc_idx; + vq->desc_packed[last_used_idx].len = 0; + + rte_smp_wmb(); + vq->desc_packed[last_used_idx].flags = flags; + + vq_inc_last_used_packed(vq, zmbuf->desc_count); + + TAILQ_REMOVE(&vq->zmbuf_list, zmbuf, next); + restore_mbuf(zmbuf->mbuf); + rte_pktmbuf_free(zmbuf->mbuf); + put_zmbuf(zmbuf); + vq->nr_zmbuf -= 1; } } +} - VHOST_LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__); +static __rte_noinline uint16_t +virtio_dev_tx_packed_zmbuf(struct virtio_net *dev, + 
struct vhost_virtqueue *vq, + struct rte_mempool *mbuf_pool, + struct rte_mbuf **pkts, + uint32_t count) +{ + uint32_t pkt_idx = 0; + uint32_t remained = count; - count = RTE_MIN(count, MAX_PKT_BURST); - VHOST_LOG_DEBUG(VHOST_DATA, "(%d) about to dequeue %u buffers\n", - dev->vid, count); + free_zmbuf(vq); - for (i = 0; i < count; i++) { - struct buf_vector buf_vec[BUF_VECTOR_MAX]; - uint16_t buf_id; - uint32_t buf_len; - uint16_t desc_count, nr_vec = 0; - int err; + do { + if (remained >= PACKED_BATCH_SIZE) { + if (!virtio_dev_tx_batch_packed_zmbuf(dev, vq, + mbuf_pool, &pkts[pkt_idx])) { + pkt_idx += PACKED_BATCH_SIZE; + remained -= PACKED_BATCH_SIZE; + continue; + } + } - if (unlikely(fill_vec_buf_packed(dev, vq, - vq->last_avail_idx, &desc_count, - buf_vec, &nr_vec, - &buf_id, &buf_len, - VHOST_ACCESS_RO) < 0)) + if (virtio_dev_tx_single_packed_zmbuf(dev, vq, mbuf_pool, + &pkts[pkt_idx])) break; + pkt_idx++; + remained--; - if (likely(dev->dequeue_zero_copy == 0)) - update_shadow_used_ring_packed(vq, buf_id, 0, - desc_count); + } while (remained); - pkts[i] = virtio_dev_pktmbuf_alloc(dev, mbuf_pool, buf_len); - if (unlikely(pkts[i] == NULL)) - break; + if (pkt_idx) + vhost_vring_call_packed(dev, vq); - err = copy_desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkts[i], - mbuf_pool); - if (unlikely(err)) { - rte_pktmbuf_free(pkts[i]); - break; - } + return pkt_idx; +} - if (unlikely(dev->dequeue_zero_copy)) { - struct zcopy_mbuf *zmbuf; +static __rte_always_inline bool +next_desc_is_avail(const struct vhost_virtqueue *vq) +{ + bool wrap_counter = vq->avail_wrap_counter; + uint16_t next_used_idx = vq->last_used_idx + 1; - zmbuf = get_zmbuf(vq); - if (!zmbuf) { - rte_pktmbuf_free(pkts[i]); - break; - } - zmbuf->mbuf = pkts[i]; - zmbuf->desc_idx = buf_id; - zmbuf->desc_count = desc_count; + if (next_used_idx >= vq->size) { + next_used_idx -= vq->size; + wrap_counter ^= 1; + } - /* - * Pin lock the mbuf; we will check later to see - * whether the mbuf is freed (when we are the last - * user) or not. If that's the case, we then could - * update the used ring safely. - */ - rte_mbuf_refcnt_update(pkts[i], 1); + return desc_is_avail(&vq->desc_packed[next_used_idx], wrap_counter); +} - vq->nr_zmbuf += 1; - TAILQ_INSERT_TAIL(&vq->zmbuf_list, zmbuf, next); +static __rte_noinline uint16_t +virtio_dev_tx_packed(struct virtio_net *dev, + struct vhost_virtqueue *vq, + struct rte_mempool *mbuf_pool, + struct rte_mbuf **pkts, + uint32_t count) +{ + uint32_t pkt_idx = 0; + uint32_t remained = count; + + do { + rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]); + + if (remained >= PACKED_BATCH_SIZE) { + if (!virtio_dev_tx_batch_packed(dev, vq, mbuf_pool, + &pkts[pkt_idx])) { + vhost_flush_dequeue_packed(dev, vq); + pkt_idx += PACKED_BATCH_SIZE; + remained -= PACKED_BATCH_SIZE; + continue; + } } - vq_inc_last_avail_packed(vq, desc_count); - } + if (virtio_dev_tx_single_packed(dev, vq, mbuf_pool, + &pkts[pkt_idx])) + break; + vhost_flush_dequeue_packed(dev, vq); + pkt_idx++; + remained--; - if (likely(dev->dequeue_zero_copy == 0)) { + } while (remained); + + if (vq->shadow_used_idx) { do_data_copy_dequeue(vq); - if (unlikely(i < count)) - vq->shadow_used_idx = i; - if (likely(vq->shadow_used_idx)) { - flush_shadow_used_ring_packed(dev, vq); + + if (remained && !next_desc_is_avail(vq)) { + /* + * The guest may be waiting to TX some buffers to + * enqueue more to avoid bufferfloat, so we try to + * reduce latency here. 
+ */ + vhost_flush_dequeue_shadow_packed(dev, vq); vhost_vring_call_packed(dev, vq); } } - return i; + return pkt_idx; } uint16_t @@ -2169,14 +2209,15 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id, return 0; if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) { - RTE_LOG(ERR, VHOST_DATA, + VHOST_LOG_DATA(ERR, "(%d) %s: built-in vhost net backend is disabled.\n", dev->vid, __func__); return 0; } if (unlikely(!is_valid_virt_queue_idx(queue_id, 1, dev->nr_vring))) { - RTE_LOG(ERR, VHOST_DATA, "(%d) %s: invalid virtqueue idx %d.\n", + VHOST_LOG_DATA(ERR, + "(%d) %s: invalid virtqueue idx %d.\n", dev->vid, __func__, queue_id); return 0; } @@ -2221,17 +2262,21 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id, rarp_mbuf = rte_net_make_rarp_packet(mbuf_pool, &dev->mac); if (rarp_mbuf == NULL) { - RTE_LOG(ERR, VHOST_DATA, - "Failed to make RARP packet.\n"); + VHOST_LOG_DATA(ERR, "Failed to make RARP packet.\n"); count = 0; goto out; } count -= 1; } - if (vq_is_packed(dev)) - count = virtio_dev_tx_packed(dev, vq, mbuf_pool, pkts, count); - else + if (vq_is_packed(dev)) { + if (unlikely(dev->dequeue_zero_copy)) + count = virtio_dev_tx_packed_zmbuf(dev, vq, mbuf_pool, + pkts, count); + else + count = virtio_dev_tx_packed(dev, vq, mbuf_pool, pkts, + count); + } else count = virtio_dev_tx_split(dev, vq, mbuf_pool, pkts, count); out:
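
For reference, the wrap-counter flag handling that the in-order dequeue path (vhost_shadow_dequeue_single_packed_inorder) and the zero-copy free path (free_zmbuf) above open-code can be summarized in a standalone sketch. The packed_used_flags() helper below is illustrative only, not part of this patch, and the flag values are assumed from the virtio 1.1 packed descriptor layout:

#include <stdbool.h>
#include <stdint.h>

/* Assumed packed-ring flag bits (virtio 1.1): bit 7 = AVAIL, bit 15 = USED. */
#define VRING_DESC_F_AVAIL (1 << 7)
#define VRING_DESC_F_USED  (1 << 15)

/*
 * Illustrative helper only: a descriptor is marked as used by making
 * its AVAIL and USED bits both track the ring's used wrap counter,
 * exactly as the in-order dequeue and zero-copy free paths do inline.
 */
static inline uint16_t
packed_used_flags(uint16_t flags, bool used_wrap_counter)
{
	if (used_wrap_counter)
		flags |= VRING_DESC_F_USED | VRING_DESC_F_AVAIL;
	else
		flags &= ~(VRING_DESC_F_USED | VRING_DESC_F_AVAIL);
	return flags;
}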