This section is a comment. Make sure to start the actual text at the margin.
+* **Added vhost-user indirect descriptors support.**
+
+ If the indirect descriptor feature is negotiated, each packet sent by the
+ guest will take exactly one slot in the enqueue virtqueue. Without the
+ feature, in the current version, even 64-byte packets take two slots with
+ the Virtio PMD on the guest side.
+
+ The main impact is better performance for 0% packet loss use cases, as it
+ behaves as if the virtqueue size was enlarged, so more packets can be
+ buffered in case of system perturbations. On the downside, a small
+ performance degradation is measured when running micro-benchmarks.
+
Resolved Issues
---------------
}
static inline int __attribute__((always_inline))
-copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
- struct rte_mbuf *m, uint16_t desc_idx,
+copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc *descs,
+ uint16_t max_desc, struct rte_mbuf *m, uint16_t desc_idx,
struct rte_mempool *mbuf_pool)
{
struct vring_desc *desc;
/* A counter to avoid desc dead loop chain */
uint32_t nr_desc = 1;
- desc = &vq->desc[desc_idx];
- if (unlikely(desc->len < dev->vhost_hlen))
+ desc = &descs[desc_idx];
+ if (unlikely((desc->len < dev->vhost_hlen)) ||
+ (desc->flags & VRING_DESC_F_INDIRECT))
return -1;
desc_addr = gpa_to_vva(dev, desc->addr);
*/
if (likely((desc->len == dev->vhost_hlen) &&
(desc->flags & VRING_DESC_F_NEXT) != 0)) {
- desc = &vq->desc[desc->next];
+ desc = &descs[desc->next];
+ if (unlikely(desc->flags & VRING_DESC_F_INDIRECT))
+ return -1;
desc_addr = gpa_to_vva(dev, desc->addr);
if (unlikely(!desc_addr))
if ((desc->flags & VRING_DESC_F_NEXT) == 0)
break;
- if (unlikely(desc->next >= vq->size ||
- ++nr_desc > vq->size))
+ if (unlikely(desc->next >= max_desc ||
+ ++nr_desc > max_desc))
+ return -1;
+ desc = &descs[desc->next];
+ if (unlikely(desc->flags & VRING_DESC_F_INDIRECT))
return -1;
- desc = &vq->desc[desc->next];
desc_addr = gpa_to_vva(dev, desc->addr);
if (unlikely(!desc_addr))
/* Prefetch descriptor index. */
rte_prefetch0(&vq->desc[desc_indexes[0]]);
for (i = 0; i < count; i++) {
+ struct vring_desc *desc;
+ uint16_t sz, idx;
int err;
if (likely(i + 1 < count))
rte_prefetch0(&vq->desc[desc_indexes[i + 1]]);
+ if (vq->desc[desc_indexes[i]].flags & VRING_DESC_F_INDIRECT) {
+ desc = (struct vring_desc *)(uintptr_t)gpa_to_vva(dev,
+ vq->desc[desc_indexes[i]].addr);
+ if (unlikely(!desc))
+ break;
+
+ rte_prefetch0(desc);
+ sz = vq->desc[desc_indexes[i]].len / sizeof(*desc);
+ idx = 0;
+ } else {
+ desc = vq->desc;
+ sz = vq->size;
+ idx = desc_indexes[i];
+ }
+
pkts[i] = rte_pktmbuf_alloc(mbuf_pool);
if (unlikely(pkts[i] == NULL)) {
RTE_LOG(ERR, VHOST_DATA,
"Failed to allocate memory for mbuf.\n");
break;
}
- err = copy_desc_to_mbuf(dev, vq, pkts[i], desc_indexes[i],
- mbuf_pool);
+ err = copy_desc_to_mbuf(dev, desc, sz, pkts[i], idx, mbuf_pool);
if (unlikely(err)) {
rte_pktmbuf_free(pkts[i]);
break;