vhost: handle virtually non-contiguous buffers in Rx-mrg
[dpdk.git] / lib / librte_vhost / virtio_net.c
index dcbfbd5..5fdd417 100644 (file)
@@ -221,9 +221,9 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
        uint32_t desc_avail, desc_offset;
        uint32_t mbuf_avail, mbuf_offset;
        uint32_t cpy_len;
-       uint64_t dlen;
+       uint64_t desc_chunck_len;
        struct vring_desc *desc;
-       uint64_t desc_addr;
+       uint64_t desc_addr, desc_gaddr;
        /* A counter to avoid desc dead loop chain */
        uint16_t nr_desc = 1;
        struct batch_copy_elem *batch_copy = vq->batch_copy_elems;
@@ -231,28 +231,74 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
        int error = 0;
 
        desc = &descs[desc_idx];
-       dlen = desc->len;
-       desc_addr = vhost_iova_to_vva(dev, vq, desc->addr,
-                                       &dlen, VHOST_ACCESS_RW);
+       desc_chunck_len = desc->len;
+       desc_gaddr = desc->addr;
+       desc_addr = vhost_iova_to_vva(dev, vq, desc_gaddr,
+                                       &desc_chunck_len, VHOST_ACCESS_RW);
        /*
         * Checking of 'desc_addr' placed outside of 'unlikely' macro to avoid
         * performance issue with some versions of gcc (4.8.4 and 5.3.0) which
         * otherwise stores offset on the stack instead of in a register.
         */
-       if (unlikely(dlen != desc->len || desc->len < dev->vhost_hlen) ||
-                       !desc_addr) {
+       if (unlikely(desc->len < dev->vhost_hlen) || !desc_addr) {
                error = -1;
                goto out;
        }
 
        rte_prefetch0((void *)(uintptr_t)desc_addr);
 
-       virtio_enqueue_offload(m, (struct virtio_net_hdr *)(uintptr_t)desc_addr);
-       vhost_log_write(dev, desc->addr, dev->vhost_hlen);
-       PRINT_PACKET(dev, (uintptr_t)desc_addr, dev->vhost_hlen, 0);
+       if (likely(desc_chunck_len >= dev->vhost_hlen)) {
+               virtio_enqueue_offload(m,
+                               (struct virtio_net_hdr *)(uintptr_t)desc_addr);
+               PRINT_PACKET(dev, (uintptr_t)desc_addr, dev->vhost_hlen, 0);
+               vhost_log_write(dev, desc_gaddr, dev->vhost_hlen);
+       } else {
+               struct virtio_net_hdr vnet_hdr;
+               uint64_t remain = dev->vhost_hlen;
+               uint64_t len;
+               uint64_t src = (uint64_t)(uintptr_t)&vnet_hdr, dst;
+               uint64_t guest_addr = desc_gaddr;
+
+               virtio_enqueue_offload(m, &vnet_hdr);
+
+               while (remain) {
+                       len = remain;
+                       dst = vhost_iova_to_vva(dev, vq, guest_addr,
+                                       &len, VHOST_ACCESS_RW);
+                       if (unlikely(!dst || !len)) {
+                               error = -1;
+                               goto out;
+                       }
+
+                       rte_memcpy((void *)(uintptr_t)dst,
+                                       (void *)(uintptr_t)src, len);
+
+                       PRINT_PACKET(dev, (uintptr_t)dst, (uint32_t)len, 0);
+                       vhost_log_write(dev, guest_addr, len);
+                       remain -= len;
+                       guest_addr += len;
+                       dst += len;
+               }
+       }
 
-       desc_offset = dev->vhost_hlen;
        desc_avail  = desc->len - dev->vhost_hlen;
+       if (unlikely(desc_chunck_len < dev->vhost_hlen)) {
+               desc_chunck_len = desc_avail;
+               desc_gaddr = desc->addr + dev->vhost_hlen;
+               desc_addr = vhost_iova_to_vva(dev,
+                               vq, desc_gaddr,
+                               &desc_chunck_len,
+                               VHOST_ACCESS_RW);
+               if (unlikely(!desc_addr)) {
+                       error = -1;
+                       goto out;
+               }
+
+               desc_offset = 0;
+       } else {
+               desc_offset = dev->vhost_hlen;
+               desc_chunck_len -= dev->vhost_hlen;
+       }
 
        mbuf_avail  = rte_pktmbuf_data_len(m);
        mbuf_offset = 0;
@@ -278,26 +324,38 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
                        }
 
                        desc = &descs[desc->next];
-                       dlen = desc->len;
-                       desc_addr = vhost_iova_to_vva(dev, vq, desc->addr,
-                                                       &dlen,
+                       desc_chunck_len = desc->len;
+                       desc_gaddr = desc->addr;
+                       desc_addr = vhost_iova_to_vva(dev, vq, desc_gaddr,
+                                                       &desc_chunck_len,
                                                        VHOST_ACCESS_RW);
-                       if (unlikely(!desc_addr || dlen != desc->len)) {
+                       if (unlikely(!desc_addr)) {
                                error = -1;
                                goto out;
                        }
 
                        desc_offset = 0;
                        desc_avail  = desc->len;
+               } else if (unlikely(desc_chunck_len == 0)) {
+                       desc_chunck_len = desc_avail;
+                       desc_gaddr += desc_offset;
+                       desc_addr = vhost_iova_to_vva(dev,
+                                       vq, desc_gaddr,
+                                       &desc_chunck_len, VHOST_ACCESS_RW);
+                       if (unlikely(!desc_addr)) {
+                               error = -1;
+                               goto out;
+                       }
+                       desc_offset = 0;
                }
 
-               cpy_len = RTE_MIN(desc_avail, mbuf_avail);
+               cpy_len = RTE_MIN(desc_chunck_len, mbuf_avail);
                if (likely(cpy_len > MAX_BATCH_LEN || copy_nb >= vq->size)) {
                        rte_memcpy((void *)((uintptr_t)(desc_addr +
                                                        desc_offset)),
                                rte_pktmbuf_mtod_offset(m, void *, mbuf_offset),
                                cpy_len);
-                       vhost_log_write(dev, desc->addr + desc_offset, cpy_len);
+                       vhost_log_write(dev, desc_gaddr + desc_offset, cpy_len);
                        PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset),
                                     cpy_len, 0);
                } else {
@@ -305,7 +363,7 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
                                (void *)((uintptr_t)(desc_addr + desc_offset));
                        batch_copy[copy_nb].src =
                                rte_pktmbuf_mtod_offset(m, void *, mbuf_offset);
-                       batch_copy[copy_nb].log_addr = desc->addr + desc_offset;
+                       batch_copy[copy_nb].log_addr = desc_gaddr + desc_offset;
                        batch_copy[copy_nb].len = cpy_len;
                        copy_nb++;
                }
@@ -314,6 +372,7 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
                mbuf_offset += cpy_len;
                desc_avail  -= cpy_len;
                desc_offset += cpy_len;
+               desc_chunck_len -= cpy_len;
        }
 
 out:
@@ -579,14 +638,15 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq,
                            uint16_t num_buffers)
 {
        uint32_t vec_idx = 0;
-       uint64_t desc_addr;
+       uint64_t desc_addr, desc_gaddr;
        uint32_t mbuf_offset, mbuf_avail;
        uint32_t desc_offset, desc_avail;
        uint32_t cpy_len;
-       uint64_t dlen;
+       uint64_t desc_chunck_len;
        uint64_t hdr_addr, hdr_phys_addr;
        struct rte_mbuf *hdr_mbuf;
        struct batch_copy_elem *batch_copy = vq->batch_copy_elems;
+       struct virtio_net_hdr_mrg_rxbuf tmp_hdr, *hdr = NULL;
        uint16_t copy_nb = vq->batch_copy_nb_elems;
        int error = 0;
 
@@ -595,26 +655,48 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq,
                goto out;
        }
 
-       dlen = buf_vec[vec_idx].buf_len;
-       desc_addr = vhost_iova_to_vva(dev, vq, buf_vec[vec_idx].buf_addr,
-                                               &dlen, VHOST_ACCESS_RW);
-       if (dlen != buf_vec[vec_idx].buf_len ||
-                       buf_vec[vec_idx].buf_len < dev->vhost_hlen ||
-                       !desc_addr) {
+       desc_chunck_len = buf_vec[vec_idx].buf_len;
+       desc_gaddr = buf_vec[vec_idx].buf_addr;
+       desc_addr = vhost_iova_to_vva(dev, vq,
+                                       desc_gaddr,
+                                       &desc_chunck_len,
+                                       VHOST_ACCESS_RW);
+       if (buf_vec[vec_idx].buf_len < dev->vhost_hlen || !desc_addr) {
                error = -1;
                goto out;
        }
 
        hdr_mbuf = m;
        hdr_addr = desc_addr;
-       hdr_phys_addr = buf_vec[vec_idx].buf_addr;
+       if (unlikely(desc_chunck_len < dev->vhost_hlen))
+               hdr = &tmp_hdr;
+       else
+               hdr = (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)hdr_addr;
+       hdr_phys_addr = desc_gaddr;
        rte_prefetch0((void *)(uintptr_t)hdr_addr);
 
        VHOST_LOG_DEBUG(VHOST_DATA, "(%d) RX: num merge buffers %d\n",
                dev->vid, num_buffers);
 
        desc_avail  = buf_vec[vec_idx].buf_len - dev->vhost_hlen;
-       desc_offset = dev->vhost_hlen;
+       if (unlikely(desc_chunck_len < dev->vhost_hlen)) {
+               desc_chunck_len = desc_avail;
+               desc_gaddr += dev->vhost_hlen;
+               desc_addr = vhost_iova_to_vva(dev, vq,
+                               desc_gaddr,
+                               &desc_chunck_len,
+                               VHOST_ACCESS_RW);
+               if (unlikely(!desc_addr)) {
+                       error = -1;
+                       goto out;
+               }
+
+               desc_offset = 0;
+       } else {
+               desc_offset = dev->vhost_hlen;
+               desc_chunck_len -= dev->vhost_hlen;
+       }
+
 
        mbuf_avail  = rte_pktmbuf_data_len(m);
        mbuf_offset = 0;
@@ -622,14 +704,14 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq,
                /* done with current desc buf, get the next one */
                if (desc_avail == 0) {
                        vec_idx++;
-                       dlen = buf_vec[vec_idx].buf_len;
+                       desc_chunck_len = buf_vec[vec_idx].buf_len;
+                       desc_gaddr = buf_vec[vec_idx].buf_addr;
                        desc_addr =
                                vhost_iova_to_vva(dev, vq,
-                                       buf_vec[vec_idx].buf_addr,
-                                       &dlen,
+                                       desc_gaddr,
+                                       &desc_chunck_len,
                                        VHOST_ACCESS_RW);
-                       if (unlikely(!desc_addr ||
-                                       dlen != buf_vec[vec_idx].buf_len)) {
+                       if (unlikely(!desc_addr)) {
                                error = -1;
                                goto out;
                        }
@@ -638,6 +720,17 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq,
                        rte_prefetch0((void *)(uintptr_t)desc_addr);
                        desc_offset = 0;
                        desc_avail  = buf_vec[vec_idx].buf_len;
+               } else if (unlikely(desc_chunck_len == 0)) {
+                       desc_chunck_len = desc_avail;
+                       desc_gaddr += desc_offset;
+                       desc_addr = vhost_iova_to_vva(dev, vq,
+                                       desc_gaddr,
+                                       &desc_chunck_len, VHOST_ACCESS_RW);
+                       if (unlikely(!desc_addr)) {
+                               error = -1;
+                               goto out;
+                       }
+                       desc_offset = 0;
                }
 
                /* done with current mbuf, get the next one */
@@ -649,30 +742,55 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq,
                }
 
                if (hdr_addr) {
-                       struct virtio_net_hdr_mrg_rxbuf *hdr;
-
-                       hdr = (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)
-                               hdr_addr;
                        virtio_enqueue_offload(hdr_mbuf, &hdr->hdr);
                        ASSIGN_UNLESS_EQUAL(hdr->num_buffers, num_buffers);
 
-                       vhost_log_write(dev, hdr_phys_addr, dev->vhost_hlen);
-                       PRINT_PACKET(dev, (uintptr_t)hdr_addr,
-                                    dev->vhost_hlen, 0);
+                       if (unlikely(hdr == &tmp_hdr)) {
+                               uint64_t len;
+                               uint64_t remain = dev->vhost_hlen;
+                               uint64_t src = (uint64_t)(uintptr_t)hdr, dst;
+                               uint64_t guest_addr = hdr_phys_addr;
+
+                               while (remain) {
+                                       len = remain;
+                                       dst = vhost_iova_to_vva(dev, vq,
+                                                       guest_addr, &len,
+                                                       VHOST_ACCESS_RW);
+                                       if (unlikely(!dst || !len)) {
+                                               error = -1;
+                                               goto out;
+                                       }
+
+                                       rte_memcpy((void *)(uintptr_t)dst,
+                                                       (void *)(uintptr_t)src,
+                                                       len);
+
+                                       PRINT_PACKET(dev, (uintptr_t)dst,
+                                                       (uint32_t)len, 0);
+                                       vhost_log_write(dev, guest_addr, len);
+
+                                       remain -= len;
+                                       guest_addr += len;
+                                       dst += len;
+                               }
+                       } else {
+                               PRINT_PACKET(dev, (uintptr_t)hdr_addr,
+                                               dev->vhost_hlen, 0);
+                               vhost_log_write(dev, hdr_phys_addr,
+                                               dev->vhost_hlen);
+                       }
 
                        hdr_addr = 0;
                }
 
-               cpy_len = RTE_MIN(desc_avail, mbuf_avail);
+               cpy_len = RTE_MIN(desc_chunck_len, mbuf_avail);
 
                if (likely(cpy_len > MAX_BATCH_LEN || copy_nb >= vq->size)) {
                        rte_memcpy((void *)((uintptr_t)(desc_addr +
                                                        desc_offset)),
                                rte_pktmbuf_mtod_offset(m, void *, mbuf_offset),
                                cpy_len);
-                       vhost_log_write(dev,
-                               buf_vec[vec_idx].buf_addr + desc_offset,
-                               cpy_len);
+                       vhost_log_write(dev, desc_gaddr + desc_offset, cpy_len);
                        PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset),
                                cpy_len, 0);
                } else {
@@ -680,8 +798,7 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq,
                                (void *)((uintptr_t)(desc_addr + desc_offset));
                        batch_copy[copy_nb].src =
                                rte_pktmbuf_mtod_offset(m, void *, mbuf_offset);
-                       batch_copy[copy_nb].log_addr =
-                               buf_vec[vec_idx].buf_addr + desc_offset;
+                       batch_copy[copy_nb].log_addr = desc_gaddr + desc_offset;
                        batch_copy[copy_nb].len = cpy_len;
                        copy_nb++;
                }
@@ -690,6 +807,7 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq,
                mbuf_offset += cpy_len;
                desc_avail  -= cpy_len;
                desc_offset += cpy_len;
+               desc_chunck_len -= cpy_len;
        }
 
 out: