vhost: make virtio header length per device
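
The diff below makes the virtio net header length a property of the device rather than of each virtqueue: every vq->vhost_hlen use becomes dev->vhost_hlen. Picked up along with it are related changes visible in the hunks: log messages switch from the old device_fh handle to the integer vid, rte_vhost_enqueue_burst() and rte_vhost_dequeue_burst() now take that vid and resolve the device with get_device(), descriptor length and index sanity checks are added on both the enqueue and dequeue paths, rte_compiler_barrier() is replaced by rte_smp_wmb(), and the guest is only kicked when callfd is a valid descriptor.

As a minimal sketch (not part of this diff), the per-device header length would be chosen once, when features are negotiated, along these lines; which feature bit selects the mergeable header is an assumption here, not something shown in the hunks:

	/*
	 * Illustrative sketch only: set dev->vhost_hlen once per device.
	 * Relies on the internal struct virtio_net; the feature test is an
	 * assumption, not taken from this diff.
	 */
	static void
	set_vhost_hlen(struct virtio_net *dev)
	{
		if (dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF))
			dev->vhost_hlen = sizeof(struct virtio_net_hdr_mrg_rxbuf);
		else
			dev->vhost_hlen = sizeof(struct virtio_net_hdr);
	}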
diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index b0bcd05..c9cd1c5 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -94,8 +94,6 @@ is_valid_virt_queue_idx(uint32_t idx, int is_tx, uint32_t qp_nb)
 static void
 virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr *net_hdr)
 {
-       memset(net_hdr, 0, sizeof(struct virtio_net_hdr));
-
        if (m_buf->ol_flags & PKT_TX_L4_MASK) {
                net_hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
                net_hdr->csum_start = m_buf->l2_len + m_buf->l3_len;
@@ -125,15 +123,13 @@ virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr *net_hdr)
                net_hdr->hdr_len = m_buf->l2_len + m_buf->l3_len
                                        + m_buf->l4_len;
        }
-
-       return;
 }
 
 static inline void
-copy_virtio_net_hdr(struct vhost_virtqueue *vq, uint64_t desc_addr,
+copy_virtio_net_hdr(struct virtio_net *dev, uint64_t desc_addr,
                    struct virtio_net_hdr_mrg_rxbuf hdr)
 {
-       if (vq->vhost_hlen == sizeof(struct virtio_net_hdr_mrg_rxbuf))
+       if (dev->vhost_hlen == sizeof(struct virtio_net_hdr_mrg_rxbuf))
                *(struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)desc_addr = hdr;
        else
                *(struct virtio_net_hdr *)(uintptr_t)desc_addr = hdr.hdr;
@@ -151,16 +147,19 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
        struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
 
        desc = &vq->desc[desc_idx];
+       if (unlikely(desc->len < dev->vhost_hlen))
+               return -1;
+
        desc_addr = gpa_to_vva(dev, desc->addr);
        rte_prefetch0((void *)(uintptr_t)desc_addr);
 
        virtio_enqueue_offload(m, &virtio_hdr.hdr);
-       copy_virtio_net_hdr(vq, desc_addr, virtio_hdr);
-       vhost_log_write(dev, desc->addr, vq->vhost_hlen);
-       PRINT_PACKET(dev, (uintptr_t)desc_addr, vq->vhost_hlen, 0);
+       copy_virtio_net_hdr(dev, desc_addr, virtio_hdr);
+       vhost_log_write(dev, desc->addr, dev->vhost_hlen);
+       PRINT_PACKET(dev, (uintptr_t)desc_addr, dev->vhost_hlen, 0);
 
-       desc_offset = vq->vhost_hlen;
-       desc_avail  = desc->len - vq->vhost_hlen;
+       desc_offset = dev->vhost_hlen;
+       desc_avail  = desc->len - dev->vhost_hlen;
 
        *copied = rte_pktmbuf_pkt_len(m);
        mbuf_avail  = rte_pktmbuf_data_len(m);
@@ -180,6 +179,8 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
                                /* Room in vring buffer is not enough */
                                return -1;
                        }
+                       if (unlikely(desc->next >= vq->size))
+                               return -1;
 
                        desc = &vq->desc[desc->next];
                        desc_addr   = gpa_to_vva(dev, desc->addr);
@@ -263,11 +264,10 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
        uint16_t desc_indexes[MAX_PKT_BURST];
        uint32_t i;
 
-       LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_rx()\n", dev->device_fh);
+       LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__);
        if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->virt_qp_nb))) {
-               RTE_LOG(ERR, VHOST_DATA,
-                       "%s (%"PRIu64"): virtqueue idx:%d invalid.\n",
-                       __func__, dev->device_fh, queue_id);
+               RTE_LOG(ERR, VHOST_DATA, "(%d) %s: invalid virtqueue idx %d.\n",
+                       dev->vid, __func__, queue_id);
                return 0;
        }
 
@@ -279,9 +279,8 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
        if (count == 0)
                return 0;
 
-       LOG_DEBUG(VHOST_DATA,
-               "(%"PRIu64") res_start_idx %d| res_end_idx Index %d\n",
-               dev->device_fh, res_start_idx, res_end_idx);
+       LOG_DEBUG(VHOST_DATA, "(%d) res_start_idx %d | res_end_idx Index %d\n",
+               dev->vid, res_start_idx, res_end_idx);
 
        /* Retrieve all of the desc indexes first to avoid caching issues. */
        rte_prefetch0(&vq->avail->ring[res_start_idx & (vq->size - 1)]);
@@ -301,9 +300,9 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 
                vq->used->ring[used_idx].id = desc_idx;
                if (unlikely(err))
-                       vq->used->ring[used_idx].len = vq->vhost_hlen;
+                       vq->used->ring[used_idx].len = dev->vhost_hlen;
                else
-                       vq->used->ring[used_idx].len = copied + vq->vhost_hlen;
+                       vq->used->ring[used_idx].len = copied + dev->vhost_hlen;
                vhost_log_used_vring(dev, vq,
                        offsetof(struct vring_used, ring[used_idx]),
                        sizeof(vq->used->ring[used_idx]));
@@ -312,7 +311,7 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
                        rte_prefetch0(&vq->desc[desc_indexes[i+1]]);
        }
 
-       rte_compiler_barrier();
+       rte_smp_wmb();
 
        /* Wait until it's our turn to add our buffer to the used ring. */
        while (unlikely(vq->last_used_idx != res_start_idx))
@@ -328,7 +327,8 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
        rte_mb();
 
        /* Kick the guest if necessary. */
-       if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
+       if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)
+                       && (vq->callfd >= 0))
                eventfd_write(vq->callfd, (eventfd_t)1);
        return count;
 }
@@ -342,7 +342,7 @@ fill_vec_buf(struct vhost_virtqueue *vq, uint32_t avail_idx,
        uint32_t len    = *allocated;
 
        while (1) {
-               if (vec_id >= BUF_VECTOR_MAX)
+               if (unlikely(vec_id >= BUF_VECTOR_MAX || idx >= vq->size))
                        return -1;
 
                len += vq->desc[idx].len;
@@ -441,24 +441,26 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq,
        if (unlikely(m == NULL))
                return 0;
 
-       LOG_DEBUG(VHOST_DATA,
-               "(%"PRIu64") Current Index %d| End Index %d\n",
-               dev->device_fh, cur_idx, res_end_idx);
+       LOG_DEBUG(VHOST_DATA, "(%d) current index %d | end index %d\n",
+               dev->vid, cur_idx, res_end_idx);
+
+       if (vq->buf_vec[vec_idx].buf_len < dev->vhost_hlen)
+               return -1;
 
        desc_addr = gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr);
        rte_prefetch0((void *)(uintptr_t)desc_addr);
 
        virtio_hdr.num_buffers = res_end_idx - res_start_idx;
-       LOG_DEBUG(VHOST_DATA, "(%"PRIu64") RX: Num merge buffers %d\n",
-               dev->device_fh, virtio_hdr.num_buffers);
+       LOG_DEBUG(VHOST_DATA, "(%d) RX: num merge buffers %d\n",
+               dev->vid, virtio_hdr.num_buffers);
 
        virtio_enqueue_offload(m, &virtio_hdr.hdr);
-       copy_virtio_net_hdr(vq, desc_addr, virtio_hdr);
-       vhost_log_write(dev, vq->buf_vec[vec_idx].buf_addr, vq->vhost_hlen);
-       PRINT_PACKET(dev, (uintptr_t)desc_addr, vq->vhost_hlen, 0);
+       copy_virtio_net_hdr(dev, desc_addr, virtio_hdr);
+       vhost_log_write(dev, vq->buf_vec[vec_idx].buf_addr, dev->vhost_hlen);
+       PRINT_PACKET(dev, (uintptr_t)desc_addr, dev->vhost_hlen, 0);
 
-       desc_avail  = vq->buf_vec[vec_idx].buf_len - vq->vhost_hlen;
-       desc_offset = vq->vhost_hlen;
+       desc_avail  = vq->buf_vec[vec_idx].buf_len - dev->vhost_hlen;
+       desc_offset = dev->vhost_hlen;
 
        mbuf_avail  = rte_pktmbuf_data_len(m);
        mbuf_offset = 0;
@@ -528,12 +530,10 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
        uint32_t pkt_idx = 0, nr_used = 0;
        uint16_t start, end;
 
-       LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_merge_rx()\n",
-               dev->device_fh);
+       LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__);
        if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->virt_qp_nb))) {
-               RTE_LOG(ERR, VHOST_DATA,
-                       "%s (%"PRIu64"): virtqueue idx:%d invalid.\n",
-                       __func__, dev->device_fh, queue_id);
+               RTE_LOG(ERR, VHOST_DATA, "(%d) %s: invalid virtqueue idx %d.\n",
+                       dev->vid, __func__, queue_id);
                return 0;
        }
 
@@ -546,19 +546,19 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
                return 0;
 
        for (pkt_idx = 0; pkt_idx < count; pkt_idx++) {
-               uint32_t pkt_len = pkts[pkt_idx]->pkt_len + vq->vhost_hlen;
+               uint32_t pkt_len = pkts[pkt_idx]->pkt_len + dev->vhost_hlen;
 
                if (unlikely(reserve_avail_buf_mergeable(vq, pkt_len,
                                                         &start, &end) < 0)) {
                        LOG_DEBUG(VHOST_DATA,
-                               "(%" PRIu64 ") Failed to get enough desc from vring\n",
-                               dev->device_fh);
+                               "(%d) failed to get enough desc from vring\n",
+                               dev->vid);
                        break;
                }
 
                nr_used = copy_mbuf_to_desc_mergeable(dev, vq, start, end,
                                                      pkts[pkt_idx]);
-               rte_compiler_barrier();
+               rte_smp_wmb();
 
                /*
                 * Wait until it's our turn to add our buffer
@@ -578,7 +578,8 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
                rte_mb();
 
                /* Kick the guest if necessary. */
-               if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
+               if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)
+                               && (vq->callfd >= 0))
                        eventfd_write(vq->callfd, (eventfd_t)1);
        }
 
@@ -586,9 +587,14 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
 }
 
 uint16_t
-rte_vhost_enqueue_burst(struct virtio_net *dev, uint16_t queue_id,
+rte_vhost_enqueue_burst(int vid, uint16_t queue_id,
        struct rte_mbuf **pkts, uint16_t count)
 {
+       struct virtio_net *dev = get_device(vid);
+
+       if (!dev)
+               return 0;
+
        if (dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF))
                return virtio_dev_merge_rx(dev, queue_id, pkts, count);
        else
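
The burst API above now takes an integer vid and translates it with get_device(); that helper lives outside this file. A minimal sketch of the idea, assuming a per-process lookup table indexed by vid (the table and its size macro are hypothetical names, not taken from this diff):

	/*
	 * Sketch only: map a vid handle back to the internal device struct.
	 * 'vhost_device_table' and MAX_VHOST_DEVICES are assumed names.
	 */
	static struct virtio_net *vhost_device_table[MAX_VHOST_DEVICES];

	struct virtio_net *
	get_device(int vid)
	{
		if (vid < 0 || vid >= MAX_VHOST_DEVICES)
			return NULL;

		/* NULL when no device has been registered for this vid */
		return vhost_device_table[vid];
	}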
@@ -737,21 +743,29 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
        uint32_t cpy_len;
        struct rte_mbuf *cur = m, *prev = m;
        struct virtio_net_hdr *hdr;
+       /* A counter to avoid desc dead loop chain */
+       uint32_t nr_desc = 1;
 
        desc = &vq->desc[desc_idx];
+       if (unlikely(desc->len < dev->vhost_hlen))
+               return -1;
+
        desc_addr = gpa_to_vva(dev, desc->addr);
        rte_prefetch0((void *)(uintptr_t)desc_addr);
 
        /* Retrieve virtio net header */
        hdr = (struct virtio_net_hdr *)((uintptr_t)desc_addr);
-       desc_avail  = desc->len - vq->vhost_hlen;
-       desc_offset = vq->vhost_hlen;
+       desc_avail  = desc->len - dev->vhost_hlen;
+       desc_offset = dev->vhost_hlen;
 
        mbuf_offset = 0;
        mbuf_avail  = m->buf_len - RTE_PKTMBUF_HEADROOM;
        while (desc_avail != 0 || (desc->flags & VRING_DESC_F_NEXT) != 0) {
                /* This desc reaches to its end, get the next one */
                if (desc_avail == 0) {
+                       if (unlikely(desc->next >= vq->size ||
+                                    ++nr_desc >= vq->size))
+                               return -1;
                        desc = &vq->desc[desc->next];
 
                        desc_addr = gpa_to_vva(dev, desc->addr);
@@ -806,9 +820,10 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
 }
 
 uint16_t
-rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
+rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
        struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count)
 {
+       struct virtio_net *dev;
        struct rte_mbuf *rarp_mbuf = NULL;
        struct vhost_virtqueue *vq;
        uint32_t desc_indexes[MAX_PKT_BURST];
@@ -817,10 +832,13 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
        uint16_t free_entries;
        uint16_t avail_idx;
 
+       dev = get_device(vid);
+       if (!dev)
+               return 0;
+
        if (unlikely(!is_valid_virt_queue_idx(queue_id, 1, dev->virt_qp_nb))) {
-               RTE_LOG(ERR, VHOST_DATA,
-                       "%s (%"PRIu64"): virtqueue idx:%d invalid.\n",
-                       __func__, dev->device_fh, queue_id);
+               RTE_LOG(ERR, VHOST_DATA, "(%d) %s: invalid virtqueue idx %d.\n",
+                       dev->vid, __func__, queue_id);
                return 0;
        }
 
@@ -856,7 +874,7 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
        if (free_entries == 0)
                goto out;
 
-       LOG_DEBUG(VHOST_DATA, "%s (%"PRIu64")\n", __func__, dev->device_fh);
+       LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__);
 
        /* Prefetch available ring to retrieve head indexes. */
        used_idx = vq->last_used_idx & (vq->size - 1);
@@ -864,8 +882,8 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
 
        count = RTE_MIN(count, MAX_PKT_BURST);
        count = RTE_MIN(count, free_entries);
-       LOG_DEBUG(VHOST_DATA, "(%"PRIu64") about to dequeue %u buffers\n",
-                       dev->device_fh, count);
+       LOG_DEBUG(VHOST_DATA, "(%d) about to dequeue %u buffers\n",
+                       dev->vid, count);
 
        /* Retrieve all of the head indexes first to avoid caching issues. */
        for (i = 0; i < count; i++) {
@@ -907,13 +925,15 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
                                sizeof(vq->used->ring[used_idx]));
        }
 
-       rte_compiler_barrier();
+       rte_smp_wmb();
+       rte_smp_rmb();
        vq->used->idx += i;
        vhost_log_used_vring(dev, vq, offsetof(struct vring_used, idx),
                        sizeof(vq->used->idx));
 
        /* Kick guest if required. */
-       if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
+       if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)
+                       && (vq->callfd >= 0))
                eventfd_write(vq->callfd, (eventfd_t)1);
 
 out:
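
With both burst calls keyed by vid, a minimal application loop that echoes guest traffic back looks roughly like this; vid would come from the vhost new_device() callback, mp is an mbuf pool created by the application, and VIRTIO_RXQ/VIRTIO_TXQ are assumed to be the usual 0/1 queue indexes:

	/*
	 * Sketch: dequeue a burst from the guest TX ring and enqueue it back
	 * on the guest RX ring.  The enqueue path copies data into the vring,
	 * so the mbufs are always freed here by the caller.
	 */
	static void
	vhost_loopback_once(int vid, struct rte_mempool *mp)
	{
		struct rte_mbuf *pkts[MAX_PKT_BURST];
		uint16_t nb, i;

		nb = rte_vhost_dequeue_burst(vid, VIRTIO_TXQ, mp,
					     pkts, MAX_PKT_BURST);
		if (nb == 0)
			return;

		rte_vhost_enqueue_burst(vid, VIRTIO_RXQ, pkts, nb);

		for (i = 0; i < nb; i++)
			rte_pktmbuf_free(pkts[i]);
	}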