vhost: add batch datapath for async packed ring
author Cheng Jiang <cheng1.jiang@intel.com>
Tue, 27 Apr 2021 08:03:34 +0000 (08:03 +0000)
committer Maxime Coquelin <maxime.coquelin@redhat.com>
Tue, 4 May 2021 08:21:59 +0000 (10:21 +0200)
Add batch datapath for async vhost packed ring to improve the
performance of small packet processing.
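
For reference, the public entry point behind this datapath is the
experimental async enqueue API; its comp_pkts/comp_count parameters
(visible in the hunks below) return the packets that were completed
synchronously, e.g. by the new batch path. A minimal usage sketch,
with an illustrative wrapper name and burst size that are not part
of the patch, assuming count <= BURST_SZ:

#include <rte_mbuf.h>
#include <rte_vhost_async.h>

#define BURST_SZ 32

static void
async_enqueue_burst(int vid, uint16_t queue_id,
		struct rte_mbuf **pkts, uint16_t count)
{
	struct rte_mbuf *comp_pkts[BURST_SZ];
	uint32_t comp_count = 0;
	uint16_t n_enq, n_done;

	/* Packets below the copy threshold are copied by the CPU and
	 * returned in comp_pkts right away; the rest are offloaded to
	 * the DMA engine. */
	n_enq = rte_vhost_submit_enqueue_burst(vid, queue_id, pkts,
			count, comp_pkts, &comp_count);
	rte_pktmbuf_free_bulk(comp_pkts, comp_count);

	/* DMA-offloaded packets complete asynchronously; poll them. */
	n_done = rte_vhost_poll_enqueue_completed(vid, queue_id,
			comp_pkts, BURST_SZ);
	rte_pktmbuf_free_bulk(comp_pkts, n_done);

	/* Packets in pkts[n_enq..count-1] were not enqueued; a real
	 * application would retry or drop them. */
	(void)n_enq;
}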

Signed-off-by: Cheng Jiang <cheng1.jiang@intel.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
lib/vhost/virtio_net.c

index f744675..1a34867 100644
@@ -1724,6 +1724,29 @@ vhost_update_used_packed(struct vhost_virtqueue *vq,
        vq->desc_packed[head_idx].flags = head_flags;
 }
 
+static __rte_always_inline int
+virtio_dev_rx_async_batch_packed(struct virtio_net *dev,
+                          struct vhost_virtqueue *vq,
+                          struct rte_mbuf **pkts,
+                          struct rte_mbuf **comp_pkts, uint32_t *pkt_done)
+{
+       uint16_t i;
+       uint32_t cpy_threshold = vq->async_threshold;
+
+       vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
+               if (unlikely(pkts[i]->pkt_len >= cpy_threshold))
+                       return -1;
+       }
+       if (!virtio_dev_rx_batch_packed(dev, vq, pkts)) {
+               vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
+                       comp_pkts[(*pkt_done)++] = pkts[i];
+
+               return 0;
+       }
+
+       return -1;
+}
+
 static __rte_always_inline int
 vhost_enqueue_async_single_packed(struct virtio_net *dev,
                            struct vhost_virtqueue *vq,
@@ -1872,6 +1895,7 @@ virtio_dev_rx_async_submit_packed(struct virtio_net *dev,
        struct rte_mbuf **comp_pkts, uint32_t *comp_count)
 {
        uint32_t pkt_idx = 0, pkt_burst_idx = 0;
+       uint32_t remained = count;
        uint16_t async_descs_idx = 0;
        uint16_t num_buffers;
        uint16_t num_descs;
@@ -1889,12 +1913,19 @@ virtio_dev_rx_async_submit_packed(struct virtio_net *dev,
        uint32_t num_async_pkts = 0, num_done_pkts = 0;
        struct vring_packed_desc async_descs[vq->size];
 
-       rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]);
+       do {
+               rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]);
+               if (remained >= PACKED_BATCH_SIZE) {
+                       if (!virtio_dev_rx_async_batch_packed(dev, vq,
+                               &pkts[pkt_idx], comp_pkts, &num_done_pkts)) {
+                               pkt_idx += PACKED_BATCH_SIZE;
+                               remained -= PACKED_BATCH_SIZE;
+                               continue;
+                       }
+               }
 
-       for (pkt_idx = 0; pkt_idx < count; pkt_idx++) {
                num_buffers = 0;
                num_descs = 0;
-
                if (unlikely(virtio_dev_rx_async_single_packed(dev, vq, pkts[pkt_idx],
                                                &num_descs, &num_buffers,
                                                &async_descs[async_descs_idx],
@@ -1937,6 +1968,8 @@ virtio_dev_rx_async_submit_packed(struct virtio_net *dev,
                        comp_pkts[num_done_pkts++] = pkts[pkt_idx];
                }
 
+               pkt_idx++;
+               remained--;
                vq_inc_last_avail_packed(vq, num_descs);
 
                /*
@@ -1961,13 +1994,12 @@ virtio_dev_rx_async_submit_packed(struct virtio_net *dev,
                                 */
                                pkt_err = pkt_burst_idx - n_pkts;
                                pkt_burst_idx = 0;
-                               pkt_idx++;
                                break;
                        }
 
                        pkt_burst_idx = 0;
                }
-       }
+       } while (pkt_idx < count);
 
        if (pkt_burst_idx) {
                n_pkts = vq->async_ops.transfer_data(dev->vid, queue_id, tdes, 0, pkt_burst_idx);