virtio: use indirect ring elements
author Stephen Hemminger <stephen@networkplumber.org>
Fri, 4 Mar 2016 18:19:19 +0000 (10:19 -0800)
committer Thomas Monjalon <thomas.monjalon@6wind.com>
Wed, 16 Mar 2016 18:05:25 +0000 (19:05 +0100)
The virtio ring in QEMU/KVM is usually limited to 256 entries, and the
way the virtio driver previously queued mbufs required nsegs + 1 ring
elements per packet. By using the indirect ring element feature when
available, each packet takes only one ring slot, even for
multi-segment packets.
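
For illustration, the per-packet cost in main ring slots can be
sketched as follows (hypothetical helper, not part of the patch; it
mirrors the accounting done in virtio_xmit_pkts below):

    /* Illustrative only: slots consumed in the main ring for one
     * mbuf chain.  Direct: one descriptor for the virtio-net header
     * plus one per data segment.  Indirect: a single descriptor
     * pointing at a per-packet table that holds the header and all
     * segment descriptors.
     */
    static inline uint16_t
    tx_ring_slots(uint16_t nb_segs, int use_indirect)
    {
            return use_indirect ? 1 : 1 + nb_segs;
    }

On a 256-entry ring this raises the number of in-flight two-segment
packets from 85 to 256, at the cost of one struct virtio_tx_region
per ring slot in a reserved memzone.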

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
Acked-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
Acked-by: Huawei Xie <huawei.xie@intel.com>
drivers/net/virtio/virtio_ethdev.c
drivers/net/virtio/virtio_rxtx.c
drivers/net/virtio/virtqueue.h

diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index ebefdb4..a105f5b 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -387,27 +387,47 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev,
        vq->virtio_net_hdr_mem = 0;
 
        if (queue_type == VTNET_TQ) {
+               const struct rte_memzone *hdr_mz;
+               struct virtio_tx_region *txr;
+               unsigned int i;
+
                /*
                 * For each xmit packet, allocate a virtio_net_hdr
+                * and indirect ring elements
                 */
                snprintf(vq_name, sizeof(vq_name), "port%d_tvq%d_hdrzone",
-                       dev->data->port_id, queue_idx);
-               vq->virtio_net_hdr_mz = rte_memzone_reserve_aligned(vq_name,
-                       vq_size * hw->vtnet_hdr_size,
-                       socket_id, 0, RTE_CACHE_LINE_SIZE);
-               if (vq->virtio_net_hdr_mz == NULL) {
+                        dev->data->port_id, queue_idx);
+               hdr_mz = rte_memzone_reserve_aligned(vq_name,
+                                                    vq_size * sizeof(*txr),
+                                                    socket_id, 0,
+                                                    RTE_CACHE_LINE_SIZE);
+               if (hdr_mz == NULL) {
                        if (rte_errno == EEXIST)
-                               vq->virtio_net_hdr_mz =
-                                       rte_memzone_lookup(vq_name);
-                       if (vq->virtio_net_hdr_mz == NULL) {
+                               hdr_mz = rte_memzone_lookup(vq_name);
+                       if (hdr_mz == NULL) {
                                rte_free(vq);
                                return -ENOMEM;
                        }
                }
-               vq->virtio_net_hdr_mem =
-                       vq->virtio_net_hdr_mz->phys_addr;
-               memset(vq->virtio_net_hdr_mz->addr, 0,
-                       vq_size * hw->vtnet_hdr_size);
+               vq->virtio_net_hdr_mz = hdr_mz;
+               vq->virtio_net_hdr_mem = hdr_mz->phys_addr;
+
+               txr = hdr_mz->addr;
+               memset(txr, 0, vq_size * sizeof(*txr));
+               for (i = 0; i < vq_size; i++) {
+                       struct vring_desc *start_dp = txr[i].tx_indir;
+
+                       vring_desc_init(start_dp, RTE_DIM(txr[i].tx_indir));
+
+                       /* The first indirect descriptor is always the tx header */
+                       start_dp->addr = vq->virtio_net_hdr_mem
+                               + i * sizeof(*txr)
+                               + offsetof(struct virtio_tx_region, tx_hdr);
+
+                       start_dp->len = vq->hw->vtnet_hdr_size;
+                       start_dp->flags = VRING_DESC_F_NEXT;
+               }
+
        } else if (queue_type == VTNET_CQ) {
                /* Allocate a page for control vq command, data and status */
                snprintf(vq_name, sizeof(vq_name), "port%d_cvq_hdrzone",
diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index 12da0aa..98b3809 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -209,14 +209,15 @@ virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf *cookie)
 }
 
 static int
-virtqueue_enqueue_xmit(struct virtqueue *txvq, struct rte_mbuf *cookie)
+virtqueue_enqueue_xmit(struct virtqueue *txvq, struct rte_mbuf *cookie,
+                      int use_indirect)
 {
        struct vq_desc_extra *dxp;
        struct vring_desc *start_dp;
        uint16_t seg_num = cookie->nb_segs;
-       uint16_t needed = 1 + seg_num;
+       uint16_t needed = use_indirect ? 1 : 1 + seg_num;
        uint16_t head_idx, idx;
-       size_t head_size = txvq->hw->vtnet_hdr_size;
+       unsigned long offs;
 
        if (unlikely(txvq->vq_free_cnt == 0))
                return -ENOSPC;
@@ -232,10 +233,37 @@ virtqueue_enqueue_xmit(struct virtqueue *txvq, struct rte_mbuf *cookie)
        dxp->ndescs = needed;
 
        start_dp = txvq->vq_ring.desc;
-       start_dp[idx].addr =
-               txvq->virtio_net_hdr_mem + idx * head_size;
-       start_dp[idx].len = head_size;
-       start_dp[idx].flags = VRING_DESC_F_NEXT;
+
+       if (use_indirect) {
+               /* Set up the tx ring slot to point to the indirect
+                * descriptor list stored in the reserved region.
+                *
+                * The first slot in the indirect ring is already preset
+                * to point to the header in the reserved region.
+                */
+               struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
+
+               offs = idx * sizeof(struct virtio_tx_region)
+                       + offsetof(struct virtio_tx_region, tx_indir);
+
+               start_dp[idx].addr  = txvq->virtio_net_hdr_mem + offs;
+               start_dp[idx].len   = (seg_num + 1) * sizeof(struct vring_desc);
+               start_dp[idx].flags = VRING_DESC_F_INDIRECT;
+
+               /* The loop below will fill in the rest of the indirect elements */
+               start_dp = txr[idx].tx_indir;
+               idx = 0;
+       } else {
+               /* Set up the first tx ring slot to point to the header
+                * stored in the reserved region.
+                */
+               offs = idx * sizeof(struct virtio_tx_region)
+                       + offsetof(struct virtio_tx_region, tx_hdr);
+
+               start_dp[idx].addr  = txvq->virtio_net_hdr_mem + offs;
+               start_dp[idx].len   = txvq->hw->vtnet_hdr_size;
+               start_dp[idx].flags = VRING_DESC_F_NEXT;
+       }
 
        for (; ((seg_num > 0) && (cookie != NULL)); seg_num--) {
                idx = start_dp[idx].next;
@@ -246,7 +274,12 @@ virtqueue_enqueue_xmit(struct virtqueue *txvq, struct rte_mbuf *cookie)
        }
 
        start_dp[idx].flags &= ~VRING_DESC_F_NEXT;
-       idx = start_dp[idx].next;
+
+       if (use_indirect)
+               idx = txvq->vq_ring.desc[head_idx].next;
+       else
+               idx = start_dp[idx].next;
+
        txvq->vq_desc_head_idx = idx;
        if (txvq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
                txvq->vq_desc_tail_idx = idx;
@@ -289,10 +322,7 @@ virtio_dev_vring_start(struct virtqueue *vq, int queue_type)
        vq->vq_free_cnt = vq->vq_nentries;
        memset(vq->vq_descx, 0, sizeof(struct vq_desc_extra) * vq->vq_nentries);
 
-       /* Chain all the descriptors in the ring with an END */
-       for (i = 0; i < size - 1; i++)
-               vr->desc[i].next = (uint16_t)(i + 1);
-       vr->desc[i].next = VQ_RING_DESC_CHAIN_END;
+       vring_desc_init(vr->desc, size);
 
        /*
         * Disable device(host) interrupting guest
@@ -852,8 +882,15 @@ virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 
        for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
                struct rte_mbuf *txm = tx_pkts[nb_tx];
-               /* Need one more descriptor for virtio header. */
-               int need = txm->nb_segs - txvq->vq_free_cnt + 1;
+               int use_indirect, slots, need;
+
+               use_indirect = vtpci_with_feature(txvq->hw,
+                                                 VIRTIO_RING_F_INDIRECT_DESC)
+                       && (txm->nb_segs < VIRTIO_MAX_TX_INDIRECT);
+
+               /* How many main ring entries are needed for this Tx? */
+               slots = use_indirect ? 1 : 1 + txm->nb_segs;
+               need = slots - txvq->vq_free_cnt;
 
        /* Positive value indicates it needs free vring descriptors */
                if (unlikely(need > 0)) {
@@ -862,7 +899,7 @@ virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
                        need = RTE_MIN(need, (int)nb_used);
 
                        virtio_xmit_cleanup(txvq, need);
-                       need = txm->nb_segs - txvq->vq_free_cnt + 1;
+                       need = slots - txvq->vq_free_cnt;
                        if (unlikely(need > 0)) {
                                PMD_TX_LOG(ERR,
                                           "No free tx descriptors to transmit");
@@ -880,7 +917,7 @@ virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
                }
 
                /* Enqueue Packet buffers */
-               error = virtqueue_enqueue_xmit(txvq, txm);
+               error = virtqueue_enqueue_xmit(txvq, txm, use_indirect);
                if (unlikely(error)) {
                        if (error == ENOSPC)
                                PMD_TX_LOG(ERR, "virtqueue_enqueue Free count = 0");
diff --git a/drivers/net/virtio/virtqueue.h b/drivers/net/virtio/virtqueue.h
index 68e0b4b..4e9239e 100644
--- a/drivers/net/virtio/virtqueue.h
+++ b/drivers/net/virtio/virtqueue.h
@@ -243,6 +243,25 @@ struct virtio_net_hdr_mrg_rxbuf {
        uint16_t num_buffers; /**< Number of merged rx buffers */
 };
 
+/* Region reserved to allow for transmit header and indirect ring */
+#define VIRTIO_MAX_TX_INDIRECT 8
+struct virtio_tx_region {
+       struct virtio_net_hdr_mrg_rxbuf tx_hdr;
+       struct vring_desc tx_indir[VIRTIO_MAX_TX_INDIRECT]
+                          __attribute__((__aligned__(16)));
+};
+
+/* Chain all the descriptors in the ring with an END */
+static inline void
+vring_desc_init(struct vring_desc *dp, uint16_t n)
+{
+       uint16_t i;
+
+       for (i = 0; i < n - 1; i++)
+               dp[i].next = (uint16_t)(i + 1);
+       dp[i].next = VQ_RING_DESC_CHAIN_END;
+}
+
 /**
  * Tell the backend not to interrupt us.
  */