From 32c118fd005912b835f1eb94facc467549fab783 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 9 Feb 2015 09:14:05 +0800 Subject: [PATCH] virtio: free mbuf's with threshold This makes virtio driver work like ixgbe. Transmit buffers are held until a transmit threshold is reached. The previous behavior was to hold mbuf's until the ring entry was reused which caused more memory usage than needed. Signed-off-by: Stephen Hemminger Signed-off-by: Changchun Ouyang Acked-by: Huawei Xie --- lib/librte_pmd_virtio/virtio_ethdev.c | 7 +-- lib/librte_pmd_virtio/virtio_rxtx.c | 75 +++++++++++++++++++-------- lib/librte_pmd_virtio/virtqueue.h | 3 +- 3 files changed, 60 insertions(+), 25 deletions(-) diff --git a/lib/librte_pmd_virtio/virtio_ethdev.c b/lib/librte_pmd_virtio/virtio_ethdev.c index 6c224d41d3..7556e117e2 100644 --- a/lib/librte_pmd_virtio/virtio_ethdev.c +++ b/lib/librte_pmd_virtio/virtio_ethdev.c @@ -176,15 +176,16 @@ virtio_send_command(struct virtqueue *vq, struct virtio_pmd_ctrl *ctrl, virtqueue_notify(vq); - while (vq->vq_used_cons_idx == vq->vq_ring.used->idx) + rte_rmb(); + while (vq->vq_used_cons_idx == vq->vq_ring.used->idx) { + rte_rmb(); usleep(100); + } while (vq->vq_used_cons_idx != vq->vq_ring.used->idx) { uint32_t idx, desc_idx, used_idx; struct vring_used_elem *uep; - virtio_rmb(); - used_idx = (uint32_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1)); uep = &vq->vq_ring.used->ring[used_idx]; diff --git a/lib/librte_pmd_virtio/virtio_rxtx.c b/lib/librte_pmd_virtio/virtio_rxtx.c index 9026cdd0a5..126323f820 100644 --- a/lib/librte_pmd_virtio/virtio_rxtx.c +++ b/lib/librte_pmd_virtio/virtio_rxtx.c @@ -129,17 +129,32 @@ virtqueue_dequeue_burst_rx(struct virtqueue *vq, struct rte_mbuf **rx_pkts, return i; } +#ifndef DEFAULT_TX_FREE_THRESH +#define DEFAULT_TX_FREE_THRESH 32 +#endif + +/* Cleanup from completed transmits. */ static void -virtqueue_dequeue_pkt_tx(struct virtqueue *vq) +virtio_xmit_cleanup(struct virtqueue *vq, uint16_t num) { - struct vring_used_elem *uep; - uint16_t used_idx, desc_idx; + uint16_t i, used_idx, desc_idx; + for (i = 0; i < num; i++) { + struct vring_used_elem *uep; + struct vq_desc_extra *dxp; + + used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1)); + uep = &vq->vq_ring.used->ring[used_idx]; + + desc_idx = (uint16_t) uep->id; + dxp = &vq->vq_descx[desc_idx]; + vq->vq_used_cons_idx++; + vq_ring_free_chain(vq, desc_idx); - used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1)); - uep = &vq->vq_ring.used->ring[used_idx]; - desc_idx = (uint16_t) uep->id; - vq->vq_used_cons_idx++; - vq_ring_free_chain(vq, desc_idx); + if (dxp->cookie != NULL) { + rte_pktmbuf_free(dxp->cookie); + dxp->cookie = NULL; + } + } } @@ -203,8 +218,6 @@ virtqueue_enqueue_xmit(struct virtqueue *txvq, struct rte_mbuf *cookie) idx = head_idx; dxp = &txvq->vq_descx[idx]; - if (dxp->cookie != NULL) - rte_pktmbuf_free(dxp->cookie); dxp->cookie = (void *)cookie; dxp->ndescs = needed; @@ -404,6 +417,7 @@ virtio_dev_tx_queue_setup(struct rte_eth_dev *dev, { uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX; struct virtqueue *vq; + uint16_t tx_free_thresh; int ret; PMD_INIT_FUNC_TRACE(); @@ -421,6 +435,22 @@ virtio_dev_tx_queue_setup(struct rte_eth_dev *dev, return ret; } + tx_free_thresh = tx_conf->tx_free_thresh; + if (tx_free_thresh == 0) + tx_free_thresh = + RTE_MIN(vq->vq_nentries / 4, DEFAULT_TX_FREE_THRESH); + + if (tx_free_thresh >= (vq->vq_nentries - 3)) { + RTE_LOG(ERR, PMD, "tx_free_thresh must be less than the " + "number of TX entries minus 3 (%u)." + " (tx_free_thresh=%u port=%u queue=%u)\n", + vq->vq_nentries - 3, + tx_free_thresh, dev->data->port_id, queue_idx); + return -EINVAL; + } + + vq->vq_free_thresh = tx_free_thresh; + dev->data->tx_queues[queue_idx] = vq; return 0; } @@ -691,11 +721,9 @@ virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) { struct virtqueue *txvq = tx_queue; struct rte_mbuf *txm; - uint16_t nb_used, nb_tx, num; + uint16_t nb_used, nb_tx; int error; - nb_tx = 0; - if (unlikely(nb_pkts < 1)) return nb_pkts; @@ -703,21 +731,26 @@ virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) nb_used = VIRTQUEUE_NUSED(txvq); virtio_rmb(); + if (likely(nb_used > txvq->vq_free_thresh)) + virtio_xmit_cleanup(txvq, nb_used); - num = (uint16_t)(likely(nb_used < VIRTIO_MBUF_BURST_SZ) ? nb_used : VIRTIO_MBUF_BURST_SZ); + nb_tx = 0; while (nb_tx < nb_pkts) { /* Need one more descriptor for virtio header. */ int need = tx_pkts[nb_tx]->nb_segs - txvq->vq_free_cnt + 1; - int deq_cnt = RTE_MIN(need, (int)num); - num -= (deq_cnt > 0) ? deq_cnt : 0; - while (deq_cnt > 0) { - virtqueue_dequeue_pkt_tx(txvq); - deq_cnt--; + /*Positive value indicates it need free vring descriptors */ + if (unlikely(need > 0)) { + nb_used = VIRTQUEUE_NUSED(txvq); + virtio_rmb(); + need = RTE_MIN(need, (int)nb_used); + + virtio_xmit_cleanup(txvq, need); + need = (int)tx_pkts[nb_tx]->nb_segs - + txvq->vq_free_cnt + 1; } - need = (int)tx_pkts[nb_tx]->nb_segs - txvq->vq_free_cnt + 1; /* * Zero or negative value indicates it has enough free * descriptors to use for transmitting. @@ -726,7 +759,7 @@ virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) txm = tx_pkts[nb_tx]; /* Do VLAN tag insertion */ - if (txm->ol_flags & PKT_TX_VLAN_PKT) { + if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) { error = rte_vlan_insert(&txm); if (unlikely(error)) { rte_pktmbuf_free(txm); diff --git a/lib/librte_pmd_virtio/virtqueue.h b/lib/librte_pmd_virtio/virtqueue.h index c959a2b93a..41dda506cb 100644 --- a/lib/librte_pmd_virtio/virtqueue.h +++ b/lib/librte_pmd_virtio/virtqueue.h @@ -164,6 +164,7 @@ struct virtqueue { struct rte_mempool *mpool; /**< mempool for mbuf allocation */ uint16_t queue_id; /**< DPDK queue index. */ uint8_t port_id; /**< Device port identifier. */ + uint16_t vq_queue_index; /**< PCI queue index */ void *vq_ring_virt_mem; /**< linear address of vring*/ unsigned int vq_ring_size; @@ -172,7 +173,7 @@ struct virtqueue { struct vring vq_ring; /**< vring keeping desc, used and avail */ uint16_t vq_free_cnt; /**< num of desc available */ uint16_t vq_nentries; /**< vring desc numbers */ - uint16_t vq_queue_index; /**< PCI queue index */ + uint16_t vq_free_thresh; /**< free threshold */ /** * Head of the free chain in the descriptor table. If * there are no free descriptors, this will be set to -- 2.20.1