From 6e9893c5ef586c95cdb4cf87f17a826de1b26e66 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Tue, 12 Jan 2016 18:08:33 -0800 Subject: [PATCH] vmxnet3: restore Tx data ring support Tx data ring support was removed in a previous change that added multi-segment transmit. This change adds it back. According to the original commit (2e849373), the 64B packet rate with l2fwd improved by ~20% on an Ivy Bridge server, at which point we start to hit a bottleneck on the Rx side. I also reran the same test on a different setup (Haswell processor, ~2.3GHz clock rate) on top of master and still observed a ~17% performance gain. Fixes: 7ba5de417e3c ("vmxnet3: support multi-segment transmit") Signed-off-by: Yong Wang Acked-by: Stephen Hemminger --- doc/guides/rel_notes/release_16_04.rst | 5 +++++ drivers/net/vmxnet3/vmxnet3_rxtx.c | 17 ++++++++++++++++- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/doc/guides/rel_notes/release_16_04.rst b/doc/guides/rel_notes/release_16_04.rst index 06879e02c0..522c4630b3 100644 --- a/doc/guides/rel_notes/release_16_04.rst +++ b/doc/guides/rel_notes/release_16_04.rst @@ -47,6 +47,11 @@ This section should contain new features added in this release. Sample format: A new function ``rte_pktmbuf_alloc_bulk()`` has been added to allow the user to allocate a bulk of mbufs. +* **Restored vmxnet3 Tx data ring.** + + Tx data ring has been shown to improve small pkt forwarding performance + on vSphere environment. + * **Virtio 1.0.** Enabled virtio 1.0 support for virtio pmd driver. 
diff --git a/drivers/net/vmxnet3/vmxnet3_rxtx.c b/drivers/net/vmxnet3/vmxnet3_rxtx.c index 8a8e441140..5f207452b6 100644 --- a/drivers/net/vmxnet3/vmxnet3_rxtx.c +++ b/drivers/net/vmxnet3/vmxnet3_rxtx.c @@ -342,6 +342,7 @@ vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint32_t first2fill, avail, dw2; struct rte_mbuf *txm = tx_pkts[nb_tx]; struct rte_mbuf *m_seg = txm; + int copy_size = 0; /* Is this packet execessively fragmented, then drop */ if (unlikely(txm->nb_segs > VMXNET3_MAX_TXD_PER_PKT)) { @@ -359,6 +360,14 @@ vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, break; } + if (txm->nb_segs == 1 && rte_pktmbuf_pkt_len(txm) <= VMXNET3_HDR_COPY_SIZE) { + struct Vmxnet3_TxDataDesc *tdd; + + tdd = txq->data_ring.base + txq->cmd_ring.next2fill; + copy_size = rte_pktmbuf_pkt_len(txm); + rte_memcpy(tdd->data, rte_pktmbuf_mtod(txm, char *), copy_size); + } + /* use the previous gen bit for the SOP desc */ dw2 = (txq->cmd_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT; first2fill = txq->cmd_ring.next2fill; @@ -371,7 +380,13 @@ vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, transmit buffer size (16K) is greater than maximum sizeof mbuf segment size. */ gdesc = txq->cmd_ring.base + txq->cmd_ring.next2fill; - gdesc->txd.addr = rte_mbuf_data_dma_addr(m_seg); + if (copy_size) + gdesc->txd.addr = rte_cpu_to_le_64(txq->data_ring.basePA + + txq->cmd_ring.next2fill * + sizeof(struct Vmxnet3_TxDataDesc)); + else + gdesc->txd.addr = rte_mbuf_data_dma_addr(m_seg); + gdesc->dword[2] = dw2 | m_seg->data_len; gdesc->dword[3] = 0; -- 2.20.1