vmxnet3: leverage data ring on Tx path
author Yong Wang <yongwang@vmware.com>
Wed, 5 Nov 2014 01:49:43 +0000 (17:49 -0800)
committer Thomas Monjalon <thomas.monjalon@6wind.com>
Fri, 14 Nov 2014 16:32:27 +0000 (17:32 +0100)
Data_ring is a pre-mapped guest ring buffer that the vmxnet3
backend can access directly, without the need for buffer
address mapping and unmapping during packet transmission.
It is useful in reducing device emulation cost on the tx
path.  There is some additional cost in the guest driver
for the packet copy, but overall it's a win.

This patch leverages the data_ring for packets with a
length less than or equal to the data_ring entry size
(128B).  For larger packets, we don't use the data_ring,
as that requires one extra tx descriptor and it's not
clear whether doing so would be beneficial.

Performance results show that this patch significantly
boosts vmxnet3 64B tx performance (pkt rate) for the l2fwd
application on an Ivy Bridge server by >20%, at which
point we start to hit a bottleneck on the rx side.

Signed-off-by: Yong Wang <yongwang@vmware.com>
lib/librte_pmd_vmxnet3/vmxnet3_ethdev.c
lib/librte_pmd_vmxnet3/vmxnet3_ring.h
lib/librte_pmd_vmxnet3/vmxnet3_rxtx.c

index c6e69f2..64789ac 100644 (file)
@@ -401,15 +401,17 @@ vmxnet3_setup_driver_shared(struct rte_eth_dev *dev)
 
        for (i = 0; i < hw->num_tx_queues; i++) {
                Vmxnet3_TxQueueDesc *tqd = &hw->tqd_start[i];
-               vmxnet3_tx_queue_t *txq   = dev->data->tx_queues[i];
+               vmxnet3_tx_queue_t *txq  = dev->data->tx_queues[i];
 
                tqd->ctrl.txNumDeferred  = 0;
                tqd->ctrl.txThreshold    = 1;
                tqd->conf.txRingBasePA   = txq->cmd_ring.basePA;
                tqd->conf.compRingBasePA = txq->comp_ring.basePA;
+               tqd->conf.dataRingBasePA = txq->data_ring.basePA;
 
                tqd->conf.txRingSize   = txq->cmd_ring.size;
                tqd->conf.compRingSize = txq->comp_ring.size;
+               tqd->conf.dataRingSize = txq->data_ring.size;
                tqd->conf.intrIdx      = txq->comp_ring.intr_idx;
                tqd->status.stopped    = TRUE;
                tqd->status.error      = 0;
@@ -418,7 +420,7 @@ vmxnet3_setup_driver_shared(struct rte_eth_dev *dev)
 
        for (i = 0; i < hw->num_rx_queues; i++) {
                Vmxnet3_RxQueueDesc *rqd  = &hw->rqd_start[i];
-               vmxnet3_rx_queue_t *rxq    = dev->data->rx_queues[i];
+               vmxnet3_rx_queue_t *rxq   = dev->data->rx_queues[i];
 
                rqd->conf.rxRingBasePA[0] = rxq->cmd_ring[0].basePA;
                rqd->conf.rxRingBasePA[1] = rxq->cmd_ring[1].basePA;
@@ -583,7 +585,6 @@ vmxnet3_dev_close(struct rte_eth_dev *dev)
 
        vmxnet3_dev_stop(dev);
        hw->adapter_stopped = TRUE;
-
 }
 
 static void
index 7a5dd5f..c5abdb6 100644 (file)
@@ -51,9 +51,9 @@
 
 typedef struct vmxnet3_buf_info {
        uint16_t               len;
-       struct rte_mbuf       *m;
-       uint64_t             bufPA;
-}vmxnet3_buf_info_t;
+       struct rte_mbuf        *m;
+       uint64_t               bufPA;
+} vmxnet3_buf_info_t;
 
 typedef struct vmxnet3_cmd_ring {
        vmxnet3_buf_info_t     *buf_info;
@@ -104,6 +104,12 @@ typedef struct vmxnet3_comp_ring {
        uint64_t               basePA;
 } vmxnet3_comp_ring_t;
 
+struct vmxnet3_data_ring {
+       struct Vmxnet3_TxDataDesc *base;
+       uint32_t                  size;
+       uint64_t                  basePA;
+};
+
 static inline void
 vmxnet3_comp_ring_adv_next2proc(struct vmxnet3_comp_ring *ring)
 {
@@ -143,6 +149,7 @@ typedef struct vmxnet3_tx_queue {
        struct vmxnet3_hw            *hw;
        struct vmxnet3_cmd_ring      cmd_ring;
        struct vmxnet3_comp_ring     comp_ring;
+       struct vmxnet3_data_ring     data_ring;
        uint32_t                     qid;
        struct Vmxnet3_TxQueueDesc   *shared;
        struct vmxnet3_txq_stats     stats;
index 4799f4d..6c69f84 100644 (file)
@@ -78,7 +78,6 @@
 #include "vmxnet3_logs.h"
 #include "vmxnet3_ethdev.h"
 
-
 #define RTE_MBUF_DATA_DMA_ADDR(mb) \
        (uint64_t) ((mb)->buf_physaddr + (mb)->data_off)
 
@@ -144,11 +143,12 @@ vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq)
        if (txq == NULL)
                return;
 
-       PMD_TX_LOG(DEBUG, "TXQ: cmd base : 0x%p comp ring base : 0x%p.",
-                  txq->cmd_ring.base, txq->comp_ring.base);
-       PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx.",
+       PMD_TX_LOG(DEBUG, "TXQ: cmd base : 0x%p comp ring base : 0x%p data ring base : 0x%p.",
+                  txq->cmd_ring.base, txq->comp_ring.base, txq->data_ring.base);
+       PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx data ring basePA : 0x%lx.",
                   (unsigned long)txq->cmd_ring.basePA,
-                  (unsigned long)txq->comp_ring.basePA);
+                  (unsigned long)txq->comp_ring.basePA,
+                  (unsigned long)txq->data_ring.basePA);
 
        avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
        PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u",
@@ -213,6 +213,7 @@ vmxnet3_dev_tx_queue_reset(void *txq)
        vmxnet3_tx_queue_t *tq = txq;
        struct vmxnet3_cmd_ring *ring = &tq->cmd_ring;
        struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
+       struct vmxnet3_data_ring *data_ring = &tq->data_ring;
        int size;
 
        if (tq != NULL) {
@@ -229,6 +230,7 @@ vmxnet3_dev_tx_queue_reset(void *txq)
 
        size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
        size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
+       size += sizeof(struct Vmxnet3_TxDataDesc) * data_ring->size;
 
        memset(ring->base, 0, size);
 }
@@ -342,7 +344,7 @@ vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 
        hw = txq->hw;
 
-       if (txq->stopped) {
+       if (unlikely(txq->stopped)) {
                PMD_TX_LOG(DEBUG, "Tx queue is stopped.");
                return 0;
        }
@@ -354,6 +356,7 @@ vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
        while (nb_tx < nb_pkts) {
 
                if (vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring)) {
+                       int copy_size = 0;
 
                        txm = tx_pkts[nb_tx];
                        /* Don't support scatter packets yet, free them if met */
@@ -377,11 +380,23 @@ vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
                        }
 
                        txd = (Vmxnet3_TxDesc *)(txq->cmd_ring.base + txq->cmd_ring.next2fill);
+                       if (rte_pktmbuf_pkt_len(txm) <= VMXNET3_HDR_COPY_SIZE) {
+                               struct Vmxnet3_TxDataDesc *tdd;
+
+                               tdd = txq->data_ring.base + txq->cmd_ring.next2fill;
+                               copy_size = rte_pktmbuf_pkt_len(txm);
+                               rte_memcpy(tdd->data, rte_pktmbuf_mtod(txm, char *), copy_size);
+                       }
 
                        /* Fill the tx descriptor */
                        tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;
                        tbi->bufPA = RTE_MBUF_DATA_DMA_ADDR(txm);
-                       txd->addr = tbi->bufPA;
+                       if (copy_size)
+                               txd->addr = rte_cpu_to_le_64(txq->data_ring.basePA +
+                                                       txq->cmd_ring.next2fill *
+                                                       sizeof(struct Vmxnet3_TxDataDesc));
+                       else
+                               txd->addr = tbi->bufPA;
                        txd->len = txm->data_len;
 
                        /* Mark the last descriptor as End of Packet. */
@@ -707,11 +722,12 @@ vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
                           unsigned int socket_id,
                           __attribute__((unused)) const struct rte_eth_txconf *tx_conf)
 {
-       struct vmxnet3_hw     *hw = dev->data->dev_private;
+       struct vmxnet3_hw *hw = dev->data->dev_private;
        const struct rte_memzone *mz;
        struct vmxnet3_tx_queue *txq;
        struct vmxnet3_cmd_ring *ring;
        struct vmxnet3_comp_ring *comp_ring;
+       struct vmxnet3_data_ring *data_ring;
        int size;
 
        PMD_INIT_FUNC_TRACE();
@@ -743,6 +759,7 @@ vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
 
        ring = &txq->cmd_ring;
        comp_ring = &txq->comp_ring;
+       data_ring = &txq->data_ring;
 
        /* Tx vmxnet ring length should be between 512-4096 */
        if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) {
@@ -757,7 +774,7 @@ vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
                ring->size = nb_desc;
                ring->size &= ~VMXNET3_RING_SIZE_MASK;
        }
-       comp_ring->size = ring->size;
+       comp_ring->size = data_ring->size = ring->size;
 
        /* Tx vmxnet rings structure initialization*/
        ring->next2fill = 0;
@@ -768,6 +785,7 @@ vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
 
        size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
        size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
+       size += sizeof(struct Vmxnet3_TxDataDesc) * data_ring->size;
 
        mz = ring_dma_zone_reserve(dev, "txdesc", queue_idx, size, socket_id);
        if (mz == NULL) {
@@ -785,6 +803,11 @@ vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
        comp_ring->basePA = ring->basePA +
                (sizeof(struct Vmxnet3_TxDesc) * ring->size);
 
+       /* data_ring initialization */
+       data_ring->base = (Vmxnet3_TxDataDesc *)(comp_ring->base + comp_ring->size);
+       data_ring->basePA = comp_ring->basePA +
+                       (sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size);
+
        /* cmd_ring0 buf_info allocation */
        ring->buf_info = rte_zmalloc("tx_ring_buf_info",
                                     ring->size * sizeof(vmxnet3_buf_info_t), CACHE_LINE_SIZE);
@@ -895,7 +918,7 @@ vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
        ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size;
 
        /* comp_ring initialization */
-       comp_ring->base = ring1->base +  ring1->size;
+       comp_ring->base = ring1->base + ring1->size;
        comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) *
                ring1->size;