vmxnet3: improve Rx performance
authorYong Wang <yongwang@vmware.com>
Wed, 5 Nov 2014 01:49:42 +0000 (17:49 -0800)
committerThomas Monjalon <thomas.monjalon@6wind.com>
Fri, 14 Nov 2014 16:32:01 +0000 (17:32 +0100)
This patch includes two small performance optimizations
on the rx path:

(1) It adds unlikely() hints to the compiler on various
infrequent error paths to make branch prediction more
efficient.

(2) It also moves a constant assignment out of the pkt
polling loop.  This saves one branch per packet.

Performance evaluation configs:
- On the DPDK-side, it's running some l3 forwarding app
inside a VM on ESXi with one core assigned for polling.
- On the client side, pktgen/dpdk is used to generate
64B tcp packets at line rate (14.8M PPS).

Performance results on a Nehalem box (4 cores @ 2.8 GHz x2)
are shown below.  CPU usage is collected after factoring
out the idle loop cost.
- Before the patch, ~900K PPS with 65% CPU of a core
used for DPDK.
- After the patch, only 45% of a core used, while
maintaining the same packet rate.

Signed-off-by: Yong Wang <yongwang@vmware.com>
lib/librte_pmd_vmxnet3/vmxnet3_rxtx.c

index e2fb8a8..4799f4d 100644 (file)
@@ -451,6 +451,19 @@ vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
        uint32_t i = 0, val = 0;
        struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
 
+       if (ring_id == 0) {
+               /* Usually: One HEAD type buf per packet
+                * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
+                * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
+                */
+
+               /* We use single packet buffer so all heads here */
+               val = VMXNET3_RXD_BTYPE_HEAD;
+       } else {
+               /* All BODY type buffers for 2nd ring */
+               val = VMXNET3_RXD_BTYPE_BODY;
+       }
+
        while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
                struct Vmxnet3_RxDesc *rxd;
                struct rte_mbuf *mbuf;
@@ -458,22 +471,9 @@ vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
 
                rxd = (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);
 
-               if (ring->rid == 0) {
-                       /* Usually: One HEAD type buf per packet
-                        * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
-                        * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
-                        */
-
-                       /* We use single packet buffer so all heads here */
-                       val = VMXNET3_RXD_BTYPE_HEAD;
-               } else {
-                       /* All BODY type buffers for 2nd ring; which won't be used at all by ESXi */
-                       val = VMXNET3_RXD_BTYPE_BODY;
-               }
-
                /* Allocate blank mbuf for the current Rx Descriptor */
                mbuf = rte_rxmbuf_alloc(rxq->mp);
-               if (mbuf == NULL) {
+               if (unlikely(mbuf == NULL)) {
                        PMD_RX_LOG(ERR, "Error allocating mbuf in %s", __func__);
                        rxq->stats.rx_buf_alloc_failure++;
                        err = ENOMEM;
@@ -536,151 +536,141 @@ vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 
        rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
 
-       if (rxq->stopped) {
+       if (unlikely(rxq->stopped)) {
                PMD_RX_LOG(DEBUG, "Rx queue is stopped.");
                return 0;
        }
 
        while (rcd->gen == rxq->comp_ring.gen) {
-
                if (nb_rx >= nb_pkts)
                        break;
+
                idx = rcd->rxdIdx;
                ring_idx = (uint8_t)((rcd->rqID == rxq->qid1) ? 0 : 1);
                rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
                rbi = rxq->cmd_ring[ring_idx].buf_info + idx;
 
-               if (rcd->sop != 1 || rcd->eop != 1) {
+               if (unlikely(rcd->sop != 1 || rcd->eop != 1)) {
                        rte_pktmbuf_free_seg(rbi->m);
-
                        PMD_RX_LOG(DEBUG, "Packet spread across multiple buffers\n)");
                        goto rcd_done;
+               }
 
-               } else {
-
-                       PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);
+               PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);
 
 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
-                       VMXNET3_ASSERT(rcd->len <= rxd->len);
-                       VMXNET3_ASSERT(rbi->m);
+               VMXNET3_ASSERT(rcd->len <= rxd->len);
+               VMXNET3_ASSERT(rbi->m);
 #endif
-                       if (rcd->len == 0) {
-                               PMD_RX_LOG(DEBUG, "Rx buf was skipped. rxring[%d][%d]\n)",
-                                          ring_idx, idx);
+               if (unlikely(rcd->len == 0)) {
+                       PMD_RX_LOG(DEBUG, "Rx buf was skipped. rxring[%d][%d]\n)",
+                                  ring_idx, idx);
 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
-                               VMXNET3_ASSERT(rcd->sop && rcd->eop);
+                       VMXNET3_ASSERT(rcd->sop && rcd->eop);
 #endif
-                               rte_pktmbuf_free_seg(rbi->m);
-
-                               goto rcd_done;
-                       }
+                       rte_pktmbuf_free_seg(rbi->m);
+                       goto rcd_done;
+               }
 
-                       /* Assuming a packet is coming in a single packet buffer */
-                       if (rxd->btype != VMXNET3_RXD_BTYPE_HEAD) {
-                               PMD_RX_LOG(DEBUG,
-                                          "Alert : Misbehaving device, incorrect "
-                                          " buffer type used. iPacket dropped.");
-                               rte_pktmbuf_free_seg(rbi->m);
-                               goto rcd_done;
-                       }
+               /* Assuming a packet is coming in a single packet buffer */
+               if (unlikely(rxd->btype != VMXNET3_RXD_BTYPE_HEAD)) {
+                       PMD_RX_LOG(DEBUG,
+                                  "Alert : Misbehaving device, incorrect "
+                                  " buffer type used. iPacket dropped.");
+                       rte_pktmbuf_free_seg(rbi->m);
+                       goto rcd_done;
+               }
 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
-                       VMXNET3_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);
+               VMXNET3_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);
 #endif
-                       /* Get the packet buffer pointer from buf_info */
-                       rxm = rbi->m;
-
-                       /* Clear descriptor associated buf_info to be reused */
-                       rbi->m = NULL;
-                       rbi->bufPA = 0;
-
-                       /* Update the index that we received a packet */
-                       rxq->cmd_ring[ring_idx].next2comp = idx;
-
-                       /* For RCD with EOP set, check if there is frame error */
-                       if (rcd->err) {
-                               rxq->stats.drop_total++;
-                               rxq->stats.drop_err++;
-
-                               if (!rcd->fcs) {
-                                       rxq->stats.drop_fcs++;
-                                       PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.");
-                               }
-                               PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
-                                          (int)(rcd - (struct Vmxnet3_RxCompDesc *)
-                                                rxq->comp_ring.base), rcd->rxdIdx);
-                               rte_pktmbuf_free_seg(rxm);
-
-                               goto rcd_done;
-                       }
+               /* Get the packet buffer pointer from buf_info */
+               rxm = rbi->m;
 
-                       /* Check for hardware stripped VLAN tag */
-                       if (rcd->ts) {
-                               PMD_RX_LOG(DEBUG, "Received packet with vlan ID: %d.",
-                                          rcd->tci);
-                               rxm->ol_flags = PKT_RX_VLAN_PKT;
-#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
-                               VMXNET3_ASSERT(rxm &&
-                                              rte_pktmbuf_mtod(rxm, void *));
-#endif
-                               /* Copy vlan tag in packet buffer */
-                               rxm->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
-                       } else {
-                               rxm->ol_flags = 0;
-                               rxm->vlan_tci = 0;
-                       }
+               /* Clear descriptor associated buf_info to be reused */
+               rbi->m = NULL;
+               rbi->bufPA = 0;
 
-                       /* Initialize newly received packet buffer */
-                       rxm->port = rxq->port_id;
-                       rxm->nb_segs = 1;
-                       rxm->next = NULL;
-                       rxm->pkt_len = (uint16_t)rcd->len;
-                       rxm->data_len = (uint16_t)rcd->len;
-                       rxm->port = rxq->port_id;
-                       rxm->data_off = RTE_PKTMBUF_HEADROOM;
-
-                       /* Check packet types, rx checksum errors, etc. Only support IPv4 so far. */
-                       if (rcd->v4) {
-                               struct ether_hdr *eth = rte_pktmbuf_mtod(rxm, struct ether_hdr *);
-                               struct ipv4_hdr *ip = (struct ipv4_hdr *)(eth + 1);
-
-                               if (((ip->version_ihl & 0xf) << 2) > (int)sizeof(struct ipv4_hdr))
-                                       rxm->ol_flags |= PKT_RX_IPV4_HDR_EXT;
-                               else
-                                       rxm->ol_flags |= PKT_RX_IPV4_HDR;
-
-                               if (!rcd->cnc) {
-                                       if (!rcd->ipc)
-                                               rxm->ol_flags |= PKT_RX_IP_CKSUM_BAD;
-
-                                       if ((rcd->tcp || rcd->udp) && !rcd->tuc)
-                                               rxm->ol_flags |= PKT_RX_L4_CKSUM_BAD;
-                               }
-                       }
+               /* Update the index that we received a packet */
+               rxq->cmd_ring[ring_idx].next2comp = idx;
 
-                       rx_pkts[nb_rx++] = rxm;
-rcd_done:
-                       rxq->cmd_ring[ring_idx].next2comp = idx;
-                       VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp, rxq->cmd_ring[ring_idx].size);
+               /* For RCD with EOP set, check if there is frame error */
+               if (unlikely(rcd->err)) {
+                       rxq->stats.drop_total++;
+                       rxq->stats.drop_err++;
 
-                       /* It's time to allocate some new buf and renew descriptors */
-                       vmxnet3_post_rx_bufs(rxq, ring_idx);
-                       if (unlikely(rxq->shared->ctrl.updateRxProd)) {
-                               VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
-                                                      rxq->cmd_ring[ring_idx].next2fill);
+                       if (!rcd->fcs) {
+                               rxq->stats.drop_fcs++;
+                               PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.");
                        }
+                       PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
+                                  (int)(rcd - (struct Vmxnet3_RxCompDesc *)
+                                        rxq->comp_ring.base), rcd->rxdIdx);
+                       rte_pktmbuf_free_seg(rxm);
+                       goto rcd_done;
+               }
 
-                       /* Advance to the next descriptor in comp_ring */
-                       vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);
+               /* Check for hardware stripped VLAN tag */
+               if (rcd->ts) {
+                       PMD_RX_LOG(DEBUG, "Received packet with vlan ID: %d.",
+                                  rcd->tci);
+                       rxm->ol_flags = PKT_RX_VLAN_PKT;
+                       /* Copy vlan tag in packet buffer */
+                       rxm->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
+               } else {
+                       rxm->ol_flags = 0;
+                       rxm->vlan_tci = 0;
+               }
 
-                       rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
-                       nb_rxd++;
-                       if (nb_rxd > rxq->cmd_ring[0].size) {
-                               PMD_RX_LOG(ERR,
-                                          "Used up quota of receiving packets,"
-                                          " relinquish control.");
-                               break;
+               /* Initialize newly received packet buffer */
+               rxm->port = rxq->port_id;
+               rxm->nb_segs = 1;
+               rxm->next = NULL;
+               rxm->pkt_len = (uint16_t)rcd->len;
+               rxm->data_len = (uint16_t)rcd->len;
+               rxm->data_off = RTE_PKTMBUF_HEADROOM;
+
+               /* Check packet type, checksum errors, etc. Only support IPv4 for now. */
+               if (rcd->v4) {
+                       struct ether_hdr *eth = rte_pktmbuf_mtod(rxm, struct ether_hdr *);
+                       struct ipv4_hdr *ip = (struct ipv4_hdr *)(eth + 1);
+
+                       if (((ip->version_ihl & 0xf) << 2) > (int)sizeof(struct ipv4_hdr))
+                               rxm->ol_flags |= PKT_RX_IPV4_HDR_EXT;
+                       else
+                               rxm->ol_flags |= PKT_RX_IPV4_HDR;
+
+                       if (!rcd->cnc) {
+                               if (!rcd->ipc)
+                                       rxm->ol_flags |= PKT_RX_IP_CKSUM_BAD;
+
+                               if ((rcd->tcp || rcd->udp) && !rcd->tuc)
+                                       rxm->ol_flags |= PKT_RX_L4_CKSUM_BAD;
                        }
                }
+
+               rx_pkts[nb_rx++] = rxm;
+rcd_done:
+               rxq->cmd_ring[ring_idx].next2comp = idx;
+               VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp, rxq->cmd_ring[ring_idx].size);
+
+               /* It's time to allocate some new buf and renew descriptors */
+               vmxnet3_post_rx_bufs(rxq, ring_idx);
+               if (unlikely(rxq->shared->ctrl.updateRxProd)) {
+                       VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
+                                              rxq->cmd_ring[ring_idx].next2fill);
+               }
+
+               /* Advance to the next descriptor in comp_ring */
+               vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);
+
+               rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
+               nb_rxd++;
+               if (nb_rxd > rxq->cmd_ring[0].size) {
+                       PMD_RX_LOG(ERR,
+                                  "Used up quota of receiving packets,"
+                                  " relinquish control.");
+                       break;
+               }
        }
 
        return nb_rx;