diff --git a/examples/vhost/main.c b/examples/vhost/main.c
index aecc9c3..665886e 100644
--- a/examples/vhost/main.c
+++ b/examples/vhost/main.c
@@ -212,17 +212,10 @@ struct mbuf_table {
 /* TX queue for each data core. */
 struct mbuf_table lcore_tx_queue[RTE_MAX_LCORE];
 
+#define MBUF_TABLE_DRAIN_TSC   ((rte_get_tsc_hz() + US_PER_S - 1) \
+                                / US_PER_S * BURST_TX_DRAIN_US)
 #define VLAN_HLEN       4
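The new MBUF_TABLE_DRAIN_TSC macro converts the BURST_TX_DRAIN_US drain period into TSC cycles, rounding the cycles-per-microsecond figure up so a coarse TSC can never truncate the interval to zero. Below is a minimal standalone sketch of the same arithmetic; the 2.3 GHz value is a hypothetical stand-in for rte_get_tsc_hz(), and BURST_TX_DRAIN_US is assumed to be 100:

```c
#include <stdint.h>
#include <stdio.h>

#define US_PER_S          1000000ULL
#define BURST_TX_DRAIN_US 100ULL	/* assumed: drain roughly every 100 us */

int main(void)
{
	/* hypothetical stand-in for rte_get_tsc_hz() */
	uint64_t tsc_hz = 2300000000ULL;

	/* (hz + US_PER_S - 1) / US_PER_S rounds up to whole cycles per us */
	uint64_t drain_tsc = (tsc_hz + US_PER_S - 1) / US_PER_S
			     * BURST_TX_DRAIN_US;

	printf("%llu cycles per drain interval\n",
	       (unsigned long long)drain_tsc);	/* 230000 for this clock */
	return 0;
}
```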
 
-/* Per-device statistics struct */
-struct device_statistics {
-       uint64_t tx_total;
-       rte_atomic64_t rx_total_atomic;
-       uint64_t tx;
-       rte_atomic64_t rx_atomic;
-} __rte_cache_aligned;
-struct device_statistics dev_statistics[MAX_DEVICES];
-
 /*
  * Builds up the correct configuration for VMDQ VLAN pool map
  * according to the pool & queue limits.
@@ -796,17 +789,17 @@ unlink_vmdq(struct vhost_dev *vdev)
 }
 
 static inline void __attribute__((always_inline))
-virtio_xmit(struct virtio_net *dst_dev, struct virtio_net *src_dev,
+virtio_xmit(struct vhost_dev *dst_vdev, struct vhost_dev *src_vdev,
            struct rte_mbuf *m)
 {
        uint16_t ret;
 
-       ret = rte_vhost_enqueue_burst(dst_dev, VIRTIO_RXQ, &m, 1);
+       ret = rte_vhost_enqueue_burst(dst_vdev->dev, VIRTIO_RXQ, &m, 1);
        if (enable_stats) {
-               rte_atomic64_inc(&dev_statistics[dst_dev->device_fh].rx_total_atomic);
-               rte_atomic64_add(&dev_statistics[dst_dev->device_fh].rx_atomic, ret);
-               dev_statistics[src_dev->device_fh].tx_total++;
-               dev_statistics[src_dev->device_fh].tx += ret;
+               rte_atomic64_inc(&dst_vdev->stats.rx_total_atomic);
+               rte_atomic64_add(&dst_vdev->stats.rx_atomic, ret);
+               src_vdev->stats.tx_total++;
+               src_vdev->stats.tx += ret;
        }
 }
 
@@ -844,7 +837,7 @@ virtio_tx_local(struct vhost_dev *vdev, struct rte_mbuf *m)
                return 0;
        }
 
-       virtio_xmit(dst_vdev->dev, vdev->dev, m);
+       virtio_xmit(dst_vdev, vdev, m);
        return 0;
 }
 
@@ -914,16 +907,35 @@ static void virtio_tx_offload(struct rte_mbuf *m)
        tcp_hdr->cksum = get_psd_sum(l3_hdr, m->ol_flags);
 }
 
+static inline void
+free_pkts(struct rte_mbuf **pkts, uint16_t n)
+{
+       while (n--)
+               rte_pktmbuf_free(pkts[n]);
+}
+
+static inline void __attribute__((always_inline))
+do_drain_mbuf_table(struct mbuf_table *tx_q)
+{
+       uint16_t count;
+
+       count = rte_eth_tx_burst(ports[0], tx_q->txq_id,
+                                tx_q->m_table, tx_q->len);
+       if (unlikely(count < tx_q->len))
+               free_pkts(&tx_q->m_table[count], tx_q->len - count);
+
+       tx_q->len = 0;
+}
+
 /*
- * This function routes the TX packet to the correct interface. This may be a local device
- * or the physical port.
+ * This function routes the TX packet to the correct interface. This
+ * may be a local device or the physical port.
  */
 static inline void __attribute__((always_inline))
 virtio_tx_route(struct vhost_dev *vdev, struct rte_mbuf *m, uint16_t vlan_tag)
 {
        struct mbuf_table *tx_q;
-       struct rte_mbuf **m_table;
-       unsigned len, ret, offset = 0;
+       unsigned offset = 0;
        const uint16_t lcore_id = rte_lcore_id();
        struct virtio_net *dev = vdev->dev;
        struct ether_hdr *nh;
@@ -934,7 +946,7 @@ virtio_tx_route(struct vhost_dev *vdev, struct rte_mbuf *m, uint16_t vlan_tag)
                struct vhost_dev *vdev2;
 
                TAILQ_FOREACH(vdev2, &vhost_dev_list, next) {
-                       virtio_xmit(vdev2->dev, vdev->dev, m);
+                       virtio_xmit(vdev2, vdev, m);
                }
                goto queue2nic;
        }
@@ -959,7 +971,6 @@ queue2nic:
 
        /*Add packet to the port tx queue*/
        tx_q = &lcore_tx_queue[lcore_id];
-       len = tx_q->len;
 
        nh = rte_pktmbuf_mtod(m, struct ether_hdr *);
        if (unlikely(nh->ether_type == rte_cpu_to_be_16(ETHER_TYPE_VLAN))) {
@@ -997,55 +1008,128 @@ queue2nic:
        if (m->ol_flags & PKT_TX_TCP_SEG)
                virtio_tx_offload(m);
 
-       tx_q->m_table[len] = m;
-       len++;
+       tx_q->m_table[tx_q->len++] = m;
        if (enable_stats) {
-               dev_statistics[dev->device_fh].tx_total++;
-               dev_statistics[dev->device_fh].tx++;
+               vdev->stats.tx_total++;
+               vdev->stats.tx++;
        }
 
-       if (unlikely(len == MAX_PKT_BURST)) {
-               m_table = (struct rte_mbuf **)tx_q->m_table;
-               ret = rte_eth_tx_burst(ports[0], (uint16_t)tx_q->txq_id, m_table, (uint16_t) len);
-               /* Free any buffers not handled by TX and update the port stats. */
-               if (unlikely(ret < len)) {
-                       do {
-                               rte_pktmbuf_free(m_table[ret]);
-                       } while (++ret < len);
+       if (unlikely(tx_q->len == MAX_PKT_BURST))
+               do_drain_mbuf_table(tx_q);
+}
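With this change, virtio_tx_route() only appends to the per-core mbuf table and leaves the actual rte_eth_tx_burst() call to do_drain_mbuf_table(), triggered once a full burst has accumulated. A minimal sketch of that size-triggered batching pattern, with a hypothetical flush() callback standing in for do_drain_mbuf_table():

```c
#define MAX_BATCH 32	/* stands in for MAX_PKT_BURST */

struct batch {
	void *items[MAX_BATCH];
	unsigned int len;
};

/*
 * Append one item and flush only when the batch is full, so the
 * expensive per-burst call is amortized over MAX_BATCH items.
 * flush() is hypothetical and must reset b->len, just as
 * do_drain_mbuf_table() resets tx_q->len.
 */
static void batch_add(struct batch *b, void *item,
		      void (*flush)(struct batch *b))
{
	b->items[b->len++] = item;
	if (b->len == MAX_BATCH)
		flush(b);
}
```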
+
+
+static inline void __attribute__((always_inline))
+drain_mbuf_table(struct mbuf_table *tx_q)
+{
+       static uint64_t prev_tsc;
+       uint64_t cur_tsc;
+
+       if (tx_q->len == 0)
+               return;
+
+       cur_tsc = rte_rdtsc();
+       if (unlikely(cur_tsc - prev_tsc > MBUF_TABLE_DRAIN_TSC)) {
+               prev_tsc = cur_tsc;
+
+               RTE_LOG(DEBUG, VHOST_DATA,
+                       "TX queue drained after timeout with burst size %u\n",
+                       tx_q->len);
+               do_drain_mbuf_table(tx_q);
+       }
+}
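drain_mbuf_table() adds the complementary time-based trigger: a queue that never fills up must still be flushed once MBUF_TABLE_DRAIN_TSC cycles have elapsed, so short bursts do not sit in the table indefinitely. A portable sketch of the same check, with clock_gettime() standing in for rte_rdtsc() and an illustrative 100 us period:

```c
#include <stdint.h>
#include <time.h>

#define DRAIN_PERIOD_NS 100000ULL	/* 100 us, mirrors BURST_TX_DRAIN_US */

static uint64_t now_ns(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (uint64_t)ts.tv_sec * 1000000000ULL + (uint64_t)ts.tv_nsec;
}

/*
 * Flush only when the queue is non-empty and the period has elapsed;
 * the cheap length check runs before the clock read, just like
 * drain_mbuf_table().
 */
static void maybe_drain(unsigned int *queue_len)
{
	static uint64_t prev_ns;
	uint64_t cur_ns;

	if (*queue_len == 0)
		return;

	cur_ns = now_ns();
	if (cur_ns - prev_ns > DRAIN_PERIOD_NS) {
		prev_ns = cur_ns;
		*queue_len = 0;	/* stand-in for the real flush */
	}
}
```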
+
+static inline void __attribute__((always_inline))
+drain_eth_rx(struct vhost_dev *vdev)
+{
+       uint16_t rx_count, enqueue_count;
+       struct virtio_net *dev = vdev->dev;
+       struct rte_mbuf *pkts[MAX_PKT_BURST];
+
+       rx_count = rte_eth_rx_burst(ports[0], vdev->vmdq_rx_q,
+                                   pkts, MAX_PKT_BURST);
+       if (!rx_count)
+               return;
+
+       /*
+        * When "enable_retry" is set, wait and retry when there are not
+        * enough free slots in the queue to hold @rx_count packets, to
+        * reduce packet loss.
+        */
+       if (enable_retry &&
+           unlikely(rx_count > rte_vring_available_entries(dev,
+                       VIRTIO_RXQ))) {
+               uint32_t retry;
+
+               for (retry = 0; retry < burst_rx_retry_num; retry++) {
+                       rte_delay_us(burst_rx_delay_time);
+                       if (rx_count <= rte_vring_available_entries(dev,
+                                       VIRTIO_RXQ))
+                               break;
                }
+       }
 
-               len = 0;
+       enqueue_count = rte_vhost_enqueue_burst(dev, VIRTIO_RXQ,
+                                               pkts, rx_count);
+       if (enable_stats) {
+               rte_atomic64_add(&vdev->stats.rx_total_atomic, rx_count);
+               rte_atomic64_add(&vdev->stats.rx_atomic, enqueue_count);
        }
 
-       tx_q->len = len;
-       return;
+       free_pkts(pkts, rx_count);
 }
+
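The enable_retry path above is a bounded busy-wait: sleep burst_rx_delay_time microseconds between polls of the vring and give up after burst_rx_retry_num attempts; either way, rte_vhost_enqueue_burst() then enqueues as many packets as fit and the remainder is dropped. A self-contained sketch of that bounded retry loop, where have_room() and both parameters are hypothetical stand-ins:

```c
#include <stdbool.h>
#include <stdint.h>
#include <unistd.h>

/*
 * Poll have_room() up to max_retries times, sleeping delay_us between
 * attempts, mirroring the enable_retry loop in drain_eth_rx(). The
 * caller proceeds either way; false only means the ring may still be
 * short on space, so some packets may be dropped.
 */
static bool wait_for_room(bool (*have_room)(void *ctx), void *ctx,
			  uint32_t max_retries, uint32_t delay_us)
{
	uint32_t retry;

	for (retry = 0; retry < max_retries; retry++) {
		usleep(delay_us);
		if (have_room(ctx))
			return true;
	}
	return false;
}
```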
+static inline void __attribute__((always_inline))
+drain_virtio_tx(struct vhost_dev *vdev)
+{
+       struct rte_mbuf *pkts[MAX_PKT_BURST];
+       uint16_t count;
+       uint16_t i;
+
+       count = rte_vhost_dequeue_burst(vdev->dev, VIRTIO_TXQ, mbuf_pool,
+                                       pkts, MAX_PKT_BURST);
+
+       /* setup VMDq for the first packet */
+       if (unlikely(vdev->ready == DEVICE_MAC_LEARNING) && count) {
+               if (vdev->remove || link_vmdq(vdev, pkts[0]) == -1) {
+                       free_pkts(pkts, count);
+                       return;	/* the mbufs are gone; don't route them */
+               }
+       }
+
+       for (i = 0; i < count; ++i) {
+               virtio_tx_route(vdev, pkts[i],
+                       vlan_tags[(uint16_t)vdev->dev->device_fh]);
+       }
+}
+
 /*
- * This function is called by each data core. It handles all RX/TX registered with the
- * core. For TX the specific lcore linked list is used. For RX, MAC addresses are compared
- * with all devices in the main linked list.
+ * Main function of vhost-switch. It basically does:
+ *
+ * for each vhost device {
+ *    - drain_eth_rx()
+ *
+ *      Which drains the host eth Rx queue linked to the vhost device
+ *      and delivers the packets to the guest virtio Rx ring associated
+ *      with this vhost device.
+ *
+ *    - drain_virtio_tx()
+ *
+ *      Which drains the guest virtio Tx queue and delivers the packets
+ *      to the target, which could be another vhost device or the
+ *      physical eth dev. The routing is done in virtio_tx_route().
+ * }
  */
 static int
-switch_worker(__attribute__((unused)) void *arg)
+switch_worker(void *arg __rte_unused)
 {
-       struct virtio_net *dev = NULL;
-       struct vhost_dev *vdev = NULL;
-       struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
+       unsigned i;
+       unsigned lcore_id = rte_lcore_id();
+       struct vhost_dev *vdev;
        struct mbuf_table *tx_q;
-       const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * BURST_TX_DRAIN_US;
-       uint64_t prev_tsc, diff_tsc, cur_tsc, ret_count = 0;
-       unsigned ret, i;
-       const uint16_t lcore_id = rte_lcore_id();
-       const uint16_t num_cores = (uint16_t)rte_lcore_count();
-       uint16_t rx_count = 0;
-       uint16_t tx_count;
-       uint32_t retry = 0;
 
        RTE_LOG(INFO, VHOST_DATA, "Processing on Core %u started\n", lcore_id);
-       prev_tsc = 0;
 
        tx_q = &lcore_tx_queue[lcore_id];
-       for (i = 0; i < num_cores; i ++) {
+       for (i = 0; i < rte_lcore_count(); i++) {
                if (lcore_ids[i] == lcore_id) {
                        tx_q->txq_id = i;
                        break;
@@ -1053,34 +1137,7 @@ switch_worker(__attribute__((unused)) void *arg)
        }
 
        while(1) {
-               cur_tsc = rte_rdtsc();
-               /*
-                * TX burst queue drain
-                */
-               diff_tsc = cur_tsc - prev_tsc;
-               if (unlikely(diff_tsc > drain_tsc)) {
-
-                       if (tx_q->len) {
-                               RTE_LOG(DEBUG, VHOST_DATA,
-                                       "TX queue drained after timeout with burst size %u\n",
-                                       tx_q->len);
-
-                               /*Tx any packets in the queue*/
-                               ret = rte_eth_tx_burst(ports[0], (uint16_t)tx_q->txq_id,
-                                                                          (struct rte_mbuf **)tx_q->m_table,
-                                                                          (uint16_t)tx_q->len);
-                               if (unlikely(ret < tx_q->len)) {
-                                       do {
-                                               rte_pktmbuf_free(tx_q->m_table[ret]);
-                                       } while (++ret < tx_q->len);
-                               }
-
-                               tx_q->len = 0;
-                       }
-
-                       prev_tsc = cur_tsc;
-
-               }
+               drain_mbuf_table(tx_q);
 
                /*
                 * Inform the configuration core that we have exited the
@@ -1090,69 +1147,20 @@ switch_worker(__attribute__((unused)) void *arg)
                        lcore_info[lcore_id].dev_removal_flag = ACK_DEV_REMOVAL;
 
                /*
-                * Process devices
+                * Process vhost devices
                 */
                TAILQ_FOREACH(vdev, &lcore_info[lcore_id].vdev_list, next) {
-                       uint64_t fh;
-
-                       dev = vdev->dev;
-                       fh  = dev->device_fh;
-
                        if (unlikely(vdev->remove)) {
                                unlink_vmdq(vdev);
                                vdev->ready = DEVICE_SAFE_REMOVE;
                                continue;
                        }
 
-                       if (likely(vdev->ready == DEVICE_RX)) {
-                               /*Handle guest RX*/
-                               rx_count = rte_eth_rx_burst(ports[0],
-                                       vdev->vmdq_rx_q, pkts_burst, MAX_PKT_BURST);
-
-                               if (rx_count) {
-                                       /*
-                                       * Retry is enabled and the queue is full then we wait and retry to avoid packet loss
-                                       * Here MAX_PKT_BURST must be less than virtio queue size
-                                       */
-                                       if (enable_retry && unlikely(rx_count > rte_vring_available_entries(dev, VIRTIO_RXQ))) {
-                                               for (retry = 0; retry < burst_rx_retry_num; retry++) {
-                                                       rte_delay_us(burst_rx_delay_time);
-                                                       if (rx_count <= rte_vring_available_entries(dev, VIRTIO_RXQ))
-                                                               break;
-                                               }
-                                       }
-                                       ret_count = rte_vhost_enqueue_burst(dev, VIRTIO_RXQ, pkts_burst, rx_count);
-                                       if (enable_stats) {
-                                               rte_atomic64_add(
-                                                       &dev_statistics[fh].rx_total_atomic,
-                                                       rx_count);
-                                               rte_atomic64_add(
-                                                       &dev_statistics[fh].rx_atomic,
-                                                       ret_count);
-                                       }
-                                       while (likely(rx_count)) {
-                                               rx_count--;
-                                               rte_pktmbuf_free(pkts_burst[rx_count]);
-                                       }
-
-                               }
-                       }
+                       if (likely(vdev->ready == DEVICE_RX))
+                               drain_eth_rx(vdev);
 
-                       if (likely(!vdev->remove)) {
-                               /* Handle guest TX*/
-                               tx_count = rte_vhost_dequeue_burst(dev, VIRTIO_TXQ, mbuf_pool, pkts_burst, MAX_PKT_BURST);
-                               /* If this is the first received packet we need to learn the MAC and setup VMDQ */
-                               if (unlikely(vdev->ready == DEVICE_MAC_LEARNING) && tx_count) {
-                                       if (vdev->remove || (link_vmdq(vdev, pkts_burst[0]) == -1)) {
-                                               while (tx_count)
-                                                       rte_pktmbuf_free(pkts_burst[--tx_count]);
-                                       }
-                               }
-                               for (i = 0; i < tx_count; ++i) {
-                                       virtio_tx_route(vdev, pkts_burst[i],
-                                               vlan_tags[(uint16_t)dev->device_fh]);
-                               }
-                       }
+                       if (likely(!vdev->remove))
+                               drain_virtio_tx(vdev);
                }
        }
 
@@ -1246,9 +1254,6 @@ new_device (struct virtio_net *dev)
        TAILQ_INSERT_TAIL(&lcore_info[vdev->coreid].vdev_list, vdev, next);
        lcore_info[vdev->coreid].device_num++;
 
-       /* Initialize device stats */
-       memset(&dev_statistics[dev->device_fh], 0, sizeof(struct device_statistics));
-
        /* Disable notifications. */
        rte_vhost_enable_guest_notification(dev, VIRTIO_RXQ, 0);
        rte_vhost_enable_guest_notification(dev, VIRTIO_TXQ, 0);
@@ -1279,7 +1284,6 @@ print_stats(void)
        struct vhost_dev *vdev;
        uint64_t tx_dropped, rx_dropped;
        uint64_t tx, tx_total, rx, rx_total;
-       uint32_t device_fh;
        const char clr[] = { 27, '[', '2', 'J', '\0' };
        const char top_left[] = { 27, '[', '1', ';', '1', 'H','\0' };
 
@@ -1287,37 +1291,32 @@ print_stats(void)
                sleep(enable_stats);
 
                /* Clear screen and move to top left */
-               printf("%s%s", clr, top_left);
-
-               printf("\nDevice statistics ====================================");
+               printf("%s%s\n", clr, top_left);
+               printf("Device statistics =================================\n");
 
                TAILQ_FOREACH(vdev, &vhost_dev_list, next) {
-                       device_fh = vdev->dev->device_fh;
-                       tx_total = dev_statistics[device_fh].tx_total;
-                       tx = dev_statistics[device_fh].tx;
+                       tx_total   = vdev->stats.tx_total;
+                       tx         = vdev->stats.tx;
                        tx_dropped = tx_total - tx;
-                       rx_total = rte_atomic64_read(
-                               &dev_statistics[device_fh].rx_total_atomic);
-                       rx = rte_atomic64_read(
-                               &dev_statistics[device_fh].rx_atomic);
+
+                       rx_total   = rte_atomic64_read(&vdev->stats.rx_total_atomic);
+                       rx         = rte_atomic64_read(&vdev->stats.rx_atomic);
                        rx_dropped = rx_total - rx;
 
-                       printf("\nStatistics for device %"PRIu32" ------------------------------"
-                                       "\nTX total:            %"PRIu64""
-                                       "\nTX dropped:          %"PRIu64""
-                                       "\nTX successful:               %"PRIu64""
-                                       "\nRX total:            %"PRIu64""
-                                       "\nRX dropped:          %"PRIu64""
-                                       "\nRX successful:               %"PRIu64"",
-                                       device_fh,
-                                       tx_total,
-                                       tx_dropped,
-                                       tx,
-                                       rx_total,
-                                       rx_dropped,
-                                       rx);
+                       printf("Statistics for device %" PRIu64 "\n"
+                               "-----------------------\n"
+                               "TX total:              %" PRIu64 "\n"
+                               "TX dropped:            %" PRIu64 "\n"
+                               "TX successful:         %" PRIu64 "\n"
+                               "RX total:              %" PRIu64 "\n"
+                               "RX dropped:            %" PRIu64 "\n"
+                               "RX successful:         %" PRIu64 "\n",
+                               vdev->dev->device_fh,
+                               tx_total, tx_dropped, tx,
+                               rx_total, rx_dropped, rx);
                }
-               printf("\n======================================================\n");
+
+               printf("===================================================\n");
        }
 }
 
@@ -1422,8 +1421,6 @@ main(int argc, char *argv[])
 
        /* Get the number of physical ports. */
        nb_ports = rte_eth_dev_count();
-       if (nb_ports > RTE_MAX_ETHPORTS)
-               nb_ports = RTE_MAX_ETHPORTS;
 
        /*
         * Update the global var NUM_PORTS and global array PORTS
@@ -1466,9 +1463,6 @@ main(int argc, char *argv[])
                                "Cannot initialize network ports\n");
        }
 
-       /* Initialize device stats */
-       memset(&dev_statistics, 0, sizeof(dev_statistics));
-
        /* Enable stats if the user option is set. */
        if (enable_stats) {
                ret = pthread_create(&tid, NULL, (void *)print_stats, NULL);