examples/vhost: fix ioat dependency
[dpdk.git] / examples / vhost / main.c
index e1578e7..8d8c303 100644 (file)
@@ -25,6 +25,7 @@
 #include <rte_tcp.h>
 #include <rte_pause.h>
 
+#include "ioat.h"
 #include "main.h"
 
 #ifndef MAX_QUEUES
@@ -92,10 +93,13 @@ static uint32_t enable_tx_csum;
 static uint32_t enable_tso;
 
 static int client_mode;
-static int dequeue_zero_copy;
 
 static int builtin_net_driver;
 
+static int async_vhost_driver;
+
+static char dma_type[MAX_LONG_OPT_SZ];
+
 /* Specify timeout (in useconds) between retries on RX. */
 static uint32_t burst_rx_delay_time = BURST_RX_WAIT_US;
 /* Specify the number of retries on RX. */
@@ -182,6 +186,15 @@ struct mbuf_table lcore_tx_queue[RTE_MAX_LCORE];
                                 / US_PER_S * BURST_TX_DRAIN_US)
 #define VLAN_HLEN       4
 
+/*
+ * Hand the DMA argument string 'value' to the opener of the backend
+ * named by dma_type.
+ *
+ * "ioat" is the only backend handled here. The type is compared in full
+ * so that a string that merely starts with "ioat" is rejected instead of
+ * being silently treated as ioat.
+ *
+ * Returns the result of open_ioat(value) for the ioat backend, or -1 when
+ * dma_type names no known backend (including when it is still empty).
+ */
+static inline int
+open_dma(const char *value)
+{
+       if (strcmp(dma_type, "ioat") == 0)
+               return open_ioat(value);
+
+       return -1;
+}
+
 /*
  * Builds up the correct configuration for VMDQ VLAN pool map
  * according to the pool & queue limits.
@@ -247,16 +260,6 @@ port_init(uint16_t port)
        rx_ring_size = RTE_TEST_RX_DESC_DEFAULT;
        tx_ring_size = RTE_TEST_TX_DESC_DEFAULT;
 
-       /*
-        * When dequeue zero copy is enabled, guest Tx used vring will be
-        * updated only when corresponding mbuf is freed. Thus, the nb_tx_desc
-        * (tx_ring_size here) must be small enough so that the driver will
-        * hit the free threshold easily and free mbufs timely. Otherwise,
-        * guest Tx vring would be starved.
-        */
-       if (dequeue_zero_copy)
-               tx_ring_size = 64;
-
        tx_rings = (uint16_t)rte_lcore_count();
 
        /* Get port configuration. */
@@ -458,7 +461,8 @@ us_vhost_usage(const char *prgname)
        "               --tx-csum [0|1] disable/enable TX checksum offload.\n"
        "               --tso [0|1] disable/enable TCP segment offload.\n"
        "               --client register a vhost-user socket as client mode.\n"
-       "               --dequeue-zero-copy enables dequeue zero copy\n",
+       "               --dma-type register dma type for your vhost async driver. For example \"ioat\" for now.\n"
+       "               --dmas register dma channel for specific vhost device.\n",
               prgname);
 }
 
@@ -483,8 +487,9 @@ us_vhost_parse_args(int argc, char **argv)
                {"tx-csum", required_argument, NULL, 0},
                {"tso", required_argument, NULL, 0},
                {"client", no_argument, &client_mode, 1},
-               {"dequeue-zero-copy", no_argument, &dequeue_zero_copy, 1},
                {"builtin-net-driver", no_argument, &builtin_net_driver, 1},
+               {"dma-type", required_argument, NULL, 0},
+               {"dmas", required_argument, NULL, 0},
                {NULL, 0, 0, 0},
        };
 
@@ -627,6 +632,28 @@ us_vhost_parse_args(int argc, char **argv)
                                }
                        }
 
+                       if (!strncmp(long_option[option_index].name,
+                                               "dma-type", MAX_LONG_OPT_SZ)) {
+                               if (strlen(optarg) >= MAX_LONG_OPT_SZ) {
+                                       RTE_LOG(INFO, VHOST_CONFIG,
+                                               "Wrong DMA type\n");
+                                       us_vhost_usage(prgname);
+                                       return -1;
+                               }
+                               strcpy(dma_type, optarg);
+                       }
+
+                       if (!strncmp(long_option[option_index].name,
+                                               "dmas", MAX_LONG_OPT_SZ)) {
+                               if (open_dma(optarg) == -1) {
+                                       RTE_LOG(INFO, VHOST_CONFIG,
+                                               "Wrong DMA args\n");
+                                       us_vhost_usage(prgname);
+                                       return -1;
+                               }
+                               async_vhost_driver = 1;
+                       }
+
                        break;
 
                        /* Invalid option - print options. */
@@ -782,9 +809,22 @@ virtio_xmit(struct vhost_dev *dst_vdev, struct vhost_dev *src_vdev,
            struct rte_mbuf *m)
 {
        uint16_t ret;
+       struct rte_mbuf *m_cpl[1];
 
        if (builtin_net_driver) {
                ret = vs_enqueue_pkts(dst_vdev, VIRTIO_RXQ, &m, 1);
+       } else if (async_vhost_driver) {
+               ret = rte_vhost_submit_enqueue_burst(dst_vdev->vid, VIRTIO_RXQ,
+                                               &m, 1);
+
+               if (likely(ret))
+                       dst_vdev->nr_async_pkts++;
+
+               while (likely(dst_vdev->nr_async_pkts)) {
+                       if (rte_vhost_poll_enqueue_completed(dst_vdev->vid,
+                                       VIRTIO_RXQ, m_cpl, 1))
+                               dst_vdev->nr_async_pkts--;
+               }
        } else {
                ret = rte_vhost_enqueue_burst(dst_vdev->vid, VIRTIO_RXQ, &m, 1);
        }
@@ -1033,6 +1073,19 @@ drain_mbuf_table(struct mbuf_table *tx_q)
        }
 }
 
+/*
+ * Poll queue 'qid' of 'vdev' once for finished async enqueues (at most
+ * MAX_PKT_BURST per call), subtract the number reported from
+ * vdev->nr_async_pkts and pass the returned mbufs to free_pkts().
+ */
+static __rte_always_inline void
+complete_async_pkts(struct vhost_dev *vdev, uint16_t qid)
+{
+       struct rte_mbuf *done[MAX_PKT_BURST];
+       uint16_t nr_done = rte_vhost_poll_enqueue_completed(vdev->vid,
+                                               qid, done, MAX_PKT_BURST);
+
+       /* Nothing finished on this poll: counter and mbufs stay untouched. */
+       if (nr_done == 0)
+               return;
+
+       vdev->nr_async_pkts -= nr_done;
+       free_pkts(done, nr_done);
+}
+
 static __rte_always_inline void
 drain_eth_rx(struct vhost_dev *vdev)
 {
@@ -1041,6 +1094,10 @@ drain_eth_rx(struct vhost_dev *vdev)
 
        rx_count = rte_eth_rx_burst(ports[0], vdev->vmdq_rx_q,
                                    pkts, MAX_PKT_BURST);
+
+       while (likely(vdev->nr_async_pkts))
+               complete_async_pkts(vdev, VIRTIO_RXQ);
+
        if (!rx_count)
                return;
 
@@ -1065,16 +1122,22 @@ drain_eth_rx(struct vhost_dev *vdev)
        if (builtin_net_driver) {
                enqueue_count = vs_enqueue_pkts(vdev, VIRTIO_RXQ,
                                                pkts, rx_count);
+       } else if (async_vhost_driver) {
+               enqueue_count = rte_vhost_submit_enqueue_burst(vdev->vid,
+                                       VIRTIO_RXQ, pkts, rx_count);
+               vdev->nr_async_pkts += enqueue_count;
        } else {
                enqueue_count = rte_vhost_enqueue_burst(vdev->vid, VIRTIO_RXQ,
                                                pkts, rx_count);
        }
+
        if (enable_stats) {
                rte_atomic64_add(&vdev->stats.rx_total_atomic, rx_count);
                rte_atomic64_add(&vdev->stats.rx_atomic, enqueue_count);
        }
 
-       free_pkts(pkts, rx_count);
+       if (!async_vhost_driver)
+               free_pkts(pkts, rx_count);
 }
 
 static __rte_always_inline void
@@ -1202,7 +1265,7 @@ destroy_device(int vid)
 
 
        /* Set the dev_removal_flag on each lcore. */
-       RTE_LCORE_FOREACH_SLAVE(lcore)
+       RTE_LCORE_FOREACH_WORKER(lcore)
                lcore_info[lcore].dev_removal_flag = REQUEST_DEV_REMOVAL;
 
        /*
@@ -1210,7 +1273,7 @@ destroy_device(int vid)
         * we can be sure that they can no longer access the device removed
         * from the linked lists and that the devices are no longer in use.
         */
-       RTE_LCORE_FOREACH_SLAVE(lcore) {
+       RTE_LCORE_FOREACH_WORKER(lcore) {
                while (lcore_info[lcore].dev_removal_flag != ACK_DEV_REMOVAL)
                        rte_pause();
        }
@@ -1221,6 +1284,9 @@ destroy_device(int vid)
                "(%d) device has been removed from data core\n",
                vdev->vid);
 
+       if (async_vhost_driver)
+               rte_vhost_async_channel_unregister(vid, VIRTIO_RXQ);
+
        rte_free(vdev);
 }
 
@@ -1234,7 +1300,6 @@ new_device(int vid)
        int lcore, core_add = 0;
        uint32_t device_num_min = num_devices;
        struct vhost_dev *vdev;
-
        vdev = rte_zmalloc("vhost device", sizeof(*vdev), RTE_CACHE_LINE_SIZE);
        if (vdev == NULL) {
                RTE_LOG(INFO, VHOST_DATA,
@@ -1255,7 +1320,7 @@ new_device(int vid)
        vdev->remove = 0;
 
        /* Find a suitable lcore to add the device. */
-       RTE_LCORE_FOREACH_SLAVE(lcore) {
+       RTE_LCORE_FOREACH_WORKER(lcore) {
                if (lcore_info[lcore].device_num < device_num_min) {
                        device_num_min = lcore_info[lcore].device_num;
                        core_add = lcore;
@@ -1275,6 +1340,20 @@ new_device(int vid)
                "(%d) device has been added to data core %d\n",
                vid, vdev->coreid);
 
+       if (async_vhost_driver) {
+               struct rte_vhost_async_features f;
+               struct rte_vhost_async_channel_ops channel_ops;
+               if (strncmp(dma_type, "ioat", 4) == 0) {
+                       channel_ops.transfer_data = ioat_transfer_data_cb;
+                       channel_ops.check_completed_copies =
+                               ioat_check_completed_copies_cb;
+                       f.async_inorder = 1;
+                       f.async_threshold = 256;
+                       return rte_vhost_async_channel_register(vid, VIRTIO_RXQ,
+                               f.intval, &channel_ops);
+               }
+       }
+
        return 0;
 }
 
@@ -1504,18 +1583,18 @@ main(int argc, char *argv[])
        }
 
        /* Launch all data cores. */
-       RTE_LCORE_FOREACH_SLAVE(lcore_id)
+       RTE_LCORE_FOREACH_WORKER(lcore_id)
                rte_eal_remote_launch(switch_worker, NULL, lcore_id);
 
        if (client_mode)
                flags |= RTE_VHOST_USER_CLIENT;
 
-       if (dequeue_zero_copy)
-               flags |= RTE_VHOST_USER_DEQUEUE_ZERO_COPY;
-
        /* Register vhost user driver to handle vhost messages. */
        for (i = 0; i < nb_sockets; i++) {
                char *file = socket_files + i * PATH_MAX;
+               if (async_vhost_driver)
+                       flags = flags | RTE_VHOST_USER_ASYNC_COPY;
+
                ret = rte_vhost_driver_register(file, flags);
                if (ret != 0) {
                        unregister_drivers(i);
@@ -1565,7 +1644,7 @@ main(int argc, char *argv[])
                }
        }
 
-       RTE_LCORE_FOREACH_SLAVE(lcore_id)
+       RTE_LCORE_FOREACH_WORKER(lcore_id)
                rte_eal_wait_lcore(lcore_id);
 
        return 0;