examples/vhost: fix ioat dependency
[dpdk.git] / examples / vhost / main.c
index 9427d59..8d8c303 100644 (file)
@@ -25,6 +25,7 @@
 #include <rte_tcp.h>
 #include <rte_pause.h>
 
+#include "ioat.h"
 #include "main.h"
 
 #ifndef MAX_QUEUES
@@ -92,10 +93,13 @@ static uint32_t enable_tx_csum;
 static uint32_t enable_tso;
 
 static int client_mode;
-static int dequeue_zero_copy;
 
 static int builtin_net_driver;
 
+static int async_vhost_driver; /* set to 1 once a --dmas value is accepted */
+
+static char dma_type[MAX_LONG_OPT_SZ]; /* copy of the --dma-type argument */
+
 /* Specify timeout (in useconds) between retries on RX. */
 static uint32_t burst_rx_delay_time = BURST_RX_WAIT_US;
 /* Specify the number of retries on RX. */
@@ -182,6 +186,15 @@ struct mbuf_table lcore_tx_queue[RTE_MAX_LCORE];
                                 / US_PER_S * BURST_TX_DRAIN_US)
 #define VLAN_HLEN       4
 
+static inline int /* hand the --dmas value to the backend named by dma_type */
+open_dma(const char *value)
+{
+       if (strncmp(dma_type, "ioat", 4) == 0) /* prefix match: first 4 chars */
+               return open_ioat(value); /* result is passed through as-is */
+
+       return -1; /* dma_type unset or not starting with "ioat" */
+}
+
 /*
  * Builds up the correct configuration for VMDQ VLAN pool map
  * according to the pool & queue limits.
@@ -228,7 +241,14 @@ port_init(uint16_t port)
        uint16_t q;
 
        /* The max pool number from dev_info will be used to validate the pool number specified in cmd line */
-       rte_eth_dev_info_get (port, &dev_info);
+       retval = rte_eth_dev_info_get(port, &dev_info);
+       if (retval != 0) {
+               RTE_LOG(ERR, VHOST_PORT,
+                       "Error during getting device (port %u) info: %s\n",
+                       port, strerror(-retval));
+
+               return retval;
+       }
 
        rxconf = &dev_info.default_rxconf;
        txconf = &dev_info.default_txconf;
@@ -240,16 +260,6 @@ port_init(uint16_t port)
        rx_ring_size = RTE_TEST_RX_DESC_DEFAULT;
        tx_ring_size = RTE_TEST_TX_DESC_DEFAULT;
 
-       /*
-        * When dequeue zero copy is enabled, guest Tx used vring will be
-        * updated only when corresponding mbuf is freed. Thus, the nb_tx_desc
-        * (tx_ring_size here) must be small enough so that the driver will
-        * hit the free threshold easily and free mbufs timely. Otherwise,
-        * guest Tx vring would be starved.
-        */
-       if (dequeue_zero_copy)
-               tx_ring_size = 64;
-
        tx_rings = (uint16_t)rte_lcore_count();
 
        /* Get port configuration. */
@@ -329,10 +339,24 @@ port_init(uint16_t port)
                return retval;
        }
 
-       if (promiscuous)
-               rte_eth_promiscuous_enable(port);
+       if (promiscuous) {
+               retval = rte_eth_promiscuous_enable(port);
+               if (retval != 0) {
+                       RTE_LOG(ERR, VHOST_PORT,
+                               "Failed to enable promiscuous mode on port %u: %s\n",
+                               port, rte_strerror(-retval));
+                       return retval;
+               }
+       }
+
+       retval = rte_eth_macaddr_get(port, &vmdq_ports_eth_addr[port]);
+       if (retval < 0) {
+               RTE_LOG(ERR, VHOST_PORT,
+                       "Failed to get MAC address on port %u: %s\n",
+                       port, rte_strerror(-retval));
+               return retval;
+       }
 
-       rte_eth_macaddr_get(port, &vmdq_ports_eth_addr[port]);
        RTE_LOG(INFO, VHOST_PORT, "Max virtio devices supported: %u\n", num_devices);
        RTE_LOG(INFO, VHOST_PORT, "Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8
                        " %02"PRIx8" %02"PRIx8" %02"PRIx8"\n",
@@ -386,10 +410,7 @@ parse_portmask(const char *portmask)
        /* parse hexadecimal string */
        pm = strtoul(portmask, &end, 16);
        if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0') || (errno != 0))
-               return -1;
-
-       if (pm == 0)
-               return -1;
+               return 0;
 
        return pm;
 
@@ -440,7 +461,8 @@ us_vhost_usage(const char *prgname)
        "               --tx-csum [0|1] disable/enable TX checksum offload.\n"
        "               --tso [0|1] disable/enable TCP segment offload.\n"
        "               --client register a vhost-user socket as client mode.\n"
-       "               --dequeue-zero-copy enables dequeue zero copy\n",
+       "               --dma-type register dma type for your vhost async driver. For example \"ioat\" for now.\n"
+       "               --dmas register dma channel for specific vhost device.\n",
               prgname);
 }
 
@@ -465,8 +487,9 @@ us_vhost_parse_args(int argc, char **argv)
                {"tx-csum", required_argument, NULL, 0},
                {"tso", required_argument, NULL, 0},
                {"client", no_argument, &client_mode, 1},
-               {"dequeue-zero-copy", no_argument, &dequeue_zero_copy, 1},
                {"builtin-net-driver", no_argument, &builtin_net_driver, 1},
+               {"dma-type", required_argument, NULL, 0},
+               {"dmas", required_argument, NULL, 0},
                {NULL, 0, 0, 0},
        };
 
@@ -609,6 +632,28 @@ us_vhost_parse_args(int argc, char **argv)
                                }
                        }
 
+                       if (!strncmp(long_option[option_index].name,
+                                               "dma-type", MAX_LONG_OPT_SZ)) {
+                               if (strlen(optarg) >= MAX_LONG_OPT_SZ) {
+                                       RTE_LOG(INFO, VHOST_CONFIG,
+                                               "Wrong DMA type\n");
+                                       us_vhost_usage(prgname);
+                                       return -1;
+                               }
+                               strcpy(dma_type, optarg);
+                       }
+
+                       if (!strncmp(long_option[option_index].name,
+                                               "dmas", MAX_LONG_OPT_SZ)) {
+                               if (open_dma(optarg) == -1) {
+                                       RTE_LOG(INFO, VHOST_CONFIG,
+                                               "Wrong DMA args\n");
+                                       us_vhost_usage(prgname);
+                                       return -1;
+                               }
+                               async_vhost_driver = 1;
+                       }
+
                        break;
 
                        /* Invalid option - print options. */
@@ -764,9 +809,22 @@ virtio_xmit(struct vhost_dev *dst_vdev, struct vhost_dev *src_vdev,
            struct rte_mbuf *m)
 {
        uint16_t ret;
+       struct rte_mbuf *m_cpl[1];
 
        if (builtin_net_driver) {
                ret = vs_enqueue_pkts(dst_vdev, VIRTIO_RXQ, &m, 1);
+       } else if (async_vhost_driver) {
+               ret = rte_vhost_submit_enqueue_burst(dst_vdev->vid, VIRTIO_RXQ,
+                                               &m, 1);
+
+               if (likely(ret))
+                       dst_vdev->nr_async_pkts++;
+
+               while (likely(dst_vdev->nr_async_pkts)) {
+                       if (rte_vhost_poll_enqueue_completed(dst_vdev->vid,
+                                       VIRTIO_RXQ, m_cpl, 1))
+                               dst_vdev->nr_async_pkts--;
+               }
        } else {
                ret = rte_vhost_enqueue_burst(dst_vdev->vid, VIRTIO_RXQ, &m, 1);
        }
@@ -858,7 +916,7 @@ get_psd_sum(void *l3_hdr, uint64_t ol_flags)
 {
        if (ol_flags & PKT_TX_IPV4)
                return rte_ipv4_phdr_cksum(l3_hdr, ol_flags);
-       else /* assume ethertype == RTE_ETHER_TYPE_IPv6 */
+       else /* assume ethertype == RTE_ETHER_TYPE_IPV6 */
                return rte_ipv6_phdr_cksum(l3_hdr, ol_flags);
 }
 
@@ -1015,6 +1073,19 @@ drain_mbuf_table(struct mbuf_table *tx_q)
        }
 }
 
+static __rte_always_inline void /* reap completed async enqueues on qid */
+complete_async_pkts(struct vhost_dev *vdev, uint16_t qid)
+{
+       struct rte_mbuf *p_cpl[MAX_PKT_BURST]; /* presumably filled by the poll */
+       uint16_t complete_count;
+
+       complete_count = rte_vhost_poll_enqueue_completed(vdev->vid,
+                                               qid, p_cpl, MAX_PKT_BURST); /* at most one burst per call */
+       vdev->nr_async_pkts -= complete_count; /* drop them from the pending count */
+       if (complete_count)
+               free_pkts(p_cpl, complete_count); /* release the returned mbufs */
+}
+
 static __rte_always_inline void
 drain_eth_rx(struct vhost_dev *vdev)
 {
@@ -1023,6 +1094,10 @@ drain_eth_rx(struct vhost_dev *vdev)
 
        rx_count = rte_eth_rx_burst(ports[0], vdev->vmdq_rx_q,
                                    pkts, MAX_PKT_BURST);
+
+       while (likely(vdev->nr_async_pkts))
+               complete_async_pkts(vdev, VIRTIO_RXQ);
+
        if (!rx_count)
                return;
 
@@ -1047,16 +1122,22 @@ drain_eth_rx(struct vhost_dev *vdev)
        if (builtin_net_driver) {
                enqueue_count = vs_enqueue_pkts(vdev, VIRTIO_RXQ,
                                                pkts, rx_count);
+       } else if (async_vhost_driver) {
+               enqueue_count = rte_vhost_submit_enqueue_burst(vdev->vid,
+                                       VIRTIO_RXQ, pkts, rx_count);
+               vdev->nr_async_pkts += enqueue_count;
        } else {
                enqueue_count = rte_vhost_enqueue_burst(vdev->vid, VIRTIO_RXQ,
                                                pkts, rx_count);
        }
+
        if (enable_stats) {
                rte_atomic64_add(&vdev->stats.rx_total_atomic, rx_count);
                rte_atomic64_add(&vdev->stats.rx_atomic, enqueue_count);
        }
 
-       free_pkts(pkts, rx_count);
+       if (!async_vhost_driver)
+               free_pkts(pkts, rx_count);
 }
 
 static __rte_always_inline void
@@ -1184,7 +1265,7 @@ destroy_device(int vid)
 
 
        /* Set the dev_removal_flag on each lcore. */
-       RTE_LCORE_FOREACH_SLAVE(lcore)
+       RTE_LCORE_FOREACH_WORKER(lcore)
                lcore_info[lcore].dev_removal_flag = REQUEST_DEV_REMOVAL;
 
        /*
@@ -1192,7 +1273,7 @@ destroy_device(int vid)
         * we can be sure that they can no longer access the device removed
         * from the linked lists and that the devices are no longer in use.
         */
-       RTE_LCORE_FOREACH_SLAVE(lcore) {
+       RTE_LCORE_FOREACH_WORKER(lcore) {
                while (lcore_info[lcore].dev_removal_flag != ACK_DEV_REMOVAL)
                        rte_pause();
        }
@@ -1203,6 +1284,9 @@ destroy_device(int vid)
                "(%d) device has been removed from data core\n",
                vdev->vid);
 
+       if (async_vhost_driver)
+               rte_vhost_async_channel_unregister(vid, VIRTIO_RXQ);
+
        rte_free(vdev);
 }
 
@@ -1216,7 +1300,6 @@ new_device(int vid)
        int lcore, core_add = 0;
        uint32_t device_num_min = num_devices;
        struct vhost_dev *vdev;
-
        vdev = rte_zmalloc("vhost device", sizeof(*vdev), RTE_CACHE_LINE_SIZE);
        if (vdev == NULL) {
                RTE_LOG(INFO, VHOST_DATA,
@@ -1237,7 +1320,7 @@ new_device(int vid)
        vdev->remove = 0;
 
        /* Find a suitable lcore to add the device. */
-       RTE_LCORE_FOREACH_SLAVE(lcore) {
+       RTE_LCORE_FOREACH_WORKER(lcore) {
                if (lcore_info[lcore].device_num < device_num_min) {
                        device_num_min = lcore_info[lcore].device_num;
                        core_add = lcore;
@@ -1257,6 +1340,20 @@ new_device(int vid)
                "(%d) device has been added to data core %d\n",
                vid, vdev->coreid);
 
+       if (async_vhost_driver) {
+               struct rte_vhost_async_features f;
+               struct rte_vhost_async_channel_ops channel_ops;
+               if (strncmp(dma_type, "ioat", 4) == 0) {
+                       channel_ops.transfer_data = ioat_transfer_data_cb;
+                       channel_ops.check_completed_copies =
+                               ioat_check_completed_copies_cb;
+                       f.async_inorder = 1;
+                       f.async_threshold = 256;
+                       return rte_vhost_async_channel_register(vid, VIRTIO_RXQ,
+                               f.intval, &channel_ops);
+               }
+       }
+
        return 0;
 }
 
@@ -1313,6 +1410,8 @@ print_stats(__rte_unused void *arg)
                }
 
                printf("===================================================\n");
+
+               fflush(stdout);
        }
 
        return NULL;
@@ -1484,18 +1583,18 @@ main(int argc, char *argv[])
        }
 
        /* Launch all data cores. */
-       RTE_LCORE_FOREACH_SLAVE(lcore_id)
+       RTE_LCORE_FOREACH_WORKER(lcore_id)
                rte_eal_remote_launch(switch_worker, NULL, lcore_id);
 
        if (client_mode)
                flags |= RTE_VHOST_USER_CLIENT;
 
-       if (dequeue_zero_copy)
-               flags |= RTE_VHOST_USER_DEQUEUE_ZERO_COPY;
-
        /* Register vhost user driver to handle vhost messages. */
        for (i = 0; i < nb_sockets; i++) {
                char *file = socket_files + i * PATH_MAX;
+               if (async_vhost_driver)
+                       flags = flags | RTE_VHOST_USER_ASYNC_COPY;
+
                ret = rte_vhost_driver_register(file, flags);
                if (ret != 0) {
                        unregister_drivers(i);
@@ -1545,7 +1644,7 @@ main(int argc, char *argv[])
                }
        }
 
-       RTE_LCORE_FOREACH_SLAVE(lcore_id)
+       RTE_LCORE_FOREACH_WORKER(lcore_id)
                rte_eal_wait_lcore(lcore_id);
 
        return 0;