X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=examples%2Fvhost%2Fmain.c;h=22309977cebd6dc89eaef70f08961dd31fd3fef3;hb=ecf1474a618ba77ef3c4471fdc004a8c5f3b4d11;hp=d7b34b3d411bc025f1049b8c3ba7a8b0d77d681a;hpb=35b2d13fd6fdcbd191f2a30d74648faeb1186c65;p=dpdk.git

diff --git a/examples/vhost/main.c b/examples/vhost/main.c
index d7b34b3d41..22309977ce 100644
--- a/examples/vhost/main.c
+++ b/examples/vhost/main.c
@@ -25,6 +25,7 @@
 #include <rte_tcp.h>
 #include <rte_pause.h>
 
+#include "ioat.h"
 #include "main.h"
 
 #ifndef MAX_QUEUES
@@ -92,10 +93,13 @@ static uint32_t enable_tx_csum;
 static uint32_t enable_tso;
 
 static int client_mode;
-static int dequeue_zero_copy;
 static int builtin_net_driver;
 
+static int async_vhost_driver;
+
+static char dma_type[MAX_LONG_OPT_SZ];
+
 /* Specify timeout (in useconds) between retries on RX. */
 static uint32_t burst_rx_delay_time = BURST_RX_WAIT_US;
 /* Specify the number of retries on RX. */
@@ -182,6 +186,15 @@ struct mbuf_table lcore_tx_queue[RTE_MAX_LCORE];
 				 / US_PER_S * BURST_TX_DRAIN_US)
 #define VLAN_HLEN 4
 
+static inline int
+open_dma(const char *value)
+{
+	if (strncmp(dma_type, "ioat", 4) == 0)
+		return open_ioat(value);
+
+	return -1;
+}
+
 /*
  * Builds up the correct configuration for VMDQ VLAN pool map
  * according to the pool & queue limits.
@@ -228,7 +241,14 @@ port_init(uint16_t port)
 	uint16_t q;
 
 	/* The max pool number from dev_info will be used to validate the pool number specified in cmd line */
-	rte_eth_dev_info_get (port, &dev_info);
+	retval = rte_eth_dev_info_get(port, &dev_info);
+	if (retval != 0) {
+		RTE_LOG(ERR, VHOST_PORT,
+			"Error during getting device (port %u) info: %s\n",
+			port, strerror(-retval));
+
+		return retval;
+	}
 
 	rxconf = &dev_info.default_rxconf;
 	txconf = &dev_info.default_txconf;
@@ -240,16 +260,6 @@ port_init(uint16_t port)
 	rx_ring_size = RTE_TEST_RX_DESC_DEFAULT;
 	tx_ring_size = RTE_TEST_TX_DESC_DEFAULT;
 
-	/*
-	 * When dequeue zero copy is enabled, guest Tx used vring will be
-	 * updated only when corresponding mbuf is freed. Thus, the nb_tx_desc
-	 * (tx_ring_size here) must be small enough so that the driver will
-	 * hit the free threshold easily and free mbufs timely. Otherwise,
-	 * guest Tx vring would be starved.
-	 */
-	if (dequeue_zero_copy)
-		tx_ring_size = 64;
-
 	tx_rings = (uint16_t)rte_lcore_count();
 
 	/* Get port configuration. */
@@ -329,10 +339,24 @@ port_init(uint16_t port)
 		return retval;
 	}
 
-	if (promiscuous)
-		rte_eth_promiscuous_enable(port);
+	if (promiscuous) {
+		retval = rte_eth_promiscuous_enable(port);
+		if (retval != 0) {
+			RTE_LOG(ERR, VHOST_PORT,
+				"Failed to enable promiscuous mode on port %u: %s\n",
+				port, rte_strerror(-retval));
+			return retval;
+		}
+	}
+
+	retval = rte_eth_macaddr_get(port, &vmdq_ports_eth_addr[port]);
+	if (retval < 0) {
+		RTE_LOG(ERR, VHOST_PORT,
+			"Failed to get MAC address on port %u: %s\n",
+			port, rte_strerror(-retval));
+		return retval;
+	}
 
-	rte_eth_macaddr_get(port, &vmdq_ports_eth_addr[port]);
 	RTE_LOG(INFO, VHOST_PORT, "Max virtio devices supported: %u\n", num_devices);
 	RTE_LOG(INFO, VHOST_PORT, "Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8
 			" %02"PRIx8" %02"PRIx8" %02"PRIx8"\n",
@@ -386,10 +410,7 @@ parse_portmask(const char *portmask)
 	/* parse hexadecimal string */
 	pm = strtoul(portmask, &end, 16);
 	if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0') || (errno != 0))
-		return -1;
-
-	if (pm == 0)
-		return -1;
+		return 0;
 
 	return pm;
 
@@ -440,7 +461,8 @@ us_vhost_usage(const char *prgname)
 	"		--tx-csum [0|1] disable/enable TX checksum offload.\n"
 	"		--tso [0|1] disable/enable TCP segment offload.\n"
 	"		--client register a vhost-user socket as client mode.\n"
-	"		--dequeue-zero-copy enables dequeue zero copy\n",
+	"		--dma-type register dma type for your vhost async driver. For example \"ioat\" for now.\n"
+	"		--dmas register dma channel for specific vhost device.\n",
 	       prgname);
 }
 
@@ -465,8 +487,9 @@ us_vhost_parse_args(int argc, char **argv)
 		{"tx-csum", required_argument, NULL, 0},
 		{"tso", required_argument, NULL, 0},
 		{"client", no_argument, &client_mode, 1},
-		{"dequeue-zero-copy", no_argument, &dequeue_zero_copy, 1},
 		{"builtin-net-driver", no_argument, &builtin_net_driver, 1},
+		{"dma-type", required_argument, NULL, 0},
+		{"dmas", required_argument, NULL, 0},
 		{NULL, 0, 0, 0},
 	};
 
@@ -609,6 +632,28 @@ us_vhost_parse_args(int argc, char **argv)
 				}
 			}
 
+			if (!strncmp(long_option[option_index].name,
+						"dma-type", MAX_LONG_OPT_SZ)) {
+				if (strlen(optarg) >= MAX_LONG_OPT_SZ) {
+					RTE_LOG(INFO, VHOST_CONFIG,
+						"Wrong DMA type\n");
+					us_vhost_usage(prgname);
+					return -1;
+				}
+				strcpy(dma_type, optarg);
+			}
+
+			if (!strncmp(long_option[option_index].name,
+						"dmas", MAX_LONG_OPT_SZ)) {
+				if (open_dma(optarg) == -1) {
+					RTE_LOG(INFO, VHOST_CONFIG,
+						"Wrong DMA args\n");
+					us_vhost_usage(prgname);
+					return -1;
+				}
+				async_vhost_driver = 1;
+			}
+
 			break;
 
 		/* Invalid option - print options. */
@@ -764,13 +809,30 @@ virtio_xmit(struct vhost_dev *dst_vdev, struct vhost_dev *src_vdev,
 	    struct rte_mbuf *m)
 {
 	uint16_t ret;
+	struct rte_mbuf *m_cpl[1], *comp_pkt;
+	uint32_t nr_comp = 0;
 
 	if (builtin_net_driver) {
 		ret = vs_enqueue_pkts(dst_vdev, VIRTIO_RXQ, &m, 1);
+	} else if (async_vhost_driver) {
+		ret = rte_vhost_submit_enqueue_burst(dst_vdev->vid, VIRTIO_RXQ,
+						&m, 1, &comp_pkt, &nr_comp);
+		if (nr_comp == 1)
+			goto done;
+
+		if (likely(ret))
+			dst_vdev->nr_async_pkts++;
+
+		while (likely(dst_vdev->nr_async_pkts)) {
+			if (rte_vhost_poll_enqueue_completed(dst_vdev->vid,
+					VIRTIO_RXQ, m_cpl, 1))
+				dst_vdev->nr_async_pkts--;
+		}
 	} else {
 		ret = rte_vhost_enqueue_burst(dst_vdev->vid, VIRTIO_RXQ, &m, 1);
 	}
 
+done:
 	if (enable_stats) {
 		rte_atomic64_inc(&dst_vdev->stats.rx_total_atomic);
 		rte_atomic64_add(&dst_vdev->stats.rx_atomic, ret);
@@ -858,15 +920,15 @@ get_psd_sum(void *l3_hdr, uint64_t ol_flags)
 {
 	if (ol_flags & PKT_TX_IPV4)
 		return rte_ipv4_phdr_cksum(l3_hdr, ol_flags);
-	else /* assume ethertype == RTE_ETHER_TYPE_IPv6 */
+	else /* assume ethertype == RTE_ETHER_TYPE_IPV6 */
 		return rte_ipv6_phdr_cksum(l3_hdr, ol_flags);
 }
 
 static void virtio_tx_offload(struct rte_mbuf *m)
 {
 	void *l3_hdr;
-	struct ipv4_hdr *ipv4_hdr = NULL;
-	struct tcp_hdr *tcp_hdr = NULL;
+	struct rte_ipv4_hdr *ipv4_hdr = NULL;
+	struct rte_tcp_hdr *tcp_hdr = NULL;
 	struct rte_ether_hdr *eth_hdr =
 		rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
 
@@ -878,7 +940,7 @@ static void virtio_tx_offload(struct rte_mbuf *m)
 		m->ol_flags |= PKT_TX_IP_CKSUM;
 	}
 
-	tcp_hdr = (struct tcp_hdr *)((char *)l3_hdr + m->l3_len);
+	tcp_hdr = (struct rte_tcp_hdr *)((char *)l3_hdr + m->l3_len);
 	tcp_hdr->cksum = get_psd_sum(l3_hdr, m->ol_flags);
 }
 
@@ -1015,14 +1077,32 @@ drain_mbuf_table(struct mbuf_table *tx_q)
 	}
 }
 
+static __rte_always_inline void
+complete_async_pkts(struct vhost_dev *vdev, uint16_t qid)
+{
+	struct rte_mbuf *p_cpl[MAX_PKT_BURST];
+	uint16_t complete_count;
+
+	complete_count = rte_vhost_poll_enqueue_completed(vdev->vid,
+						qid, p_cpl, MAX_PKT_BURST);
+	vdev->nr_async_pkts -= complete_count;
+	if (complete_count)
+		free_pkts(p_cpl, complete_count);
+}
+
 static __rte_always_inline void
 drain_eth_rx(struct vhost_dev *vdev)
 {
 	uint16_t rx_count, enqueue_count;
-	struct rte_mbuf *pkts[MAX_PKT_BURST];
+	struct rte_mbuf *pkts[MAX_PKT_BURST], *comp_pkts[MAX_PKT_BURST];
+	uint32_t nr_comp = 0;
 
 	rx_count = rte_eth_rx_burst(ports[0], vdev->vmdq_rx_q,
 				    pkts, MAX_PKT_BURST);
+
+	while (likely(vdev->nr_async_pkts))
+		complete_async_pkts(vdev, VIRTIO_RXQ);
+
 	if (!rx_count)
 		return;
 
@@ -1047,16 +1127,27 @@ drain_eth_rx(struct vhost_dev *vdev)
 	if (builtin_net_driver) {
 		enqueue_count = vs_enqueue_pkts(vdev, VIRTIO_RXQ,
 						pkts, rx_count);
+	} else if (async_vhost_driver) {
+		enqueue_count = rte_vhost_submit_enqueue_burst(vdev->vid,
+					VIRTIO_RXQ, pkts, rx_count, comp_pkts,
+					&nr_comp);
+		if (nr_comp > 0) {
+			free_pkts(comp_pkts, nr_comp);
+			enqueue_count -= nr_comp;
+		}
+		vdev->nr_async_pkts += enqueue_count;
 	} else {
 		enqueue_count = rte_vhost_enqueue_burst(vdev->vid, VIRTIO_RXQ,
 						pkts, rx_count);
 	}
+
 	if (enable_stats) {
 		rte_atomic64_add(&vdev->stats.rx_total_atomic, rx_count);
 		rte_atomic64_add(&vdev->stats.rx_atomic, enqueue_count);
 	}
 
-	free_pkts(pkts, rx_count);
+	if (!async_vhost_driver)
+		free_pkts(pkts, rx_count);
 }
 
 static __rte_always_inline void
@@ -1184,7 +1275,7 @@ destroy_device(int vid)
 	/* Set the dev_removal_flag on each lcore. */
-	RTE_LCORE_FOREACH_SLAVE(lcore)
+	RTE_LCORE_FOREACH_WORKER(lcore)
 		lcore_info[lcore].dev_removal_flag = REQUEST_DEV_REMOVAL;
 
 	/*
@@ -1192,7 +1283,7 @@ destroy_device(int vid)
 	 * we can be sure that they can no longer access the device removed
 	 * from the linked lists and that the devices are no longer in use.
 	 */
-	RTE_LCORE_FOREACH_SLAVE(lcore) {
+	RTE_LCORE_FOREACH_WORKER(lcore) {
 		while (lcore_info[lcore].dev_removal_flag != ACK_DEV_REMOVAL)
 			rte_pause();
 	}
@@ -1203,6 +1294,9 @@ destroy_device(int vid)
 		"(%d) device has been removed from data core\n",
 		vdev->vid);
 
+	if (async_vhost_driver)
+		rte_vhost_async_channel_unregister(vid, VIRTIO_RXQ);
+
 	rte_free(vdev);
 }
 
@@ -1216,7 +1310,6 @@ new_device(int vid)
 	int lcore, core_add = 0;
 	uint32_t device_num_min = num_devices;
 	struct vhost_dev *vdev;
-
 	vdev = rte_zmalloc("vhost device", sizeof(*vdev), RTE_CACHE_LINE_SIZE);
 	if (vdev == NULL) {
 		RTE_LOG(INFO, VHOST_DATA,
@@ -1237,7 +1330,7 @@ new_device(int vid)
 	vdev->remove = 0;
 
 	/* Find a suitable lcore to add the device. */
-	RTE_LCORE_FOREACH_SLAVE(lcore) {
+	RTE_LCORE_FOREACH_WORKER(lcore) {
 		if (lcore_info[lcore].device_num < device_num_min) {
 			device_num_min = lcore_info[lcore].device_num;
 			core_add = lcore;
@@ -1257,6 +1350,20 @@ new_device(int vid)
 		"(%d) device has been added to data core %d\n",
 		vid, vdev->coreid);
 
+	if (async_vhost_driver) {
+		struct rte_vhost_async_features f;
+		struct rte_vhost_async_channel_ops channel_ops;
+		if (strncmp(dma_type, "ioat", 4) == 0) {
+			channel_ops.transfer_data = ioat_transfer_data_cb;
+			channel_ops.check_completed_copies =
+				ioat_check_completed_copies_cb;
+			f.async_inorder = 1;
+			f.async_threshold = 256;
+			return rte_vhost_async_channel_register(vid, VIRTIO_RXQ,
+				f.intval, &channel_ops);
+		}
+	}
+
 	return 0;
 }
 
@@ -1313,6 +1420,8 @@ print_stats(__rte_unused void *arg)
 		}
 
 		printf("===================================================\n");
+
+		fflush(stdout);
 	}
 
 	return NULL;
@@ -1484,18 +1593,18 @@ main(int argc, char *argv[])
 	}
 
 	/* Launch all data cores. */
-	RTE_LCORE_FOREACH_SLAVE(lcore_id)
+	RTE_LCORE_FOREACH_WORKER(lcore_id)
 		rte_eal_remote_launch(switch_worker, NULL, lcore_id);
 
 	if (client_mode)
 		flags |= RTE_VHOST_USER_CLIENT;
 
-	if (dequeue_zero_copy)
-		flags |= RTE_VHOST_USER_DEQUEUE_ZERO_COPY;
-
 	/* Register vhost user driver to handle vhost messages. */
 	for (i = 0; i < nb_sockets; i++) {
 		char *file = socket_files + i * PATH_MAX;
+		if (async_vhost_driver)
+			flags = flags | RTE_VHOST_USER_ASYNC_COPY;
+
 		ret = rte_vhost_driver_register(file, flags);
 		if (ret != 0) {
 			unregister_drivers(i);
@@ -1545,7 +1654,7 @@ main(int argc, char *argv[])
 		}
 	}
 
-	RTE_LCORE_FOREACH_SLAVE(lcore_id)
+	RTE_LCORE_FOREACH_WORKER(lcore_id)
 		rte_eal_wait_lcore(lcore_id);
 
 	return 0;