X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=examples%2Fvhost%2Fmain.c;h=22309977cebd6dc89eaef70f08961dd31fd3fef3;hb=ecf1474a618ba77ef3c4471fdc004a8c5f3b4d11;hp=84e0d6366d9333d3ad4c7d2b737c3e56e73b238b;hpb=d9a42a69febf453cdb735e77fc0e01463ddf4acc;p=dpdk.git

diff --git a/examples/vhost/main.c b/examples/vhost/main.c
index 84e0d6366d..22309977ce 100644
--- a/examples/vhost/main.c
+++ b/examples/vhost/main.c
@@ -25,6 +25,7 @@
 #include <rte_tcp.h>
 #include <rte_pause.h>
 
+#include "ioat.h"
 #include "main.h"
 
 #ifndef MAX_QUEUES
@@ -55,12 +56,6 @@
 #define INVALID_PORT_ID 0xFF
 
-/* Max number of devices. Limited by vmdq. */
-#define MAX_DEVICES 64
-
-/* Size of buffers used for snprintfs. */
-#define MAX_PRINT_BUFF 6072
-
 /* Maximum long option length for option parsing. */
 #define MAX_LONG_OPT_SZ 64
 
@@ -98,10 +93,13 @@ static uint32_t enable_tx_csum;
 static uint32_t enable_tso;
 
 static int client_mode;
-static int dequeue_zero_copy;
 
 static int builtin_net_driver;
 
+static int async_vhost_driver;
+
+static char dma_type[MAX_LONG_OPT_SZ];
+
 /* Specify timeout (in useconds) between retries on RX. */
 static uint32_t burst_rx_delay_time = BURST_RX_WAIT_US;
 /* Specify the number of retries on RX. */
@@ -116,14 +114,12 @@ static struct rte_eth_conf vmdq_conf_default = {
 	.rxmode = {
 		.mq_mode = ETH_MQ_RX_VMDQ_ONLY,
 		.split_hdr_size = 0,
-		.ignore_offload_bitfield = 1,
 		/*
 		 * VLAN strip is necessary for 1G NIC such as I350,
 		 * this fixes bug of ipv4 forwarding in guest can't
 		 * forward pakets from one virtio dev to another virtio dev.
 		 */
-		.offloads = (DEV_RX_OFFLOAD_CRC_STRIP |
-			     DEV_RX_OFFLOAD_VLAN_STRIP),
+		.offloads = DEV_RX_OFFLOAD_VLAN_STRIP,
 	},
 
 	.txmode = {
@@ -169,7 +165,7 @@ const uint16_t vlan_tags[] = {
 };
 
 /* ethernet addresses of ports */
-static struct ether_addr vmdq_ports_eth_addr[RTE_MAX_ETHPORTS];
+static struct rte_ether_addr vmdq_ports_eth_addr[RTE_MAX_ETHPORTS];
 
 static struct vhost_dev_tailq_list vhost_dev_list =
 	TAILQ_HEAD_INITIALIZER(vhost_dev_list);
@@ -190,6 +186,15 @@ struct mbuf_table lcore_tx_queue[RTE_MAX_LCORE];
 				 / US_PER_S * BURST_TX_DRAIN_US)
 #define VLAN_HLEN 4
 
+static inline int
+open_dma(const char *value)
+{
+	if (strncmp(dma_type, "ioat", 4) == 0)
+		return open_ioat(value);
+
+	return -1;
+}
+
 /*
  * Builds up the correct configuration for VMDQ VLAN pool map
  * according to the pool & queue limits.
@@ -219,21 +224,6 @@ get_eth_conf(struct rte_eth_conf *eth_conf, uint32_t num_devices)
 	return 0;
 }
 
-/*
- * Validate the device number according to the max pool number gotten form
- * dev_info. If the device number is invalid, give the error message and
- * return -1. Each device must have its own pool.
- */
-static inline int
-validate_num_devices(uint32_t max_nb_devices)
-{
-	if (num_devices > max_nb_devices) {
-		RTE_LOG(ERR, VHOST_PORT, "invalid number of devices\n");
-		return -1;
-	}
-	return 0;
-}
-
 /*
  * Initialises a given port using global settings and with the rx buffers
  * coming from the mbuf_pool passed as parameter
@@ -251,12 +241,18 @@ port_init(uint16_t port)
 	uint16_t q;
 
 	/* The max pool number from dev_info will be used to validate the pool number specified in cmd line */
-	rte_eth_dev_info_get (port, &dev_info);
+	retval = rte_eth_dev_info_get(port, &dev_info);
+	if (retval != 0) {
+		RTE_LOG(ERR, VHOST_PORT,
+			"Error during getting device (port %u) info: %s\n",
+			port, strerror(-retval));
+
+		return retval;
+	}
 
 	rxconf = &dev_info.default_rxconf;
 	txconf = &dev_info.default_txconf;
 	rxconf->rx_drop_en = 1;
-	txconf->txq_flags = ETH_TXQ_FLAGS_IGNORE;
 
 	/*configure the number of supported virtio devices based on VMDQ limits */
 	num_devices = dev_info.max_vmdq_pools;
@@ -264,22 +260,8 @@ port_init(uint16_t port)
 	rx_ring_size = RTE_TEST_RX_DESC_DEFAULT;
 	tx_ring_size = RTE_TEST_TX_DESC_DEFAULT;
 
-	/*
-	 * When dequeue zero copy is enabled, guest Tx used vring will be
-	 * updated only when corresponding mbuf is freed. Thus, the nb_tx_desc
-	 * (tx_ring_size here) must be small enough so that the driver will
-	 * hit the free threshold easily and free mbufs timely. Otherwise,
-	 * guest Tx vring would be starved.
-	 */
-	if (dequeue_zero_copy)
-		tx_ring_size = 64;
-
 	tx_rings = (uint16_t)rte_lcore_count();
 
-	retval = validate_num_devices(MAX_DEVICES);
-	if (retval < 0)
-		return retval;
-
 	/* Get port configuration. */
 	retval = get_eth_conf(&port_conf, num_devices);
 	if (retval < 0)
@@ -357,10 +339,24 @@ port_init(uint16_t port)
 		return retval;
 	}
 
-	if (promiscuous)
-		rte_eth_promiscuous_enable(port);
+	if (promiscuous) {
+		retval = rte_eth_promiscuous_enable(port);
+		if (retval != 0) {
+			RTE_LOG(ERR, VHOST_PORT,
+				"Failed to enable promiscuous mode on port %u: %s\n",
+				port, rte_strerror(-retval));
+			return retval;
+		}
+	}
+
+	retval = rte_eth_macaddr_get(port, &vmdq_ports_eth_addr[port]);
+	if (retval < 0) {
+		RTE_LOG(ERR, VHOST_PORT,
+			"Failed to get MAC address on port %u: %s\n",
+			port, rte_strerror(-retval));
+		return retval;
+	}
 
-	rte_eth_macaddr_get(port, &vmdq_ports_eth_addr[port]);
 	RTE_LOG(INFO, VHOST_PORT, "Max virtio devices supported: %u\n", num_devices);
 	RTE_LOG(INFO, VHOST_PORT, "Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8
 			" %02"PRIx8" %02"PRIx8" %02"PRIx8"\n",
@@ -381,12 +377,20 @@ static int
 us_vhost_parse_socket_path(const char *q_arg)
 {
+	char *old;
+
 	/* parse number string */
 	if (strnlen(q_arg, PATH_MAX) == PATH_MAX)
 		return -1;
 
+	old = socket_files;
 	socket_files = realloc(socket_files, PATH_MAX * (nb_sockets + 1));
-	snprintf(socket_files + nb_sockets * PATH_MAX, PATH_MAX, "%s", q_arg);
+	if (socket_files == NULL) {
+		free(old);
+		return -1;
+	}
+
+	strlcpy(socket_files + nb_sockets * PATH_MAX, q_arg, PATH_MAX);
 	nb_sockets++;
 
 	return 0;
@@ -406,10 +410,7 @@ parse_portmask(const char *portmask)
 	/* parse hexadecimal string */
 	pm = strtoul(portmask, &end, 16);
 	if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0') || (errno != 0))
-		return -1;
-
-	if (pm == 0)
-		return -1;
+		return 0;
 
 	return pm;
@@ -460,7 +461,8 @@ us_vhost_usage(const char *prgname)
 	"		--tx-csum [0|1] disable/enable TX checksum offload.\n"
 	"		--tso [0|1] disable/enable TCP segment offload.\n"
 	"		--client register a vhost-user socket as client mode.\n"
-	"		--dequeue-zero-copy enables dequeue zero copy\n",
+	"		--dma-type register dma type for your vhost async driver. For example \"ioat\" for now.\n"
+	"		--dmas register dma channel for specific vhost device.\n",
 	prgname);
 }
 
@@ -485,8 +487,9 @@ us_vhost_parse_args(int argc, char **argv)
 		{"tx-csum", required_argument, NULL, 0},
 		{"tso", required_argument, NULL, 0},
 		{"client", no_argument, &client_mode, 1},
-		{"dequeue-zero-copy", no_argument, &dequeue_zero_copy, 1},
 		{"builtin-net-driver", no_argument, &builtin_net_driver, 1},
+		{"dma-type", required_argument, NULL, 0},
+		{"dmas", required_argument, NULL, 0},
 		{NULL, 0, 0, 0},
 	};
 
@@ -629,6 +632,28 @@ us_vhost_parse_args(int argc, char **argv)
 				}
 			}
 
+			if (!strncmp(long_option[option_index].name,
+						"dma-type", MAX_LONG_OPT_SZ)) {
+				if (strlen(optarg) >= MAX_LONG_OPT_SZ) {
+					RTE_LOG(INFO, VHOST_CONFIG,
+						"Wrong DMA type\n");
+					us_vhost_usage(prgname);
+					return -1;
+				}
+				strcpy(dma_type, optarg);
+			}
+
+			if (!strncmp(long_option[option_index].name,
+						"dmas", MAX_LONG_OPT_SZ)) {
+				if (open_dma(optarg) == -1) {
+					RTE_LOG(INFO, VHOST_CONFIG,
+						"Wrong DMA args\n");
+					us_vhost_usage(prgname);
+					return -1;
+				}
+				async_vhost_driver = 1;
+			}
+
 			break;
 
 		/* Invalid option - print options. */
@@ -680,13 +705,13 @@ static unsigned check_ports_num(unsigned nb_ports)
 }
 
 static __rte_always_inline struct vhost_dev *
-find_vhost_dev(struct ether_addr *mac)
+find_vhost_dev(struct rte_ether_addr *mac)
 {
 	struct vhost_dev *vdev;
 
 	TAILQ_FOREACH(vdev, &vhost_dev_list, global_vdev_entry) {
 		if (vdev->ready == DEVICE_RX &&
-		    is_same_ether_addr(mac, &vdev->mac_address))
+		    rte_is_same_ether_addr(mac, &vdev->mac_address))
 			return vdev;
 	}
@@ -700,11 +725,11 @@ find_vhost_dev(struct ether_addr *mac)
 static int
 link_vmdq(struct vhost_dev *vdev, struct rte_mbuf *m)
 {
-	struct ether_hdr *pkt_hdr;
+	struct rte_ether_hdr *pkt_hdr;
 	int i, ret;
 
 	/* Learn MAC address of guest device from packet */
-	pkt_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
+	pkt_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
 
 	if (find_vhost_dev(&pkt_hdr->s_addr)) {
 		RTE_LOG(ERR, VHOST_DATA,
@@ -713,7 +738,7 @@ link_vmdq(struct vhost_dev *vdev, struct rte_mbuf *m)
 		return -1;
 	}
 
-	for (i = 0; i < ETHER_ADDR_LEN; i++)
+	for (i = 0; i < RTE_ETHER_ADDR_LEN; i++)
 		vdev->mac_address.addr_bytes[i] = pkt_hdr->s_addr.addr_bytes[i];
 
 	/* vlan_tag currently uses the device_id. */
@@ -784,13 +809,30 @@ virtio_xmit(struct vhost_dev *dst_vdev, struct vhost_dev *src_vdev,
 	    struct rte_mbuf *m)
 {
 	uint16_t ret;
+	struct rte_mbuf *m_cpl[1], *comp_pkt;
+	uint32_t nr_comp = 0;
 
 	if (builtin_net_driver) {
 		ret = vs_enqueue_pkts(dst_vdev, VIRTIO_RXQ, &m, 1);
+	} else if (async_vhost_driver) {
+		ret = rte_vhost_submit_enqueue_burst(dst_vdev->vid, VIRTIO_RXQ,
+						&m, 1, &comp_pkt, &nr_comp);
+		if (nr_comp == 1)
+			goto done;
+
+		if (likely(ret))
+			dst_vdev->nr_async_pkts++;
+
+		while (likely(dst_vdev->nr_async_pkts)) {
+			if (rte_vhost_poll_enqueue_completed(dst_vdev->vid,
+					VIRTIO_RXQ, m_cpl, 1))
+				dst_vdev->nr_async_pkts--;
+		}
 	} else {
 		ret = rte_vhost_enqueue_burst(dst_vdev->vid, VIRTIO_RXQ, &m, 1);
 	}
 
+done:
 	if (enable_stats) {
 		rte_atomic64_inc(&dst_vdev->stats.rx_total_atomic);
 		rte_atomic64_add(&dst_vdev->stats.rx_atomic, ret);
@@ -806,10 +848,10 @@ virtio_xmit(struct vhost_dev *dst_vdev, struct vhost_dev *src_vdev,
 static __rte_always_inline int
 virtio_tx_local(struct vhost_dev *vdev, struct rte_mbuf *m)
 {
-	struct ether_hdr *pkt_hdr;
+	struct rte_ether_hdr *pkt_hdr;
 	struct vhost_dev *dst_vdev;
 
-	pkt_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
+	pkt_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
 
 	dst_vdev = find_vhost_dev(&pkt_hdr->d_addr);
 	if (!dst_vdev)
@@ -844,7 +886,8 @@ find_local_dest(struct vhost_dev *vdev, struct rte_mbuf *m,
 	uint32_t *offset, uint16_t *vlan_tag)
 {
 	struct vhost_dev *dst_vdev;
-	struct ether_hdr *pkt_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
+	struct rte_ether_hdr *pkt_hdr =
+		rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
 
 	dst_vdev = find_vhost_dev(&pkt_hdr->d_addr);
 	if (!dst_vdev)
@@ -877,16 +920,17 @@ get_psd_sum(void *l3_hdr, uint64_t ol_flags)
 {
 	if (ol_flags & PKT_TX_IPV4)
 		return rte_ipv4_phdr_cksum(l3_hdr, ol_flags);
-	else /* assume ethertype == ETHER_TYPE_IPv6 */
+	else /* assume ethertype == RTE_ETHER_TYPE_IPV6 */
 		return rte_ipv6_phdr_cksum(l3_hdr, ol_flags);
 }
 
 static void virtio_tx_offload(struct rte_mbuf *m)
 {
 	void *l3_hdr;
-	struct ipv4_hdr *ipv4_hdr = NULL;
-	struct tcp_hdr *tcp_hdr = NULL;
-	struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
+	struct rte_ipv4_hdr *ipv4_hdr = NULL;
+	struct rte_tcp_hdr *tcp_hdr = NULL;
+	struct rte_ether_hdr *eth_hdr =
+		rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
 
 	l3_hdr = (char *)eth_hdr + m->l2_len;
@@ -896,7 +940,7 @@ static void virtio_tx_offload(struct rte_mbuf *m)
 		m->ol_flags |= PKT_TX_IP_CKSUM;
 	}
 
-	tcp_hdr = (struct tcp_hdr *)((char *)l3_hdr + m->l3_len);
+	tcp_hdr = (struct rte_tcp_hdr *)((char *)l3_hdr + m->l3_len);
 	tcp_hdr->cksum = get_psd_sum(l3_hdr, m->ol_flags);
 }
@@ -930,11 +974,11 @@ virtio_tx_route(struct vhost_dev *vdev, struct rte_mbuf *m, uint16_t vlan_tag)
 	struct mbuf_table *tx_q;
 	unsigned offset = 0;
 	const uint16_t lcore_id = rte_lcore_id();
-	struct ether_hdr *nh;
+	struct rte_ether_hdr *nh;
 
-	nh = rte_pktmbuf_mtod(m, struct ether_hdr *);
-	if (unlikely(is_broadcast_ether_addr(&nh->d_addr))) {
+	nh = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
+	if (unlikely(rte_is_broadcast_ether_addr(&nh->d_addr))) {
 		struct vhost_dev *vdev2;
 
 		TAILQ_FOREACH(vdev2, &vhost_dev_list, global_vdev_entry) {
@@ -966,10 +1010,10 @@ queue2nic:
 	/*Add packet to the port tx queue*/
 	tx_q = &lcore_tx_queue[lcore_id];
 
-	nh = rte_pktmbuf_mtod(m, struct ether_hdr *);
-	if (unlikely(nh->ether_type == rte_cpu_to_be_16(ETHER_TYPE_VLAN))) {
+	nh = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
+	if (unlikely(nh->ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_VLAN))) {
 		/* Guest has inserted the vlan tag. */
-		struct vlan_hdr *vh = (struct vlan_hdr *) (nh + 1);
+		struct rte_vlan_hdr *vh = (struct rte_vlan_hdr *) (nh + 1);
 		uint16_t vlan_tag_be = rte_cpu_to_be_16(vlan_tag);
 		if ((vm2vm_mode == VM2VM_HARDWARE) &&
 		    (vh->vlan_tci != vlan_tag_be))
@@ -1033,14 +1077,32 @@ drain_mbuf_table(struct mbuf_table *tx_q)
 	}
 }
 
+static __rte_always_inline void
+complete_async_pkts(struct vhost_dev *vdev, uint16_t qid)
+{
+	struct rte_mbuf *p_cpl[MAX_PKT_BURST];
+	uint16_t complete_count;
+
+	complete_count = rte_vhost_poll_enqueue_completed(vdev->vid,
+						qid, p_cpl, MAX_PKT_BURST);
+	vdev->nr_async_pkts -= complete_count;
+	if (complete_count)
+		free_pkts(p_cpl, complete_count);
+}
+
 static __rte_always_inline void
 drain_eth_rx(struct vhost_dev *vdev)
 {
 	uint16_t rx_count, enqueue_count;
-	struct rte_mbuf *pkts[MAX_PKT_BURST];
+	struct rte_mbuf *pkts[MAX_PKT_BURST], *comp_pkts[MAX_PKT_BURST];
+	uint32_t nr_comp = 0;
 
 	rx_count = rte_eth_rx_burst(ports[0], vdev->vmdq_rx_q,
 				    pkts, MAX_PKT_BURST);
+
+	while (likely(vdev->nr_async_pkts))
+		complete_async_pkts(vdev, VIRTIO_RXQ);
+
 	if (!rx_count)
 		return;
@@ -1065,16 +1127,27 @@ drain_eth_rx(struct vhost_dev *vdev)
 	if (builtin_net_driver) {
 		enqueue_count = vs_enqueue_pkts(vdev, VIRTIO_RXQ,
 						pkts, rx_count);
+	} else if (async_vhost_driver) {
+		enqueue_count = rte_vhost_submit_enqueue_burst(vdev->vid,
+					VIRTIO_RXQ, pkts, rx_count, comp_pkts,
+					&nr_comp);
+		if (nr_comp > 0) {
+			free_pkts(comp_pkts, nr_comp);
+			enqueue_count -= nr_comp;
+		}
+		vdev->nr_async_pkts += enqueue_count;
 	} else {
 		enqueue_count = rte_vhost_enqueue_burst(vdev->vid, VIRTIO_RXQ,
 						pkts, rx_count);
 	}
+
 	if (enable_stats) {
 		rte_atomic64_add(&vdev->stats.rx_total_atomic, rx_count);
 		rte_atomic64_add(&vdev->stats.rx_atomic, enqueue_count);
 	}
 
-	free_pkts(pkts, rx_count);
+	if (!async_vhost_driver)
+		free_pkts(pkts, rx_count);
 }
@@ -1202,7 +1275,7 @@ destroy_device(int vid)
 	/* Set the dev_removal_flag on each lcore. */
-	RTE_LCORE_FOREACH_SLAVE(lcore)
+	RTE_LCORE_FOREACH_WORKER(lcore)
 		lcore_info[lcore].dev_removal_flag = REQUEST_DEV_REMOVAL;
 
 	/*
@@ -1210,7 +1283,7 @@ destroy_device(int vid)
 	 * we can be sure that they can no longer access the device removed
 	 * from the linked lists and that the devices are no longer in use.
 	 */
-	RTE_LCORE_FOREACH_SLAVE(lcore) {
+	RTE_LCORE_FOREACH_WORKER(lcore) {
 		while (lcore_info[lcore].dev_removal_flag != ACK_DEV_REMOVAL)
 			rte_pause();
 	}
@@ -1221,12 +1294,15 @@ destroy_device(int vid)
 		"(%d) device has been removed from data core\n",
 		vdev->vid);
 
+	if (async_vhost_driver)
+		rte_vhost_async_channel_unregister(vid, VIRTIO_RXQ);
+
 	rte_free(vdev);
 }
 
 /*
  * A new device is added to a data core. First the device is added to the main linked list
- * and the allocated to a specific data core.
+ * and then allocated to a specific data core.
  */
 static int
 new_device(int vid)
@@ -1234,7 +1310,6 @@ new_device(int vid)
 	int lcore, core_add = 0;
 	uint32_t device_num_min = num_devices;
 	struct vhost_dev *vdev;
-
 	vdev = rte_zmalloc("vhost device", sizeof(*vdev), RTE_CACHE_LINE_SIZE);
 	if (vdev == NULL) {
 		RTE_LOG(INFO, VHOST_DATA,
@@ -1255,7 +1330,7 @@ new_device(int vid)
 	vdev->remove = 0;
 
 	/* Find a suitable lcore to add the device. */
-	RTE_LCORE_FOREACH_SLAVE(lcore) {
+	RTE_LCORE_FOREACH_WORKER(lcore) {
 		if (lcore_info[lcore].device_num < device_num_min) {
 			device_num_min = lcore_info[lcore].device_num;
 			core_add = lcore;
@@ -1275,6 +1350,20 @@ new_device(int vid)
 		"(%d) device has been added to data core %d\n",
 		vid, vdev->coreid);
 
+	if (async_vhost_driver) {
+		struct rte_vhost_async_features f;
+		struct rte_vhost_async_channel_ops channel_ops;
+		if (strncmp(dma_type, "ioat", 4) == 0) {
+			channel_ops.transfer_data = ioat_transfer_data_cb;
+			channel_ops.check_completed_copies =
+				ioat_check_completed_copies_cb;
+			f.async_inorder = 1;
+			f.async_threshold = 256;
+			return rte_vhost_async_channel_register(vid, VIRTIO_RXQ,
+				f.intval, &channel_ops);
+		}
+	}
+
 	return 0;
 }
 
@@ -1292,8 +1381,8 @@ static const struct vhost_device_ops virtio_net_device_ops =
  * This is a thread will wake up after a period to print stats if the user has
  * enabled them.
  */
-static void
-print_stats(void)
+static void *
+print_stats(__rte_unused void *arg)
 {
 	struct vhost_dev *vdev;
 	uint64_t tx_dropped, rx_dropped;
@@ -1331,7 +1420,11 @@ print_stats(void)
 		}
 
 		printf("===================================================\n");
+
+		fflush(stdout);
 	}
+
+	return NULL;
 }
 
 static void
@@ -1420,7 +1513,6 @@ main(int argc, char *argv[])
 	int ret, i;
 	uint16_t portid;
 	static pthread_t tid;
-	char thread_name[RTE_MAX_THREAD_NAME_LEN];
 	uint64_t flags = 0;
 
 	signal(SIGINT, sigint_handler);
@@ -1493,32 +1585,26 @@ main(int argc, char *argv[])
 
 	/* Enable stats if the user option is set. */
 	if (enable_stats) {
-		ret = pthread_create(&tid, NULL, (void *)print_stats, NULL);
-		if (ret != 0)
+		ret = rte_ctrl_thread_create(&tid, "print-stats", NULL,
+					print_stats, NULL);
+		if (ret < 0)
 			rte_exit(EXIT_FAILURE,
 				"Cannot create print-stats thread\n");
-
-		/* Set thread_name for aid in debugging. */
-		snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, "print-stats");
-		ret = rte_thread_setname(tid, thread_name);
-		if (ret != 0)
-			RTE_LOG(DEBUG, VHOST_CONFIG,
-				"Cannot set print-stats name\n");
 	}
 
 	/* Launch all data cores. */
-	RTE_LCORE_FOREACH_SLAVE(lcore_id)
+	RTE_LCORE_FOREACH_WORKER(lcore_id)
 		rte_eal_remote_launch(switch_worker, NULL, lcore_id);
 
 	if (client_mode)
 		flags |= RTE_VHOST_USER_CLIENT;
 
-	if (dequeue_zero_copy)
-		flags |= RTE_VHOST_USER_DEQUEUE_ZERO_COPY;
-
 	/* Register vhost user driver to handle vhost messages. */
 	for (i = 0; i < nb_sockets; i++) {
 		char *file = socket_files + i * PATH_MAX;
+		if (async_vhost_driver)
+			flags = flags | RTE_VHOST_USER_ASYNC_COPY;
+
 		ret = rte_vhost_driver_register(file, flags);
 		if (ret != 0) {
 			unregister_drivers(i);
@@ -1568,7 +1654,7 @@ main(int argc, char *argv[])
 		}
 	}
 
-	RTE_LCORE_FOREACH_SLAVE(lcore_id)
+	RTE_LCORE_FOREACH_WORKER(lcore_id)
 		rte_eal_wait_lcore(lcore_id);
 
 	return 0;
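
The async data path added by this patch splits a vhost enqueue into a submit step (rte_vhost_submit_enqueue_burst) and a completion step (rte_vhost_poll_enqueue_completed); an mbuf handed to the DMA channel must stay allocated until the completion step returns it. Below is a minimal sketch of that ownership pattern, assuming the experimental DPDK 20.11 API from <rte_vhost_async.h>; the helper names, the nr_inflight counter and the burst size are illustrative, not part of the patch.

#include <rte_mbuf.h>
#include <rte_vhost_async.h>

#define SKETCH_BURST 32

static uint64_t nr_inflight; /* mbufs currently owned by the DMA engine */

static inline void
free_burst(struct rte_mbuf **pkts, uint16_t n)
{
	uint16_t i;

	for (i = 0; i < n; i++)
		rte_pktmbuf_free(pkts[i]);
}

static void
async_enqueue_burst(int vid, struct rte_mbuf **pkts, uint16_t count)
{
	struct rte_mbuf *comp[SKETCH_BURST];
	uint32_t nr_comp = 0;
	uint16_t n_sent;

	/*
	 * Submit the burst. Copies above the async threshold registered for
	 * (vid, VIRTIO_RXQ) are offloaded to the DMA channel; packets the
	 * CPU copied synchronously come back in comp[] and can be freed
	 * right away.
	 */
	n_sent = rte_vhost_submit_enqueue_burst(vid, VIRTIO_RXQ, pkts, count,
						comp, &nr_comp);
	free_burst(comp, (uint16_t)nr_comp);
	nr_inflight += n_sent - nr_comp;

	/*
	 * Unlike the sync path, the remaining mbufs must not be freed here;
	 * reap and free them only once the DMA engine reports them done.
	 */
	nr_comp = rte_vhost_poll_enqueue_completed(vid, VIRTIO_RXQ,
						comp, SKETCH_BURST);
	free_burst(comp, (uint16_t)nr_comp);
	nr_inflight -= nr_comp;
}

This is the same pattern drain_eth_rx() follows above: comp_pkts collects the synchronously completed packets, nr_async_pkts tracks what the DMA engine still owns, and complete_async_pkts() drains that count before each new RX burst.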
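On usage: --dma-type names the copy engine backend compiled into the sample (only "ioat" is recognized, via open_dma()), and --dmas binds a DMA channel to a specific vhost device before the sockets are registered; once a --dmas argument parses, RTE_VHOST_USER_ASYNC_COPY is OR'ed into the registration flags of every socket. Going by the sample application guide for this release, an invocation looks roughly like the following, where the core list, port mask and the 00:04.0 channel address are placeholders:

./dpdk-vhost -l 0-3 -n 4 -- -p 0x1 --mergeable 1 --vm2vm 1 \
	--socket-file /tmp/sock0 --client \
	--dma-type ioat --dmas [txd0@00:04.0]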