X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=examples%2Fvhost%2Fmain.c;h=2dcdacbef72e6a978fa5c0364a985e1721e41425;hb=d5bbeefca82610c211b26390b0bd6a02704f10eb;hp=93319102d7a29489822686baeadfa8b98af5d728;hpb=c2ab5162dbd8837bf381be4627e3ba8efccf4562;p=dpdk.git diff --git a/examples/vhost/main.c b/examples/vhost/main.c index 93319102d7..2dcdacbef7 100644 --- a/examples/vhost/main.c +++ b/examples/vhost/main.c @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -53,7 +53,9 @@ #include "main.h" -#define MAX_QUEUES 512 +#ifndef MAX_QUEUES +#define MAX_QUEUES 128 +#endif /* the maximum number of external ports supported */ #define MAX_SUP_PORTS 1 @@ -61,28 +63,27 @@ /* * Calculate the number of buffers needed per port */ -#define NUM_MBUFS_PER_PORT ((MAX_QUEUES*RTE_TEST_RX_DESC_DEFAULT) + \ +#define NUM_MBUFS_PER_PORT ((MAX_QUEUES*RTE_TEST_RX_DESC_DEFAULT) + \ (num_switching_cores*MAX_PKT_BURST) + \ (num_switching_cores*RTE_TEST_TX_DESC_DEFAULT) +\ (num_switching_cores*MBUF_CACHE_SIZE)) -#define MBUF_CACHE_SIZE 128 -#define MBUF_SIZE (2048 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM) +#define MBUF_CACHE_SIZE 128 +#define MBUF_DATA_SIZE RTE_MBUF_DEFAULT_BUF_SIZE /* * No frame data buffer allocated from host are required for zero copy * implementation, guest will allocate the frame data buffer, and vhost * directly use it. */ -#define VIRTIO_DESCRIPTOR_LEN_ZCP 1518 -#define MBUF_SIZE_ZCP (VIRTIO_DESCRIPTOR_LEN_ZCP + sizeof(struct rte_mbuf) \ - + RTE_PKTMBUF_HEADROOM) +#define VIRTIO_DESCRIPTOR_LEN_ZCP RTE_MBUF_DEFAULT_DATAROOM +#define MBUF_DATA_SIZE_ZCP RTE_MBUF_DEFAULT_BUF_SIZE #define MBUF_CACHE_SIZE_ZCP 0 -#define MAX_PKT_BURST 32 /* Max burst size for RX/TX */ -#define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */ +#define MAX_PKT_BURST 32 /* Max burst size for RX/TX */ +#define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */ -#define BURST_RX_WAIT_US 15 /* Defines how long we wait between retries on RX */ +#define BURST_RX_WAIT_US 15 /* Defines how long we wait between retries on RX */ #define BURST_RX_RETRIES 4 /* Number of retries on RX. */ #define JUMBO_FRAME_MAX_SIZE 0x2600 @@ -139,6 +140,8 @@ /* Number of descriptors per cacheline. */ #define DESC_PER_CACHELINE (RTE_CACHE_LINE_SIZE / sizeof(struct vring_desc)) +#define MBUF_EXT_MEM(mb) (rte_mbuf_from_indirect(mb) != (mb)) + /* mask of enabled ports */ static uint32_t enabled_port_mask = 0; @@ -159,6 +162,9 @@ static uint32_t num_devices; static uint32_t zero_copy; static int mergeable; +/* Do vlan strip on host, enabled on default */ +static uint32_t vlan_strip = 1; + /* number of descriptors to apply*/ static uint32_t num_rx_descriptor = RTE_TEST_RX_DESC_DEFAULT_ZCP; static uint32_t num_tx_descriptor = RTE_TEST_TX_DESC_DEFAULT_ZCP; @@ -390,6 +396,9 @@ port_init(uint8_t port) txconf = &dev_info.default_txconf; rxconf->rx_drop_en = 1; + /* Enable vlan offload */ + txconf->txq_flags &= ~ETH_TXQ_FLAGS_NOVLANOFFL; + /* * Zero copy defers queue RX/TX start to the time when guest * finishes its startup and packet buffers from that guest are @@ -561,6 +570,7 @@ us_vhost_usage(const char *prgname) " --rx-retry-delay [0-N]: timeout(in usecond) between retries on RX. This makes effect only if retries on rx enabled\n" " --rx-retry-num [0-N]: the number of retries on rx. This makes effect only if retries on rx enabled\n" " --mergeable [0|1]: disable(default)/enable RX mergeable buffers\n" + " --vlan-strip [0|1]: disable/enable(default) RX VLAN strip on host\n" " --stats [0-N]: 0: Disable stats, N: Time in seconds to print stats\n" " --dev-basename: The basename to be used for the character device.\n" " --zero-copy [0|1]: disable(default)/enable rx/tx " @@ -588,6 +598,7 @@ us_vhost_parse_args(int argc, char **argv) {"rx-retry-delay", required_argument, NULL, 0}, {"rx-retry-num", required_argument, NULL, 0}, {"mergeable", required_argument, NULL, 0}, + {"vlan-strip", required_argument, NULL, 0}, {"stats", required_argument, NULL, 0}, {"dev-basename", required_argument, NULL, 0}, {"zero-copy", required_argument, NULL, 0}, @@ -688,6 +699,22 @@ us_vhost_parse_args(int argc, char **argv) } } + /* Enable/disable RX VLAN strip on host. */ + if (!strncmp(long_option[option_index].name, + "vlan-strip", MAX_LONG_OPT_SZ)) { + ret = parse_num_opt(optarg, 1); + if (ret == -1) { + RTE_LOG(INFO, VHOST_CONFIG, + "Invalid argument for VLAN strip [0|1]\n"); + us_vhost_usage(prgname); + return -1; + } else { + vlan_strip = !!ret; + vmdq_conf_default.rxmode.hw_vlan_strip = + vlan_strip; + } + } + /* Enable/disable stats. */ if (!strncmp(long_option[option_index].name, "stats", MAX_LONG_OPT_SZ)) { ret = parse_num_opt(optarg, INT32_MAX); @@ -721,19 +748,6 @@ us_vhost_parse_args(int argc, char **argv) return -1; } else zero_copy = ret; - - if (zero_copy) { -#ifdef RTE_MBUF_REFCNT - RTE_LOG(ERR, VHOST_CONFIG, "Before running " - "zero copy vhost APP, please " - "disable RTE_MBUF_REFCNT\n" - "in config file and then rebuild DPDK " - "core lib!\n" - "Otherwise please disable zero copy " - "flag in command line!\n"); - return -1; -#endif - } } /* Specify the descriptor number on RX. */ @@ -947,7 +961,9 @@ link_vmdq(struct vhost_dev *vdev, struct rte_mbuf *m) dev->device_fh); /* Enable stripping of the vlan tag as we handle routing. */ - rte_eth_dev_set_vlan_strip_on_queue(ports[0], (uint16_t)vdev->vmdq_rx_q, 1); + if (vlan_strip) + rte_eth_dev_set_vlan_strip_on_queue(ports[0], + (uint16_t)vdev->vmdq_rx_q, 1); /* Set device as ready for RX. */ vdev->ready = DEVICE_RX; @@ -1036,8 +1052,8 @@ virtio_tx_local(struct vhost_dev *vdev, struct rte_mbuf *m) rte_atomic64_add( &dev_statistics[tdev->device_fh].rx_atomic, ret); - dev_statistics[tdev->device_fh].tx_total++; - dev_statistics[tdev->device_fh].tx += ret; + dev_statistics[dev->device_fh].tx_total++; + dev_statistics[dev->device_fh].tx += ret; } } @@ -1091,7 +1107,7 @@ find_local_dest(struct virtio_net *dev, struct rte_mbuf *m, "(%"PRIu64") TX: pkt to local VM device id:" "(%"PRIu64") vlan tag: %d.\n", dev->device_fh, dev_ll->vdev->dev->device_fh, - vlan_tag); + (int)*vlan_tag); break; } @@ -1112,6 +1128,7 @@ virtio_tx_route(struct vhost_dev *vdev, struct rte_mbuf *m, uint16_t vlan_tag) unsigned len, ret, offset = 0; const uint16_t lcore_id = rte_lcore_id(); struct virtio_net *dev = vdev->dev; + struct ether_hdr *nh; /*check if destination is local VM*/ if ((vm2vm_mode == VM2VM_SOFTWARE) && (virtio_tx_local(vdev, m) == 0)) { @@ -1132,28 +1149,38 @@ virtio_tx_route(struct vhost_dev *vdev, struct rte_mbuf *m, uint16_t vlan_tag) tx_q = &lcore_tx_queue[lcore_id]; len = tx_q->len; - m->ol_flags = PKT_TX_VLAN_PKT; + nh = rte_pktmbuf_mtod(m, struct ether_hdr *); + if (unlikely(nh->ether_type == rte_cpu_to_be_16(ETHER_TYPE_VLAN))) { + /* Guest has inserted the vlan tag. */ + struct vlan_hdr *vh = (struct vlan_hdr *) (nh + 1); + uint16_t vlan_tag_be = rte_cpu_to_be_16(vlan_tag); + if ((vm2vm_mode == VM2VM_HARDWARE) && + (vh->vlan_tci != vlan_tag_be)) + vh->vlan_tci = vlan_tag_be; + } else { + m->ol_flags = PKT_TX_VLAN_PKT; - /* - * Find the right seg to adjust the data len when offset is - * bigger than tail room size. - */ - if (unlikely(vm2vm_mode == VM2VM_HARDWARE)) { - if (likely(offset <= rte_pktmbuf_tailroom(m))) - m->data_len += offset; - else { - struct rte_mbuf *seg = m; + /* + * Find the right seg to adjust the data len when offset is + * bigger than tail room size. + */ + if (unlikely(vm2vm_mode == VM2VM_HARDWARE)) { + if (likely(offset <= rte_pktmbuf_tailroom(m))) + m->data_len += offset; + else { + struct rte_mbuf *seg = m; - while ((seg->next != NULL) && - (offset > rte_pktmbuf_tailroom(seg))) - seg = seg->next; + while ((seg->next != NULL) && + (offset > rte_pktmbuf_tailroom(seg))) + seg = seg->next; - seg->data_len += offset; + seg->data_len += offset; + } + m->pkt_len += offset; } - m->pkt_len += offset; - } - m->vlan_tci = vlan_tag; + m->vlan_tci = vlan_tag; + } tx_q->m_table[len] = m; len++; @@ -1305,8 +1332,8 @@ switch_worker(__attribute__((unused)) void *arg) /* If this is the first received packet we need to learn the MAC and setup VMDQ */ if (unlikely(vdev->ready == DEVICE_MAC_LEARNING) && tx_count) { if (vdev->remove || (link_vmdq(vdev, pkts_burst[0]) == -1)) { - while (tx_count--) - rte_pktmbuf_free(pkts_burst[tx_count]); + while (tx_count) + rte_pktmbuf_free(pkts_burst[--tx_count]); } } while (tx_count) @@ -1408,7 +1435,7 @@ put_desc_to_used_list_zcp(struct vhost_virtqueue *vq, uint16_t desc_idx) /* Kick the guest if necessary. */ if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) - eventfd_write((int)vq->kickfd, 1); + eventfd_write(vq->callfd, (eventfd_t)1); } /* @@ -1424,7 +1451,8 @@ attach_rxmbuf_zcp(struct virtio_net *dev) uint64_t buff_addr, phys_addr; struct vhost_virtqueue *vq; struct vring_desc *desc; - struct rte_mbuf *mbuf = NULL; + void *obj = NULL; + struct rte_mbuf *mbuf; struct vpool *vpool; hpa_type addr_type; struct vhost_dev *vdev = (struct vhost_dev *)dev->priv; @@ -1475,7 +1503,8 @@ attach_rxmbuf_zcp(struct virtio_net *dev) } } while (unlikely(phys_addr == 0)); - rte_ring_sc_dequeue(vpool->ring, (void **)&mbuf); + rte_ring_sc_dequeue(vpool->ring, &obj); + mbuf = obj; if (unlikely(mbuf == NULL)) { LOG_DEBUG(VHOST_DATA, "(%"PRIu64") in attach_rxmbuf_zcp: " @@ -1492,7 +1521,7 @@ attach_rxmbuf_zcp(struct virtio_net *dev) "size required: %d\n", dev->device_fh, desc->len, desc_idx, vpool->buf_size); put_desc_to_used_list_zcp(vq, desc_idx); - rte_ring_sp_enqueue(vpool->ring, (void *)mbuf); + rte_ring_sp_enqueue(vpool->ring, obj); return; } @@ -1524,7 +1553,7 @@ attach_rxmbuf_zcp(struct virtio_net *dev) static inline void pktmbuf_detach_zcp(struct rte_mbuf *m) { const struct rte_mempool *mp = m->pool; - void *buf = RTE_MBUF_TO_BADDR(m); + void *buf = rte_mbuf_to_baddr(m); uint32_t buf_ofs; uint32_t buf_len = mp->elt_size - sizeof(*m); m->buf_physaddr = rte_mempool_virt2phy(mp, m) + sizeof(*m); @@ -1564,7 +1593,7 @@ txmbuf_clean_zcp(struct virtio_net *dev, struct vpool *vpool) for (index = 0; index < mbuf_count; index++) { mbuf = __rte_mbuf_raw_alloc(vpool->pool); - if (likely(RTE_MBUF_INDIRECT(mbuf))) + if (likely(MBUF_EXT_MEM(mbuf))) pktmbuf_detach_zcp(mbuf); rte_ring_sp_enqueue(vpool->ring, mbuf); @@ -1601,7 +1630,7 @@ txmbuf_clean_zcp(struct virtio_net *dev, struct vpool *vpool) /* Kick guest if required. */ if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) - eventfd_write((int)vq->kickfd, 1); + eventfd_write(vq->callfd, (eventfd_t)1); return 0; } @@ -1627,7 +1656,7 @@ static void mbuf_destroy_zcp(struct vpool *vpool) for (index = 0; index < mbuf_count; index++) { mbuf = __rte_mbuf_raw_alloc(vpool->pool); if (likely(mbuf != NULL)) { - if (likely(RTE_MBUF_INDIRECT(mbuf))) + if (likely(MBUF_EXT_MEM(mbuf))) pktmbuf_detach_zcp(mbuf); rte_ring_sp_enqueue(vpool->ring, (void *)mbuf); } @@ -1749,7 +1778,7 @@ virtio_dev_rx_zcp(struct virtio_net *dev, struct rte_mbuf **pkts, /* Kick the guest if necessary. */ if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) - eventfd_write((int)vq->kickfd, 1); + eventfd_write(vq->callfd, (eventfd_t)1); return count; } @@ -1764,7 +1793,8 @@ virtio_tx_route_zcp(struct virtio_net *dev, struct rte_mbuf *m, { struct mbuf_table *tx_q; struct rte_mbuf **m_table; - struct rte_mbuf *mbuf = NULL; + void *obj = NULL; + struct rte_mbuf *mbuf; unsigned len, ret, offset = 0; struct vpool *vpool; uint16_t vlan_tag = (uint16_t)vlan_tags[(uint16_t)dev->device_fh]; @@ -1776,7 +1806,8 @@ virtio_tx_route_zcp(struct virtio_net *dev, struct rte_mbuf *m, /* Allocate an mbuf and populate the structure. */ vpool = &vpool_array[MAX_QUEUES + vmdq_rx_q]; - rte_ring_sc_dequeue(vpool->ring, (void **)&mbuf); + rte_ring_sc_dequeue(vpool->ring, &obj); + mbuf = obj; if (unlikely(mbuf == NULL)) { struct vhost_virtqueue *vq = dev->virtqueue[VIRTIO_TXQ]; RTE_LOG(ERR, VHOST_DATA, @@ -2589,9 +2620,10 @@ new_device (struct virtio_net *dev) } - vdev->regions_hpa = (struct virtio_memory_regions_hpa *) rte_zmalloc("vhost hpa region", - sizeof(struct virtio_memory_regions_hpa) * vdev->nregions_hpa, - RTE_CACHE_LINE_SIZE); + vdev->regions_hpa = rte_calloc("vhost hpa region", + vdev->nregions_hpa, + sizeof(struct virtio_memory_regions_hpa), + RTE_CACHE_LINE_SIZE); if (vdev->regions_hpa == NULL) { RTE_LOG(ERR, VHOST_CONFIG, "Cannot allocate memory for hpa region\n"); rte_free(vdev); @@ -2720,8 +2752,7 @@ new_device (struct virtio_net *dev) RTE_LOG(INFO, VHOST_DATA, "(%"PRIu64") Failed to add device to data core\n", dev->device_fh); vdev->ready = DEVICE_SAFE_REMOVE; destroy_device(dev); - if (vdev->regions_hpa) - rte_free(vdev->regions_hpa); + rte_free(vdev->regions_hpa); rte_free(vdev); return -1; } @@ -2818,12 +2849,8 @@ static void setup_mempool_tbl(int socket, uint32_t index, char *pool_name, char *ring_name, uint32_t nb_mbuf) { - uint16_t roomsize = VIRTIO_DESCRIPTOR_LEN_ZCP + RTE_PKTMBUF_HEADROOM; - vpool_array[index].pool - = rte_mempool_create(pool_name, nb_mbuf, MBUF_SIZE_ZCP, - MBUF_CACHE_SIZE_ZCP, sizeof(struct rte_pktmbuf_pool_private), - rte_pktmbuf_pool_init, (void *)(uintptr_t)roomsize, - rte_pktmbuf_init, NULL, socket, 0); + vpool_array[index].pool = rte_pktmbuf_pool_create(pool_name, nb_mbuf, + MBUF_CACHE_SIZE_ZCP, 0, MBUF_DATA_SIZE_ZCP, socket); if (vpool_array[index].pool != NULL) { vpool_array[index].ring = rte_ring_create(ring_name, @@ -2844,12 +2871,22 @@ setup_mempool_tbl(int socket, uint32_t index, char *pool_name, } /* Need consider head room. */ - vpool_array[index].buf_size = roomsize - RTE_PKTMBUF_HEADROOM; + vpool_array[index].buf_size = VIRTIO_DESCRIPTOR_LEN_ZCP; } else { rte_exit(EXIT_FAILURE, "mempool_create(%s) failed", pool_name); } } +/* When we receive a INT signal, unregister vhost driver */ +static void +sigint_handler(__rte_unused int signum) +{ + /* Unregister vhost driver. */ + int ret = rte_vhost_driver_unregister((char *)&dev_basename); + if (ret != 0) + rte_exit(EXIT_FAILURE, "vhost driver unregister failure.\n"); + exit(0); +} /* * Main function, does initialisation and calls the per-lcore functions. The CUSE @@ -2865,6 +2902,9 @@ main(int argc, char *argv[]) uint8_t portid; uint16_t queue_id; static pthread_t tid; + char thread_name[RTE_MAX_THREAD_NAME_LEN]; + + signal(SIGINT, sigint_handler); /* init EAL */ ret = rte_eal_init(argc, argv); @@ -2907,15 +2947,9 @@ main(int argc, char *argv[]) if (zero_copy == 0) { /* Create the mbuf pool. */ - mbuf_pool = rte_mempool_create( - "MBUF_POOL", - NUM_MBUFS_PER_PORT - * valid_num_ports, - MBUF_SIZE, MBUF_CACHE_SIZE, - sizeof(struct rte_pktmbuf_pool_private), - rte_pktmbuf_pool_init, NULL, - rte_pktmbuf_init, NULL, - rte_socket_id(), 0); + mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL", + NUM_MBUFS_PER_PORT * valid_num_ports, MBUF_CACHE_SIZE, + 0, MBUF_DATA_SIZE, rte_socket_id()); if (mbuf_pool == NULL) rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n"); @@ -2991,8 +3025,19 @@ main(int argc, char *argv[]) memset(&dev_statistics, 0, sizeof(dev_statistics)); /* Enable stats if the user option is set. */ - if (enable_stats) - pthread_create(&tid, NULL, (void*)print_stats, NULL ); + if (enable_stats) { + ret = pthread_create(&tid, NULL, (void *)print_stats, NULL); + if (ret != 0) + rte_exit(EXIT_FAILURE, + "Cannot create print-stats thread\n"); + + /* Set thread_name for aid in debugging. */ + snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, "print-stats"); + ret = rte_thread_setname(tid, thread_name); + if (ret != 0) + RTE_LOG(ERR, VHOST_CONFIG, + "Cannot set print-stats name\n"); + } /* Launch all data cores. */ if (zero_copy == 0) { @@ -3036,10 +3081,10 @@ main(int argc, char *argv[]) if (mergeable == 0) rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_MRG_RXBUF); - /* Register CUSE device to handle IOCTLs. */ + /* Register vhost(cuse or user) driver to handle vhost messages. */ ret = rte_vhost_driver_register((char *)&dev_basename); if (ret != 0) - rte_exit(EXIT_FAILURE,"CUSE device setup failure.\n"); + rte_exit(EXIT_FAILURE, "vhost driver register failure.\n"); rte_vhost_driver_callback_register(&virtio_net_device_ops); @@ -3048,4 +3093,3 @@ main(int argc, char *argv[]) return 0; } -