X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=examples%2Fvhost%2Fmain.c;h=28c17afd556db9f463bf34a8211b5ddde52a02fb;hb=2a73125b7041;hp=1f1edbea202b77b0ffbdf19fe510696453953b06;hpb=90924caf0831906ceac6b450048fd822659e22d3;p=dpdk.git diff --git a/examples/vhost/main.c b/examples/vhost/main.c index 1f1edbea20..28c17afd55 100644 --- a/examples/vhost/main.c +++ b/examples/vhost/main.c @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -50,10 +50,14 @@ #include #include #include +#include +#include #include "main.h" +#ifndef MAX_QUEUES #define MAX_QUEUES 128 +#endif /* the maximum number of external ports supported */ #define MAX_SUP_PORTS 1 @@ -61,47 +65,27 @@ /* * Calculate the number of buffers needed per port */ -#define NUM_MBUFS_PER_PORT ((MAX_QUEUES*RTE_TEST_RX_DESC_DEFAULT) + \ +#define NUM_MBUFS_PER_PORT ((MAX_QUEUES*RTE_TEST_RX_DESC_DEFAULT) + \ (num_switching_cores*MAX_PKT_BURST) + \ (num_switching_cores*RTE_TEST_TX_DESC_DEFAULT) +\ - (num_switching_cores*MBUF_CACHE_SIZE)) + ((num_switching_cores+1)*MBUF_CACHE_SIZE)) -#define MBUF_CACHE_SIZE 128 -#define MBUF_SIZE (2048 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM) +#define MBUF_CACHE_SIZE 128 +#define MBUF_DATA_SIZE RTE_MBUF_DEFAULT_BUF_SIZE /* * No frame data buffer allocated from host are required for zero copy * implementation, guest will allocate the frame data buffer, and vhost * directly use it. */ -#define VIRTIO_DESCRIPTOR_LEN_ZCP 1518 -#define MBUF_SIZE_ZCP (VIRTIO_DESCRIPTOR_LEN_ZCP + sizeof(struct rte_mbuf) \ - + RTE_PKTMBUF_HEADROOM) +#define VIRTIO_DESCRIPTOR_LEN_ZCP RTE_MBUF_DEFAULT_DATAROOM +#define MBUF_DATA_SIZE_ZCP RTE_MBUF_DEFAULT_BUF_SIZE #define MBUF_CACHE_SIZE_ZCP 0 -/* - * RX and TX Prefetch, Host, and Write-back threshold values should be - * carefully set for optimal performance. Consult the network - * controller's datasheet and supporting DPDK documentation for guidance - * on how these parameters should be set. - */ -#define RX_PTHRESH 8 /* Default values of RX prefetch threshold reg. */ -#define RX_HTHRESH 8 /* Default values of RX host threshold reg. */ -#define RX_WTHRESH 4 /* Default values of RX write-back threshold reg. */ - -/* - * These default values are optimized for use with the Intel(R) 82599 10 GbE - * Controller and the DPDK ixgbe PMD. Consider using other values for other - * network controllers and/or network drivers. - */ -#define TX_PTHRESH 36 /* Default values of TX prefetch threshold reg. */ -#define TX_HTHRESH 0 /* Default values of TX host threshold reg. */ -#define TX_WTHRESH 0 /* Default values of TX write-back threshold reg. */ - -#define MAX_PKT_BURST 32 /* Max burst size for RX/TX */ -#define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */ +#define MAX_PKT_BURST 32 /* Max burst size for RX/TX */ +#define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */ -#define BURST_RX_WAIT_US 15 /* Defines how long we wait between retries on RX */ +#define BURST_RX_WAIT_US 15 /* Defines how long we wait between retries on RX */ #define BURST_RX_RETRIES 4 /* Number of retries on RX. */ #define JUMBO_FRAME_MAX_SIZE 0x2600 @@ -156,7 +140,9 @@ #define MAC_ADDR_CMP 0xFFFFFFFFFFFFULL /* Number of descriptors per cacheline. 
*/ -#define DESC_PER_CACHELINE (CACHE_LINE_SIZE / sizeof(struct vring_desc)) +#define DESC_PER_CACHELINE (RTE_CACHE_LINE_SIZE / sizeof(struct vring_desc)) + +#define MBUF_EXT_MEM(mb) (rte_mbuf_from_indirect(mb) != (mb)) /* mask of enabled ports */ static uint32_t enabled_port_mask = 0; @@ -178,6 +164,9 @@ static uint32_t num_devices; static uint32_t zero_copy; static int mergeable; +/* Do vlan strip on host, enabled on default */ +static uint32_t vlan_strip = 1; + /* number of descriptors to apply*/ static uint32_t num_rx_descriptor = RTE_TEST_RX_DESC_DEFAULT_ZCP; static uint32_t num_tx_descriptor = RTE_TEST_TX_DESC_DEFAULT_ZCP; @@ -212,6 +201,13 @@ typedef enum { static uint32_t enable_stats = 0; /* Enable retries on RX. */ static uint32_t enable_retry = 1; + +/* Disable TX checksum offload */ +static uint32_t enable_tx_csum; + +/* Disable TSO offload */ +static uint32_t enable_tso; + /* Specify timeout (in useconds) between retries on RX. */ static uint32_t burst_rx_delay_time = BURST_RX_WAIT_US; /* Specify the number of retries on RX. */ @@ -220,32 +216,6 @@ static uint32_t burst_rx_retry_num = BURST_RX_RETRIES; /* Character device basename. Can be set by user. */ static char dev_basename[MAX_BASENAME_SZ] = "vhost-net"; - -/* Default configuration for rx and tx thresholds etc. */ -static struct rte_eth_rxconf rx_conf_default = { - .rx_thresh = { - .pthresh = RX_PTHRESH, - .hthresh = RX_HTHRESH, - .wthresh = RX_WTHRESH, - }, - .rx_drop_en = 1, -}; - -/* - * These default values are optimized for use with the Intel(R) 82599 10 GbE - * Controller and the DPDK ixgbe/igb PMD. Consider using other values for other - * network controllers and/or network drivers. - */ -static struct rte_eth_txconf tx_conf_default = { - .tx_thresh = { - .pthresh = TX_PTHRESH, - .hthresh = TX_HTHRESH, - .wthresh = TX_WTHRESH, - }, - .tx_free_thresh = 0, /* Use PMD default values */ - .tx_rs_thresh = 0, /* Use PMD default values */ -}; - /* empty vmdq configuration structure. Filled in programatically */ static struct rte_eth_conf vmdq_conf_default = { .rxmode = { @@ -285,6 +255,9 @@ static struct rte_eth_conf vmdq_conf_default = { static unsigned lcore_ids[RTE_MAX_LCORE]; static uint8_t ports[RTE_MAX_ETHPORTS]; static unsigned num_ports = 0; /**< The number of ports specified in command line */ +static uint16_t num_pf_queues, num_vmdq_queues; +static uint16_t vmdq_pool_base, vmdq_queue_base; +static uint16_t queues_per_pool; static const uint16_t external_pkt_default_vlan_tag = 2000; const uint16_t vlan_tags[] = { @@ -330,20 +303,6 @@ struct vlan_ethhdr { __be16 h_vlan_encapsulated_proto; }; -/* IPv4 Header */ -struct ipv4_hdr { - uint8_t version_ihl; /**< version and header length */ - uint8_t type_of_service; /**< type of service */ - uint16_t total_length; /**< length of packet */ - uint16_t packet_id; /**< packet ID */ - uint16_t fragment_offset; /**< fragmentation offset */ - uint8_t time_to_live; /**< time to live */ - uint8_t next_proto_id; /**< protocol ID */ - uint16_t hdr_checksum; /**< header checksum */ - uint32_t src_addr; /**< source address */ - uint32_t dst_addr; /**< destination address */ -} __attribute__((__packed__)); - /* Header lengths. 
*/ #define VLAN_HLEN 4 #define VLAN_ETH_HLEN 18 @@ -412,7 +371,9 @@ port_init(uint8_t port) { struct rte_eth_dev_info dev_info; struct rte_eth_conf port_conf; - uint16_t rx_rings, tx_rings; + struct rte_eth_rxconf *rxconf; + struct rte_eth_txconf *txconf; + int16_t rx_rings, tx_rings; uint16_t rx_ring_size, tx_ring_size; int retval; uint16_t q; @@ -420,9 +381,32 @@ port_init(uint8_t port) /* The max pool number from dev_info will be used to validate the pool number specified in cmd line */ rte_eth_dev_info_get (port, &dev_info); + if (dev_info.max_rx_queues > MAX_QUEUES) { + rte_exit(EXIT_FAILURE, + "please define MAX_QUEUES no less than %u in %s\n", + dev_info.max_rx_queues, __FILE__); + } + + rxconf = &dev_info.default_rxconf; + txconf = &dev_info.default_txconf; + rxconf->rx_drop_en = 1; + + /* Enable vlan offload */ + txconf->txq_flags &= ~ETH_TXQ_FLAGS_NOVLANOFFL; + + /* + * Zero copy defers queue RX/TX start to the time when guest + * finishes its startup and packet buffers from that guest are + * available. + */ + if (zero_copy) { + rxconf->rx_deferred_start = 1; + rxconf->rx_drop_en = 0; + txconf->tx_deferred_start = 1; + } + /*configure the number of supported virtio devices based on VMDQ limits */ num_devices = dev_info.max_vmdq_pools; - num_queues = dev_info.max_rx_queues; if (zero_copy) { rx_ring_size = num_rx_descriptor; @@ -442,10 +426,27 @@ port_init(uint8_t port) retval = get_eth_conf(&port_conf, num_devices); if (retval < 0) return retval; + /* NIC queues are divided into pf queues and vmdq queues. */ + num_pf_queues = dev_info.max_rx_queues - dev_info.vmdq_queue_num; + queues_per_pool = dev_info.vmdq_queue_num / dev_info.max_vmdq_pools; + num_vmdq_queues = num_devices * queues_per_pool; + num_queues = num_pf_queues + num_vmdq_queues; + vmdq_queue_base = dev_info.vmdq_queue_base; + vmdq_pool_base = dev_info.vmdq_pool_base; + printf("pf queue num: %u, configured vmdq pool num: %u, each vmdq pool has %u queues\n", + num_pf_queues, num_devices, queues_per_pool); if (port >= rte_eth_dev_count()) return -1; - rx_rings = (uint16_t)num_queues, + if (enable_tx_csum == 0) + rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_CSUM); + + if (enable_tso == 0) { + rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_HOST_TSO4); + rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_HOST_TSO6); + } + + rx_rings = (uint16_t)dev_info.max_rx_queues; /* Configure ethernet device. */ retval = rte_eth_dev_configure(port, rx_rings, tx_rings, &port_conf); if (retval != 0) @@ -454,14 +455,16 @@ port_init(uint8_t port) /* Setup the queues. */ for (q = 0; q < rx_rings; q ++) { retval = rte_eth_rx_queue_setup(port, q, rx_ring_size, - rte_eth_dev_socket_id(port), &rx_conf_default, + rte_eth_dev_socket_id(port), + rxconf, vpool_array[q].pool); if (retval < 0) return retval; } for (q = 0; q < tx_rings; q ++) { retval = rte_eth_tx_queue_setup(port, q, tx_ring_size, - rte_eth_dev_socket_id(port), &tx_conf_default); + rte_eth_dev_socket_id(port), + txconf); if (retval < 0) return retval; } @@ -570,6 +573,7 @@ us_vhost_usage(const char *prgname) " --rx-retry-delay [0-N]: timeout(in usecond) between retries on RX. This makes effect only if retries on rx enabled\n" " --rx-retry-num [0-N]: the number of retries on rx. 
This makes effect only if retries on rx enabled\n" " --mergeable [0|1]: disable(default)/enable RX mergeable buffers\n" + " --vlan-strip [0|1]: disable/enable(default) RX VLAN strip on host\n" " --stats [0-N]: 0: Disable stats, N: Time in seconds to print stats\n" " --dev-basename: The basename to be used for the character device.\n" " --zero-copy [0|1]: disable(default)/enable rx/tx " @@ -577,7 +581,9 @@ us_vhost_usage(const char *prgname) " --rx-desc-num [0-N]: the number of descriptors on rx, " "used only when zero copy is enabled.\n" " --tx-desc-num [0-N]: the number of descriptors on tx, " - "used only when zero copy is enabled.\n", + "used only when zero copy is enabled.\n" + " --tx-csum [0|1] disable/enable TX checksum offload.\n" + " --tso [0|1] disable/enable TCP segment offload.\n", prgname); } @@ -597,11 +603,14 @@ us_vhost_parse_args(int argc, char **argv) {"rx-retry-delay", required_argument, NULL, 0}, {"rx-retry-num", required_argument, NULL, 0}, {"mergeable", required_argument, NULL, 0}, + {"vlan-strip", required_argument, NULL, 0}, {"stats", required_argument, NULL, 0}, {"dev-basename", required_argument, NULL, 0}, {"zero-copy", required_argument, NULL, 0}, {"rx-desc-num", required_argument, NULL, 0}, {"tx-desc-num", required_argument, NULL, 0}, + {"tx-csum", required_argument, NULL, 0}, + {"tso", required_argument, NULL, 0}, {NULL, 0, 0, 0}, }; @@ -656,6 +665,28 @@ us_vhost_parse_args(int argc, char **argv) } } + /* Enable/disable TX checksum offload. */ + if (!strncmp(long_option[option_index].name, "tx-csum", MAX_LONG_OPT_SZ)) { + ret = parse_num_opt(optarg, 1); + if (ret == -1) { + RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for tx-csum [0|1]\n"); + us_vhost_usage(prgname); + return -1; + } else + enable_tx_csum = ret; + } + + /* Enable/disable TSO offload. */ + if (!strncmp(long_option[option_index].name, "tso", MAX_LONG_OPT_SZ)) { + ret = parse_num_opt(optarg, 1); + if (ret == -1) { + RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for tso [0|1]\n"); + us_vhost_usage(prgname); + return -1; + } else + enable_tso = ret; + } + /* Specify the retries delay time (in useconds) on RX. */ if (!strncmp(long_option[option_index].name, "rx-retry-delay", MAX_LONG_OPT_SZ)) { ret = parse_num_opt(optarg, INT32_MAX); @@ -697,6 +728,22 @@ us_vhost_parse_args(int argc, char **argv) } } + /* Enable/disable RX VLAN strip on host. */ + if (!strncmp(long_option[option_index].name, + "vlan-strip", MAX_LONG_OPT_SZ)) { + ret = parse_num_opt(optarg, 1); + if (ret == -1) { + RTE_LOG(INFO, VHOST_CONFIG, + "Invalid argument for VLAN strip [0|1]\n"); + us_vhost_usage(prgname); + return -1; + } else { + vlan_strip = !!ret; + vmdq_conf_default.rxmode.hw_vlan_strip = + vlan_strip; + } + } + /* Enable/disable stats. */ if (!strncmp(long_option[option_index].name, "stats", MAX_LONG_OPT_SZ)) { ret = parse_num_opt(optarg, INT32_MAX); @@ -730,19 +777,6 @@ us_vhost_parse_args(int argc, char **argv) return -1; } else zero_copy = ret; - - if (zero_copy) { -#ifdef RTE_MBUF_REFCNT - RTE_LOG(ERR, VHOST_CONFIG, "Before running " - "zero copy vhost APP, please " - "disable RTE_MBUF_REFCNT\n" - "in config file and then rebuild DPDK " - "core lib!\n" - "Otherwise please disable zero copy " - "flag in command line!\n"); - return -1; -#endif - } } /* Specify the descriptor number on RX. 
*/ @@ -906,7 +940,7 @@ gpa_to_hpa(struct vhost_dev *vdev, uint64_t guest_pa, static inline int __attribute__((always_inline)) ether_addr_cmp(struct ether_addr *ea, struct ether_addr *eb) { - return (((*(uint64_t *)ea ^ *(uint64_t *)eb) & MAC_ADDR_CMP) == 0); + return ((*(uint64_t *)ea ^ *(uint64_t *)eb) & MAC_ADDR_CMP) == 0; } /* @@ -949,13 +983,16 @@ link_vmdq(struct vhost_dev *vdev, struct rte_mbuf *m) vdev->vlan_tag); /* Register the MAC address. */ - ret = rte_eth_dev_mac_addr_add(ports[0], &vdev->mac_address, (uint32_t)dev->device_fh); + ret = rte_eth_dev_mac_addr_add(ports[0], &vdev->mac_address, + (uint32_t)dev->device_fh + vmdq_pool_base); if (ret) RTE_LOG(ERR, VHOST_DATA, "(%"PRIu64") Failed to add device MAC address to VMDQ\n", dev->device_fh); /* Enable stripping of the vlan tag as we handle routing. */ - rte_eth_dev_set_vlan_strip_on_queue(ports[0], (uint16_t)vdev->vmdq_rx_q, 1); + if (vlan_strip) + rte_eth_dev_set_vlan_strip_on_queue(ports[0], + (uint16_t)vdev->vmdq_rx_q, 1); /* Set device as ready for RX. */ vdev->ready = DEVICE_RX; @@ -1044,8 +1081,8 @@ virtio_tx_local(struct vhost_dev *vdev, struct rte_mbuf *m) rte_atomic64_add( &dev_statistics[tdev->device_fh].rx_atomic, ret); - dev_statistics[tdev->device_fh].tx_total++; - dev_statistics[tdev->device_fh].tx += ret; + dev_statistics[dev->device_fh].tx_total++; + dev_statistics[dev->device_fh].tx += ret; } } @@ -1099,7 +1136,7 @@ find_local_dest(struct virtio_net *dev, struct rte_mbuf *m, "(%"PRIu64") TX: pkt to local VM device id:" "(%"PRIu64") vlan tag: %d.\n", dev->device_fh, dev_ll->vdev->dev->device_fh, - vlan_tag); + (int)*vlan_tag); break; } @@ -1108,6 +1145,34 @@ find_local_dest(struct virtio_net *dev, struct rte_mbuf *m, return 0; } +static uint16_t +get_psd_sum(void *l3_hdr, uint64_t ol_flags) +{ + if (ol_flags & PKT_TX_IPV4) + return rte_ipv4_phdr_cksum(l3_hdr, ol_flags); + else /* assume ethertype == ETHER_TYPE_IPv6 */ + return rte_ipv6_phdr_cksum(l3_hdr, ol_flags); +} + +static void virtio_tx_offload(struct rte_mbuf *m) +{ + void *l3_hdr; + struct ipv4_hdr *ipv4_hdr = NULL; + struct tcp_hdr *tcp_hdr = NULL; + struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *); + + l3_hdr = (char *)eth_hdr + m->l2_len; + + if (m->ol_flags & PKT_TX_IPV4) { + ipv4_hdr = l3_hdr; + ipv4_hdr->hdr_checksum = 0; + m->ol_flags |= PKT_TX_IP_CKSUM; + } + + tcp_hdr = (struct tcp_hdr *)((char *)l3_hdr + m->l3_len); + tcp_hdr->cksum = get_psd_sum(l3_hdr, m->ol_flags); +} + /* * This function routes the TX packet to the correct interface. This may be a local device * or the physical port. 
@@ -1120,6 +1185,7 @@ virtio_tx_route(struct vhost_dev *vdev, struct rte_mbuf *m, uint16_t vlan_tag) unsigned len, ret, offset = 0; const uint16_t lcore_id = rte_lcore_id(); struct virtio_net *dev = vdev->dev; + struct ether_hdr *nh; /*check if destination is local VM*/ if ((vm2vm_mode == VM2VM_SOFTWARE) && (virtio_tx_local(vdev, m) == 0)) { @@ -1127,9 +1193,8 @@ virtio_tx_route(struct vhost_dev *vdev, struct rte_mbuf *m, uint16_t vlan_tag) return; } - if (vm2vm_mode == VM2VM_HARDWARE) { - if (find_local_dest(dev, m, &offset, &vlan_tag) != 0 || - offset > rte_pktmbuf_tailroom(m)) { + if (unlikely(vm2vm_mode == VM2VM_HARDWARE)) { + if (unlikely(find_local_dest(dev, m, &offset, &vlan_tag) != 0)) { rte_pktmbuf_free(m); return; } @@ -1141,12 +1206,41 @@ virtio_tx_route(struct vhost_dev *vdev, struct rte_mbuf *m, uint16_t vlan_tag) tx_q = &lcore_tx_queue[lcore_id]; len = tx_q->len; - m->ol_flags = PKT_TX_VLAN_PKT; + nh = rte_pktmbuf_mtod(m, struct ether_hdr *); + if (unlikely(nh->ether_type == rte_cpu_to_be_16(ETHER_TYPE_VLAN))) { + /* Guest has inserted the vlan tag. */ + struct vlan_hdr *vh = (struct vlan_hdr *) (nh + 1); + uint16_t vlan_tag_be = rte_cpu_to_be_16(vlan_tag); + if ((vm2vm_mode == VM2VM_HARDWARE) && + (vh->vlan_tci != vlan_tag_be)) + vh->vlan_tci = vlan_tag_be; + } else { + m->ol_flags |= PKT_TX_VLAN_PKT; + + /* + * Find the right seg to adjust the data len when offset is + * bigger than tail room size. + */ + if (unlikely(vm2vm_mode == VM2VM_HARDWARE)) { + if (likely(offset <= rte_pktmbuf_tailroom(m))) + m->data_len += offset; + else { + struct rte_mbuf *seg = m; + + while ((seg->next != NULL) && + (offset > rte_pktmbuf_tailroom(seg))) + seg = seg->next; - m->data_len += offset; - m->pkt_len += offset; + seg->data_len += offset; + } + m->pkt_len += offset; + } - m->vlan_tci = vlan_tag; + m->vlan_tci = vlan_tag; + } + + if (m->ol_flags & PKT_TX_TCP_SEG) + virtio_tx_offload(m); tx_q->m_table[len] = m; len++; @@ -1298,12 +1392,14 @@ switch_worker(__attribute__((unused)) void *arg) /* If this is the first received packet we need to learn the MAC and setup VMDQ */ if (unlikely(vdev->ready == DEVICE_MAC_LEARNING) && tx_count) { if (vdev->remove || (link_vmdq(vdev, pkts_burst[0]) == -1)) { - while (tx_count--) - rte_pktmbuf_free(pkts_burst[tx_count]); + while (tx_count) + rte_pktmbuf_free(pkts_burst[--tx_count]); } } - while (tx_count) - virtio_tx_route(vdev, pkts_burst[--tx_count], (uint16_t)dev->device_fh); + for (i = 0; i < tx_count; ++i) { + virtio_tx_route(vdev, pkts_burst[i], + vlan_tags[(uint16_t)dev->device_fh]); + } } /*move to the next device in the list*/ @@ -1401,7 +1497,7 @@ put_desc_to_used_list_zcp(struct vhost_virtqueue *vq, uint16_t desc_idx) /* Kick the guest if necessary. 
*/ if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) - eventfd_write((int)vq->kickfd, 1); + eventfd_write(vq->callfd, (eventfd_t)1); } /* @@ -1417,7 +1513,8 @@ attach_rxmbuf_zcp(struct virtio_net *dev) uint64_t buff_addr, phys_addr; struct vhost_virtqueue *vq; struct vring_desc *desc; - struct rte_mbuf *mbuf = NULL; + void *obj = NULL; + struct rte_mbuf *mbuf; struct vpool *vpool; hpa_type addr_type; struct vhost_dev *vdev = (struct vhost_dev *)dev->priv; @@ -1468,7 +1565,8 @@ attach_rxmbuf_zcp(struct virtio_net *dev) } } while (unlikely(phys_addr == 0)); - rte_ring_sc_dequeue(vpool->ring, (void **)&mbuf); + rte_ring_sc_dequeue(vpool->ring, &obj); + mbuf = obj; if (unlikely(mbuf == NULL)) { LOG_DEBUG(VHOST_DATA, "(%"PRIu64") in attach_rxmbuf_zcp: " @@ -1485,7 +1583,7 @@ attach_rxmbuf_zcp(struct virtio_net *dev) "size required: %d\n", dev->device_fh, desc->len, desc_idx, vpool->buf_size); put_desc_to_used_list_zcp(vq, desc_idx); - rte_ring_sp_enqueue(vpool->ring, (void *)mbuf); + rte_ring_sp_enqueue(vpool->ring, obj); return; } @@ -1517,7 +1615,7 @@ attach_rxmbuf_zcp(struct virtio_net *dev) static inline void pktmbuf_detach_zcp(struct rte_mbuf *m) { const struct rte_mempool *mp = m->pool; - void *buf = RTE_MBUF_TO_BADDR(m); + void *buf = rte_mbuf_to_baddr(m); uint32_t buf_ofs; uint32_t buf_len = mp->elt_size - sizeof(*m); m->buf_physaddr = rte_mempool_virt2phy(mp, m) + sizeof(*m); @@ -1557,7 +1655,7 @@ txmbuf_clean_zcp(struct virtio_net *dev, struct vpool *vpool) for (index = 0; index < mbuf_count; index++) { mbuf = __rte_mbuf_raw_alloc(vpool->pool); - if (likely(RTE_MBUF_INDIRECT(mbuf))) + if (likely(MBUF_EXT_MEM(mbuf))) pktmbuf_detach_zcp(mbuf); rte_ring_sp_enqueue(vpool->ring, mbuf); @@ -1594,7 +1692,7 @@ txmbuf_clean_zcp(struct virtio_net *dev, struct vpool *vpool) /* Kick guest if required. */ if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) - eventfd_write((int)vq->kickfd, 1); + eventfd_write(vq->callfd, (eventfd_t)1); return 0; } @@ -1620,7 +1718,7 @@ static void mbuf_destroy_zcp(struct vpool *vpool) for (index = 0; index < mbuf_count; index++) { mbuf = __rte_mbuf_raw_alloc(vpool->pool); if (likely(mbuf != NULL)) { - if (likely(RTE_MBUF_INDIRECT(mbuf))) + if (likely(MBUF_EXT_MEM(mbuf))) pktmbuf_detach_zcp(mbuf); rte_ring_sp_enqueue(vpool->ring, (void *)mbuf); } @@ -1742,7 +1840,7 @@ virtio_dev_rx_zcp(struct virtio_net *dev, struct rte_mbuf **pkts, /* Kick the guest if necessary. */ if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) - eventfd_write((int)vq->kickfd, 1); + eventfd_write(vq->callfd, (eventfd_t)1); return count; } @@ -1757,7 +1855,8 @@ virtio_tx_route_zcp(struct virtio_net *dev, struct rte_mbuf *m, { struct mbuf_table *tx_q; struct rte_mbuf **m_table; - struct rte_mbuf *mbuf = NULL; + void *obj = NULL; + struct rte_mbuf *mbuf; unsigned len, ret, offset = 0; struct vpool *vpool; uint16_t vlan_tag = (uint16_t)vlan_tags[(uint16_t)dev->device_fh]; @@ -1769,7 +1868,8 @@ virtio_tx_route_zcp(struct virtio_net *dev, struct rte_mbuf *m, /* Allocate an mbuf and populate the structure. 
*/ vpool = &vpool_array[MAX_QUEUES + vmdq_rx_q]; - rte_ring_sc_dequeue(vpool->ring, (void **)&mbuf); + rte_ring_sc_dequeue(vpool->ring, &obj); + mbuf = obj; if (unlikely(mbuf == NULL)) { struct vhost_virtqueue *vq = dev->virtqueue[VIRTIO_TXQ]; RTE_LOG(ERR, VHOST_DATA, @@ -1809,7 +1909,7 @@ virtio_tx_route_zcp(struct virtio_net *dev, struct rte_mbuf *m, mbuf->buf_physaddr = m->buf_physaddr; mbuf->buf_addr = m->buf_addr; } - mbuf->ol_flags = PKT_TX_VLAN_PKT; + mbuf->ol_flags |= PKT_TX_VLAN_PKT; mbuf->vlan_tci = vlan_tag; mbuf->l2_len = sizeof(struct ether_hdr); mbuf->l3_len = sizeof(struct ipv4_hdr); @@ -2243,7 +2343,7 @@ alloc_data_ll(uint32_t size) } ll_new[i].next = NULL; - return (ll_new); + return ll_new; } /* @@ -2562,7 +2662,7 @@ new_device (struct virtio_net *dev) struct vhost_dev *vdev; uint32_t regionidx; - vdev = rte_zmalloc("vhost device", sizeof(*vdev), CACHE_LINE_SIZE); + vdev = rte_zmalloc("vhost device", sizeof(*vdev), RTE_CACHE_LINE_SIZE); if (vdev == NULL) { RTE_LOG(INFO, VHOST_DATA, "(%"PRIu64") Couldn't allocate memory for vhost dev\n", dev->device_fh); @@ -2582,9 +2682,10 @@ new_device (struct virtio_net *dev) } - vdev->regions_hpa = (struct virtio_memory_regions_hpa *) rte_zmalloc("vhost hpa region", - sizeof(struct virtio_memory_regions_hpa) * vdev->nregions_hpa, - CACHE_LINE_SIZE); + vdev->regions_hpa = rte_calloc("vhost hpa region", + vdev->nregions_hpa, + sizeof(struct virtio_memory_regions_hpa), + RTE_CACHE_LINE_SIZE); if (vdev->regions_hpa == NULL) { RTE_LOG(ERR, VHOST_CONFIG, "Cannot allocate memory for hpa region\n"); rte_free(vdev); @@ -2620,7 +2721,7 @@ new_device (struct virtio_net *dev) ll_dev->vdev = vdev; add_data_ll_entry(&ll_root_used, ll_dev); vdev->vmdq_rx_q - = dev->device_fh * (num_queues / num_devices); + = dev->device_fh * queues_per_pool + vmdq_queue_base; if (zero_copy) { uint32_t index = vdev->vmdq_rx_q; @@ -2713,8 +2814,7 @@ new_device (struct virtio_net *dev) RTE_LOG(INFO, VHOST_DATA, "(%"PRIu64") Failed to add device to data core\n", dev->device_fh); vdev->ready = DEVICE_SAFE_REMOVE; destroy_device(dev); - if (vdev->regions_hpa) - rte_free(vdev->regions_hpa); + rte_free(vdev->regions_hpa); rte_free(vdev); return -1; } @@ -2811,12 +2911,8 @@ static void setup_mempool_tbl(int socket, uint32_t index, char *pool_name, char *ring_name, uint32_t nb_mbuf) { - uint16_t roomsize = VIRTIO_DESCRIPTOR_LEN_ZCP + RTE_PKTMBUF_HEADROOM; - vpool_array[index].pool - = rte_mempool_create(pool_name, nb_mbuf, MBUF_SIZE_ZCP, - MBUF_CACHE_SIZE_ZCP, sizeof(struct rte_pktmbuf_pool_private), - rte_pktmbuf_pool_init, (void *)(uintptr_t)roomsize, - rte_pktmbuf_init, NULL, socket, 0); + vpool_array[index].pool = rte_pktmbuf_pool_create(pool_name, nb_mbuf, + MBUF_CACHE_SIZE_ZCP, 0, MBUF_DATA_SIZE_ZCP, socket); if (vpool_array[index].pool != NULL) { vpool_array[index].ring = rte_ring_create(ring_name, @@ -2837,26 +2933,40 @@ setup_mempool_tbl(int socket, uint32_t index, char *pool_name, } /* Need consider head room. */ - vpool_array[index].buf_size = roomsize - RTE_PKTMBUF_HEADROOM; + vpool_array[index].buf_size = VIRTIO_DESCRIPTOR_LEN_ZCP; } else { rte_exit(EXIT_FAILURE, "mempool_create(%s) failed", pool_name); } } +/* When we receive a INT signal, unregister vhost driver */ +static void +sigint_handler(__rte_unused int signum) +{ + /* Unregister vhost driver. 
*/ + int ret = rte_vhost_driver_unregister((char *)&dev_basename); + if (ret != 0) + rte_exit(EXIT_FAILURE, "vhost driver unregister failure.\n"); + exit(0); +} /* * Main function, does initialisation and calls the per-lcore functions. The CUSE * device is also registered here to handle the IOCTLs. */ int -MAIN(int argc, char *argv[]) +main(int argc, char *argv[]) { struct rte_mempool *mbuf_pool = NULL; unsigned lcore_id, core_id = 0; unsigned nb_ports, valid_num_ports; int ret; - uint8_t portid, queue_id = 0; + uint8_t portid; + uint16_t queue_id; static pthread_t tid; + char thread_name[RTE_MAX_THREAD_NAME_LEN]; + + signal(SIGINT, sigint_handler); /* init EAL */ ret = rte_eal_init(argc, argv); @@ -2899,15 +3009,9 @@ MAIN(int argc, char *argv[]) if (zero_copy == 0) { /* Create the mbuf pool. */ - mbuf_pool = rte_mempool_create( - "MBUF_POOL", - NUM_MBUFS_PER_PORT - * valid_num_ports, - MBUF_SIZE, MBUF_CACHE_SIZE, - sizeof(struct rte_pktmbuf_pool_private), - rte_pktmbuf_pool_init, NULL, - rte_pktmbuf_init, NULL, - rte_socket_id(), 0); + mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL", + NUM_MBUFS_PER_PORT * valid_num_ports, MBUF_CACHE_SIZE, + 0, MBUF_DATA_SIZE, rte_socket_id()); if (mbuf_pool == NULL) rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n"); @@ -2925,14 +3029,6 @@ MAIN(int argc, char *argv[]) char pool_name[RTE_MEMPOOL_NAMESIZE]; char ring_name[RTE_MEMPOOL_NAMESIZE]; - /* - * Zero copy defers queue RX/TX start to the time when guest - * finishes its startup and packet buffers from that guest are - * available. - */ - rx_conf_default.rx_deferred_start = (uint8_t)zero_copy; - rx_conf_default.rx_drop_en = 0; - tx_conf_default.tx_deferred_start = (uint8_t)zero_copy; nb_mbuf = num_rx_descriptor + num_switching_cores * MBUF_CACHE_SIZE_ZCP + num_switching_cores * MAX_PKT_BURST; @@ -2991,8 +3087,19 @@ MAIN(int argc, char *argv[]) memset(&dev_statistics, 0, sizeof(dev_statistics)); /* Enable stats if the user option is set. */ - if (enable_stats) - pthread_create(&tid, NULL, (void*)print_stats, NULL ); + if (enable_stats) { + ret = pthread_create(&tid, NULL, (void *)print_stats, NULL); + if (ret != 0) + rte_exit(EXIT_FAILURE, + "Cannot create print-stats thread\n"); + + /* Set thread_name for aid in debugging. */ + snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, "print-stats"); + ret = rte_thread_setname(tid, thread_name); + if (ret != 0) + RTE_LOG(ERR, VHOST_CONFIG, + "Cannot set print-stats name\n"); + } /* Launch all data cores. */ if (zero_copy == 0) { @@ -3020,10 +3127,10 @@ MAIN(int argc, char *argv[]) } LOG_DEBUG(VHOST_CONFIG, - "in MAIN: mbuf count in mempool at initial " + "in main: mbuf count in mempool at initial " "is: %d\n", count_in_mempool); LOG_DEBUG(VHOST_CONFIG, - "in MAIN: mbuf count in ring at initial is :" + "in main: mbuf count in ring at initial is :" " %d\n", rte_ring_count(vpool_array[index].ring)); } @@ -3036,10 +3143,10 @@ MAIN(int argc, char *argv[]) if (mergeable == 0) rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_MRG_RXBUF); - /* Register CUSE device to handle IOCTLs. */ + /* Register vhost(cuse or user) driver to handle vhost messages. */ ret = rte_vhost_driver_register((char *)&dev_basename); if (ret != 0) - rte_exit(EXIT_FAILURE,"CUSE device setup failure.\n"); + rte_exit(EXIT_FAILURE, "vhost driver register failure.\n"); rte_vhost_driver_callback_register(&virtio_net_device_ops); @@ -3048,4 +3155,3 @@ MAIN(int argc, char *argv[]) return 0; } -
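
For reference, the TSO path added above (virtio_tx_offload() and get_psd_sum()) relies on the standard DPDK mbuf offload contract: the sender fills in l2_len/l3_len/l4_len, tso_segsz and the PKT_TX_* flags, and the transmit path supplies the pseudo-header checksum before the hardware (or vhost) segments the packet. The sketch below illustrates that contract for an untagged IPv4/TCP frame. It is not part of this patch; the helper name prepare_tso_mbuf() and the 1460-byte MSS are assumptions made purely for the example.

#include <rte_mbuf.h>
#include <rte_ether.h>
#include <rte_ip.h>
#include <rte_tcp.h>

/* Illustrative only: mark an IPv4/TCP mbuf for TSO + checksum offload.
 * Assumes an untagged Ethernet frame (no VLAN header). */
static void
prepare_tso_mbuf(struct rte_mbuf *m)
{
	struct ether_hdr *eth = rte_pktmbuf_mtod(m, struct ether_hdr *);
	struct ipv4_hdr *ip = (struct ipv4_hdr *)(eth + 1);
	struct tcp_hdr *tcp = (struct tcp_hdr *)((char *)ip +
			(ip->version_ihl & 0x0f) * 4);

	/* Header lengths the TX path uses to locate each layer. */
	m->l2_len = sizeof(struct ether_hdr);
	m->l3_len = (ip->version_ihl & 0x0f) * 4;
	m->l4_len = (tcp->data_off >> 4) * 4;
	m->tso_segsz = 1460;		/* assumed MSS for the example */

	/* Request IP header checksum, TCP checksum and segmentation. */
	m->ol_flags |= PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
		       PKT_TX_TCP_CKSUM | PKT_TX_TCP_SEG;

	/* With TSO, the TCP checksum field must hold the pseudo-header
	 * checksum and the IP checksum field is cleared; this is the same
	 * fix-up get_psd_sum()/rte_ipv4_phdr_cksum() performs above. */
	ip->hdr_checksum = 0;
	tcp->cksum = rte_ipv4_phdr_cksum(ip, m->ol_flags);
}

A caller would apply this to each TCP mbuf before handing it to rte_eth_tx_burst(); in the patch, virtio_tx_route() performs the equivalent fix-up via virtio_tx_offload() whenever a guest packet arrives with PKT_TX_TCP_SEG set.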