X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=examples%2Fvhost%2Fmain.c;h=28c17afd556db9f463bf34a8211b5ddde52a02fb;hb=2a73125b7041;hp=195d82f6052f85cd8dcf12821c2a8a67d3185608;hpb=355e6735b3359223f32485c3cbd4e99583eacb7c;p=dpdk.git diff --git a/examples/vhost/main.c b/examples/vhost/main.c index 195d82f605..28c17afd55 100644 --- a/examples/vhost/main.c +++ b/examples/vhost/main.c @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -50,10 +50,14 @@ #include #include #include +#include +#include #include "main.h" -#define MAX_QUEUES 512 +#ifndef MAX_QUEUES +#define MAX_QUEUES 128 +#endif /* the maximum number of external ports supported */ #define MAX_SUP_PORTS 1 @@ -61,27 +65,27 @@ /* * Calculate the number of buffers needed per port */ -#define NUM_MBUFS_PER_PORT ((MAX_QUEUES*RTE_TEST_RX_DESC_DEFAULT) + \ +#define NUM_MBUFS_PER_PORT ((MAX_QUEUES*RTE_TEST_RX_DESC_DEFAULT) + \ (num_switching_cores*MAX_PKT_BURST) + \ (num_switching_cores*RTE_TEST_TX_DESC_DEFAULT) +\ - (num_switching_cores*MBUF_CACHE_SIZE)) + ((num_switching_cores+1)*MBUF_CACHE_SIZE)) -#define MBUF_CACHE_SIZE 128 -#define MBUF_DATA_SIZE (2048 + RTE_PKTMBUF_HEADROOM) +#define MBUF_CACHE_SIZE 128 +#define MBUF_DATA_SIZE RTE_MBUF_DEFAULT_BUF_SIZE /* * No frame data buffer allocated from host are required for zero copy * implementation, guest will allocate the frame data buffer, and vhost * directly use it. */ -#define VIRTIO_DESCRIPTOR_LEN_ZCP 1518 -#define MBUF_DATA_SIZE_ZCP (VIRTIO_DESCRIPTOR_LEN_ZCP + RTE_PKTMBUF_HEADROOM) +#define VIRTIO_DESCRIPTOR_LEN_ZCP RTE_MBUF_DEFAULT_DATAROOM +#define MBUF_DATA_SIZE_ZCP RTE_MBUF_DEFAULT_BUF_SIZE #define MBUF_CACHE_SIZE_ZCP 0 -#define MAX_PKT_BURST 32 /* Max burst size for RX/TX */ -#define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */ +#define MAX_PKT_BURST 32 /* Max burst size for RX/TX */ +#define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */ -#define BURST_RX_WAIT_US 15 /* Defines how long we wait between retries on RX */ +#define BURST_RX_WAIT_US 15 /* Defines how long we wait between retries on RX */ #define BURST_RX_RETRIES 4 /* Number of retries on RX. */ #define JUMBO_FRAME_MAX_SIZE 0x2600 @@ -197,6 +201,13 @@ typedef enum { static uint32_t enable_stats = 0; /* Enable retries on RX. */ static uint32_t enable_retry = 1; + +/* Disable TX checksum offload */ +static uint32_t enable_tx_csum; + +/* Disable TSO offload */ +static uint32_t enable_tso; + /* Specify timeout (in useconds) between retries on RX. */ static uint32_t burst_rx_delay_time = BURST_RX_WAIT_US; /* Specify the number of retries on RX. */ @@ -292,20 +303,6 @@ struct vlan_ethhdr { __be16 h_vlan_encapsulated_proto; }; -/* IPv4 Header */ -struct ipv4_hdr { - uint8_t version_ihl; /**< version and header length */ - uint8_t type_of_service; /**< type of service */ - uint16_t total_length; /**< length of packet */ - uint16_t packet_id; /**< packet ID */ - uint16_t fragment_offset; /**< fragmentation offset */ - uint8_t time_to_live; /**< time to live */ - uint8_t next_proto_id; /**< protocol ID */ - uint16_t hdr_checksum; /**< header checksum */ - uint32_t src_addr; /**< source address */ - uint32_t dst_addr; /**< destination address */ -} __attribute__((__packed__)); - /* Header lengths. */ #define VLAN_HLEN 4 #define VLAN_ETH_HLEN 18 @@ -441,6 +438,14 @@ port_init(uint8_t port) if (port >= rte_eth_dev_count()) return -1; + if (enable_tx_csum == 0) + rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_CSUM); + + if (enable_tso == 0) { + rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_HOST_TSO4); + rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_HOST_TSO6); + } + rx_rings = (uint16_t)dev_info.max_rx_queues; /* Configure ethernet device. */ retval = rte_eth_dev_configure(port, rx_rings, tx_rings, &port_conf); @@ -576,7 +581,9 @@ us_vhost_usage(const char *prgname) " --rx-desc-num [0-N]: the number of descriptors on rx, " "used only when zero copy is enabled.\n" " --tx-desc-num [0-N]: the number of descriptors on tx, " - "used only when zero copy is enabled.\n", + "used only when zero copy is enabled.\n" + " --tx-csum [0|1] disable/enable TX checksum offload.\n" + " --tso [0|1] disable/enable TCP segment offload.\n", prgname); } @@ -602,6 +609,8 @@ us_vhost_parse_args(int argc, char **argv) {"zero-copy", required_argument, NULL, 0}, {"rx-desc-num", required_argument, NULL, 0}, {"tx-desc-num", required_argument, NULL, 0}, + {"tx-csum", required_argument, NULL, 0}, + {"tso", required_argument, NULL, 0}, {NULL, 0, 0, 0}, }; @@ -656,6 +665,28 @@ us_vhost_parse_args(int argc, char **argv) } } + /* Enable/disable TX checksum offload. */ + if (!strncmp(long_option[option_index].name, "tx-csum", MAX_LONG_OPT_SZ)) { + ret = parse_num_opt(optarg, 1); + if (ret == -1) { + RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for tx-csum [0|1]\n"); + us_vhost_usage(prgname); + return -1; + } else + enable_tx_csum = ret; + } + + /* Enable/disable TSO offload. */ + if (!strncmp(long_option[option_index].name, "tso", MAX_LONG_OPT_SZ)) { + ret = parse_num_opt(optarg, 1); + if (ret == -1) { + RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for tso [0|1]\n"); + us_vhost_usage(prgname); + return -1; + } else + enable_tso = ret; + } + /* Specify the retries delay time (in useconds) on RX. */ if (!strncmp(long_option[option_index].name, "rx-retry-delay", MAX_LONG_OPT_SZ)) { ret = parse_num_opt(optarg, INT32_MAX); @@ -909,7 +940,7 @@ gpa_to_hpa(struct vhost_dev *vdev, uint64_t guest_pa, static inline int __attribute__((always_inline)) ether_addr_cmp(struct ether_addr *ea, struct ether_addr *eb) { - return (((*(uint64_t *)ea ^ *(uint64_t *)eb) & MAC_ADDR_CMP) == 0); + return ((*(uint64_t *)ea ^ *(uint64_t *)eb) & MAC_ADDR_CMP) == 0; } /* @@ -1050,8 +1081,8 @@ virtio_tx_local(struct vhost_dev *vdev, struct rte_mbuf *m) rte_atomic64_add( &dev_statistics[tdev->device_fh].rx_atomic, ret); - dev_statistics[tdev->device_fh].tx_total++; - dev_statistics[tdev->device_fh].tx += ret; + dev_statistics[dev->device_fh].tx_total++; + dev_statistics[dev->device_fh].tx += ret; } } @@ -1105,7 +1136,7 @@ find_local_dest(struct virtio_net *dev, struct rte_mbuf *m, "(%"PRIu64") TX: pkt to local VM device id:" "(%"PRIu64") vlan tag: %d.\n", dev->device_fh, dev_ll->vdev->dev->device_fh, - vlan_tag); + (int)*vlan_tag); break; } @@ -1114,6 +1145,34 @@ find_local_dest(struct virtio_net *dev, struct rte_mbuf *m, return 0; } +static uint16_t +get_psd_sum(void *l3_hdr, uint64_t ol_flags) +{ + if (ol_flags & PKT_TX_IPV4) + return rte_ipv4_phdr_cksum(l3_hdr, ol_flags); + else /* assume ethertype == ETHER_TYPE_IPv6 */ + return rte_ipv6_phdr_cksum(l3_hdr, ol_flags); +} + +static void virtio_tx_offload(struct rte_mbuf *m) +{ + void *l3_hdr; + struct ipv4_hdr *ipv4_hdr = NULL; + struct tcp_hdr *tcp_hdr = NULL; + struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *); + + l3_hdr = (char *)eth_hdr + m->l2_len; + + if (m->ol_flags & PKT_TX_IPV4) { + ipv4_hdr = l3_hdr; + ipv4_hdr->hdr_checksum = 0; + m->ol_flags |= PKT_TX_IP_CKSUM; + } + + tcp_hdr = (struct tcp_hdr *)((char *)l3_hdr + m->l3_len); + tcp_hdr->cksum = get_psd_sum(l3_hdr, m->ol_flags); +} + /* * This function routes the TX packet to the correct interface. This may be a local device * or the physical port. @@ -1156,7 +1215,7 @@ virtio_tx_route(struct vhost_dev *vdev, struct rte_mbuf *m, uint16_t vlan_tag) (vh->vlan_tci != vlan_tag_be)) vh->vlan_tci = vlan_tag_be; } else { - m->ol_flags = PKT_TX_VLAN_PKT; + m->ol_flags |= PKT_TX_VLAN_PKT; /* * Find the right seg to adjust the data len when offset is @@ -1180,6 +1239,9 @@ virtio_tx_route(struct vhost_dev *vdev, struct rte_mbuf *m, uint16_t vlan_tag) m->vlan_tci = vlan_tag; } + if (m->ol_flags & PKT_TX_TCP_SEG) + virtio_tx_offload(m); + tx_q->m_table[len] = m; len++; if (enable_stats) { @@ -1334,8 +1396,10 @@ switch_worker(__attribute__((unused)) void *arg) rte_pktmbuf_free(pkts_burst[--tx_count]); } } - while (tx_count) - virtio_tx_route(vdev, pkts_burst[--tx_count], (uint16_t)dev->device_fh); + for (i = 0; i < tx_count; ++i) { + virtio_tx_route(vdev, pkts_burst[i], + vlan_tags[(uint16_t)dev->device_fh]); + } } /*move to the next device in the list*/ @@ -1433,7 +1497,7 @@ put_desc_to_used_list_zcp(struct vhost_virtqueue *vq, uint16_t desc_idx) /* Kick the guest if necessary. */ if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) - eventfd_write((int)vq->callfd, 1); + eventfd_write(vq->callfd, (eventfd_t)1); } /* @@ -1449,7 +1513,8 @@ attach_rxmbuf_zcp(struct virtio_net *dev) uint64_t buff_addr, phys_addr; struct vhost_virtqueue *vq; struct vring_desc *desc; - struct rte_mbuf *mbuf = NULL; + void *obj = NULL; + struct rte_mbuf *mbuf; struct vpool *vpool; hpa_type addr_type; struct vhost_dev *vdev = (struct vhost_dev *)dev->priv; @@ -1500,7 +1565,8 @@ attach_rxmbuf_zcp(struct virtio_net *dev) } } while (unlikely(phys_addr == 0)); - rte_ring_sc_dequeue(vpool->ring, (void **)&mbuf); + rte_ring_sc_dequeue(vpool->ring, &obj); + mbuf = obj; if (unlikely(mbuf == NULL)) { LOG_DEBUG(VHOST_DATA, "(%"PRIu64") in attach_rxmbuf_zcp: " @@ -1517,7 +1583,7 @@ attach_rxmbuf_zcp(struct virtio_net *dev) "size required: %d\n", dev->device_fh, desc->len, desc_idx, vpool->buf_size); put_desc_to_used_list_zcp(vq, desc_idx); - rte_ring_sp_enqueue(vpool->ring, (void *)mbuf); + rte_ring_sp_enqueue(vpool->ring, obj); return; } @@ -1626,7 +1692,7 @@ txmbuf_clean_zcp(struct virtio_net *dev, struct vpool *vpool) /* Kick guest if required. */ if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) - eventfd_write((int)vq->callfd, 1); + eventfd_write(vq->callfd, (eventfd_t)1); return 0; } @@ -1774,7 +1840,7 @@ virtio_dev_rx_zcp(struct virtio_net *dev, struct rte_mbuf **pkts, /* Kick the guest if necessary. */ if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) - eventfd_write((int)vq->callfd, 1); + eventfd_write(vq->callfd, (eventfd_t)1); return count; } @@ -1789,7 +1855,8 @@ virtio_tx_route_zcp(struct virtio_net *dev, struct rte_mbuf *m, { struct mbuf_table *tx_q; struct rte_mbuf **m_table; - struct rte_mbuf *mbuf = NULL; + void *obj = NULL; + struct rte_mbuf *mbuf; unsigned len, ret, offset = 0; struct vpool *vpool; uint16_t vlan_tag = (uint16_t)vlan_tags[(uint16_t)dev->device_fh]; @@ -1801,7 +1868,8 @@ virtio_tx_route_zcp(struct virtio_net *dev, struct rte_mbuf *m, /* Allocate an mbuf and populate the structure. */ vpool = &vpool_array[MAX_QUEUES + vmdq_rx_q]; - rte_ring_sc_dequeue(vpool->ring, (void **)&mbuf); + rte_ring_sc_dequeue(vpool->ring, &obj); + mbuf = obj; if (unlikely(mbuf == NULL)) { struct vhost_virtqueue *vq = dev->virtqueue[VIRTIO_TXQ]; RTE_LOG(ERR, VHOST_DATA, @@ -1841,7 +1909,7 @@ virtio_tx_route_zcp(struct virtio_net *dev, struct rte_mbuf *m, mbuf->buf_physaddr = m->buf_physaddr; mbuf->buf_addr = m->buf_addr; } - mbuf->ol_flags = PKT_TX_VLAN_PKT; + mbuf->ol_flags |= PKT_TX_VLAN_PKT; mbuf->vlan_tci = vlan_tag; mbuf->l2_len = sizeof(struct ether_hdr); mbuf->l3_len = sizeof(struct ipv4_hdr); @@ -2275,7 +2343,7 @@ alloc_data_ll(uint32_t size) } ll_new[i].next = NULL; - return (ll_new); + return ll_new; } /* @@ -2871,6 +2939,16 @@ setup_mempool_tbl(int socket, uint32_t index, char *pool_name, } } +/* When we receive a INT signal, unregister vhost driver */ +static void +sigint_handler(__rte_unused int signum) +{ + /* Unregister vhost driver. */ + int ret = rte_vhost_driver_unregister((char *)&dev_basename); + if (ret != 0) + rte_exit(EXIT_FAILURE, "vhost driver unregister failure.\n"); + exit(0); +} /* * Main function, does initialisation and calls the per-lcore functions. The CUSE @@ -2886,6 +2964,9 @@ main(int argc, char *argv[]) uint8_t portid; uint16_t queue_id; static pthread_t tid; + char thread_name[RTE_MAX_THREAD_NAME_LEN]; + + signal(SIGINT, sigint_handler); /* init EAL */ ret = rte_eal_init(argc, argv); @@ -3006,8 +3087,19 @@ main(int argc, char *argv[]) memset(&dev_statistics, 0, sizeof(dev_statistics)); /* Enable stats if the user option is set. */ - if (enable_stats) - pthread_create(&tid, NULL, (void*)print_stats, NULL ); + if (enable_stats) { + ret = pthread_create(&tid, NULL, (void *)print_stats, NULL); + if (ret != 0) + rte_exit(EXIT_FAILURE, + "Cannot create print-stats thread\n"); + + /* Set thread_name for aid in debugging. */ + snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, "print-stats"); + ret = rte_thread_setname(tid, thread_name); + if (ret != 0) + RTE_LOG(ERR, VHOST_CONFIG, + "Cannot set print-stats name\n"); + } /* Launch all data cores. */ if (zero_copy == 0) { @@ -3051,10 +3143,10 @@ main(int argc, char *argv[]) if (mergeable == 0) rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_MRG_RXBUF); - /* Register CUSE device to handle IOCTLs. */ + /* Register vhost(cuse or user) driver to handle vhost messages. */ ret = rte_vhost_driver_register((char *)&dev_basename); if (ret != 0) - rte_exit(EXIT_FAILURE,"CUSE device setup failure.\n"); + rte_exit(EXIT_FAILURE, "vhost driver register failure.\n"); rte_vhost_driver_callback_register(&virtio_net_device_ops); @@ -3063,4 +3155,3 @@ main(int argc, char *argv[]) return 0; } -