X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=examples%2Fvhost%2Fmain.c;h=78fd1ab155aa1e39b33f3b86642cc020820adf63;hb=1f49ec153c8f91ee34c23e58c7443eb87f566b60;hp=195d82f6052f85cd8dcf12821c2a8a67d3185608;hpb=355e6735b3359223f32485c3cbd4e99583eacb7c;p=dpdk.git diff --git a/examples/vhost/main.c b/examples/vhost/main.c index 195d82f605..78fd1ab155 100644 --- a/examples/vhost/main.c +++ b/examples/vhost/main.c @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -50,10 +50,14 @@ #include #include #include +#include +#include #include "main.h" -#define MAX_QUEUES 512 +#ifndef MAX_QUEUES +#define MAX_QUEUES 128 +#endif /* the maximum number of external ports supported */ #define MAX_SUP_PORTS 1 @@ -61,27 +65,27 @@ /* * Calculate the number of buffers needed per port */ -#define NUM_MBUFS_PER_PORT ((MAX_QUEUES*RTE_TEST_RX_DESC_DEFAULT) + \ +#define NUM_MBUFS_PER_PORT ((MAX_QUEUES*RTE_TEST_RX_DESC_DEFAULT) + \ (num_switching_cores*MAX_PKT_BURST) + \ (num_switching_cores*RTE_TEST_TX_DESC_DEFAULT) +\ - (num_switching_cores*MBUF_CACHE_SIZE)) + ((num_switching_cores+1)*MBUF_CACHE_SIZE)) -#define MBUF_CACHE_SIZE 128 -#define MBUF_DATA_SIZE (2048 + RTE_PKTMBUF_HEADROOM) +#define MBUF_CACHE_SIZE 128 +#define MBUF_DATA_SIZE RTE_MBUF_DEFAULT_BUF_SIZE /* * No frame data buffer allocated from host are required for zero copy * implementation, guest will allocate the frame data buffer, and vhost * directly use it. */ -#define VIRTIO_DESCRIPTOR_LEN_ZCP 1518 -#define MBUF_DATA_SIZE_ZCP (VIRTIO_DESCRIPTOR_LEN_ZCP + RTE_PKTMBUF_HEADROOM) +#define VIRTIO_DESCRIPTOR_LEN_ZCP RTE_MBUF_DEFAULT_DATAROOM +#define MBUF_DATA_SIZE_ZCP RTE_MBUF_DEFAULT_BUF_SIZE #define MBUF_CACHE_SIZE_ZCP 0 -#define MAX_PKT_BURST 32 /* Max burst size for RX/TX */ -#define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */ +#define MAX_PKT_BURST 32 /* Max burst size for RX/TX */ +#define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */ -#define BURST_RX_WAIT_US 15 /* Defines how long we wait between retries on RX */ +#define BURST_RX_WAIT_US 15 /* Defines how long we wait between retries on RX */ #define BURST_RX_RETRIES 4 /* Number of retries on RX. */ #define JUMBO_FRAME_MAX_SIZE 0x2600 @@ -197,6 +201,13 @@ typedef enum { static uint32_t enable_stats = 0; /* Enable retries on RX. */ static uint32_t enable_retry = 1; + +/* Disable TX checksum offload */ +static uint32_t enable_tx_csum; + +/* Disable TSO offload */ +static uint32_t enable_tso; + /* Specify timeout (in useconds) between retries on RX. */ static uint32_t burst_rx_delay_time = BURST_RX_WAIT_US; /* Specify the number of retries on RX. */ @@ -292,20 +303,6 @@ struct vlan_ethhdr { __be16 h_vlan_encapsulated_proto; }; -/* IPv4 Header */ -struct ipv4_hdr { - uint8_t version_ihl; /**< version and header length */ - uint8_t type_of_service; /**< type of service */ - uint16_t total_length; /**< length of packet */ - uint16_t packet_id; /**< packet ID */ - uint16_t fragment_offset; /**< fragmentation offset */ - uint8_t time_to_live; /**< time to live */ - uint8_t next_proto_id; /**< protocol ID */ - uint16_t hdr_checksum; /**< header checksum */ - uint32_t src_addr; /**< source address */ - uint32_t dst_addr; /**< destination address */ -} __attribute__((__packed__)); - /* Header lengths. */ #define VLAN_HLEN 4 #define VLAN_ETH_HLEN 18 @@ -441,6 +438,14 @@ port_init(uint8_t port) if (port >= rte_eth_dev_count()) return -1; + if (enable_tx_csum == 0) + rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_CSUM); + + if (enable_tso == 0) { + rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_HOST_TSO4); + rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_HOST_TSO6); + } + rx_rings = (uint16_t)dev_info.max_rx_queues; /* Configure ethernet device. */ retval = rte_eth_dev_configure(port, rx_rings, tx_rings, &port_conf); @@ -576,7 +581,9 @@ us_vhost_usage(const char *prgname) " --rx-desc-num [0-N]: the number of descriptors on rx, " "used only when zero copy is enabled.\n" " --tx-desc-num [0-N]: the number of descriptors on tx, " - "used only when zero copy is enabled.\n", + "used only when zero copy is enabled.\n" + " --tx-csum [0|1] disable/enable TX checksum offload.\n" + " --tso [0|1] disable/enable TCP segment offload.\n", prgname); } @@ -602,6 +609,8 @@ us_vhost_parse_args(int argc, char **argv) {"zero-copy", required_argument, NULL, 0}, {"rx-desc-num", required_argument, NULL, 0}, {"tx-desc-num", required_argument, NULL, 0}, + {"tx-csum", required_argument, NULL, 0}, + {"tso", required_argument, NULL, 0}, {NULL, 0, 0, 0}, }; @@ -656,6 +665,28 @@ us_vhost_parse_args(int argc, char **argv) } } + /* Enable/disable TX checksum offload. */ + if (!strncmp(long_option[option_index].name, "tx-csum", MAX_LONG_OPT_SZ)) { + ret = parse_num_opt(optarg, 1); + if (ret == -1) { + RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for tx-csum [0|1]\n"); + us_vhost_usage(prgname); + return -1; + } else + enable_tx_csum = ret; + } + + /* Enable/disable TSO offload. */ + if (!strncmp(long_option[option_index].name, "tso", MAX_LONG_OPT_SZ)) { + ret = parse_num_opt(optarg, 1); + if (ret == -1) { + RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for tso [0|1]\n"); + us_vhost_usage(prgname); + return -1; + } else + enable_tso = ret; + } + /* Specify the retries delay time (in useconds) on RX. */ if (!strncmp(long_option[option_index].name, "rx-retry-delay", MAX_LONG_OPT_SZ)) { ret = parse_num_opt(optarg, INT32_MAX); @@ -846,7 +877,7 @@ static unsigned check_ports_num(unsigned nb_ports) * Macro to print out packet contents. Wrapped in debug define so that the * data path is not effected when debug is disabled. */ -#ifdef DEBUG +#if RTE_LOG_LEVEL >= RTE_LOG_DEBUG #define PRINT_PACKET(device, addr, size, header) do { \ char *pkt_addr = (char*)(addr); \ unsigned int index; \ @@ -862,7 +893,7 @@ static unsigned check_ports_num(unsigned nb_ports) } \ snprintf(packet + strnlen(packet, MAX_PRINT_BUFF), MAX_PRINT_BUFF - strnlen(packet, MAX_PRINT_BUFF), "\n"); \ \ - LOG_DEBUG(VHOST_DATA, "%s", packet); \ + RTE_LOG(DEBUG, VHOST_DATA, "%s", packet); \ } while(0) #else #define PRINT_PACKET(device, addr, size, header) do{} while(0) @@ -896,7 +927,7 @@ gpa_to_hpa(struct vhost_dev *vdev, uint64_t guest_pa, } } - LOG_DEBUG(VHOST_DATA, "(%"PRIu64") GPA %p| HPA %p\n", + RTE_LOG(DEBUG, VHOST_DATA, "(%" PRIu64 ") GPA %p| HPA %p\n", vdev->dev->device_fh, (void *)(uintptr_t)guest_pa, (void *)(uintptr_t)vhost_pa); @@ -909,7 +940,7 @@ gpa_to_hpa(struct vhost_dev *vdev, uint64_t guest_pa, static inline int __attribute__((always_inline)) ether_addr_cmp(struct ether_addr *ea, struct ether_addr *eb) { - return (((*(uint64_t *)ea ^ *(uint64_t *)eb) & MAC_ADDR_CMP) == 0); + return ((*(uint64_t *)ea ^ *(uint64_t *)eb) & MAC_ADDR_CMP) == 0; } /* @@ -1028,18 +1059,22 @@ virtio_tx_local(struct vhost_dev *vdev, struct rte_mbuf *m) /* Drop the packet if the TX packet is destined for the TX device. */ if (dev_ll->vdev->dev->device_fh == dev->device_fh) { - LOG_DEBUG(VHOST_DATA, "(%"PRIu64") TX: Source and destination MAC addresses are the same. Dropping packet.\n", - dev->device_fh); + RTE_LOG(DEBUG, VHOST_DATA, "(%" PRIu64 ") TX: " + "Source and destination MAC addresses are the same. " + "Dropping packet.\n", + dev->device_fh); return 0; } tdev = dev_ll->vdev->dev; - LOG_DEBUG(VHOST_DATA, "(%"PRIu64") TX: MAC address is local\n", tdev->device_fh); + RTE_LOG(DEBUG, VHOST_DATA, "(%" PRIu64 ") TX: " + "MAC address is local\n", tdev->device_fh); if (unlikely(dev_ll->vdev->remove)) { /*drop the packet if the device is marked for removal*/ - LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Device is marked for removal\n", tdev->device_fh); + RTE_LOG(DEBUG, VHOST_DATA, "(%" PRIu64 ") " + "Device is marked for removal\n", tdev->device_fh); } else { /*send the packet to the local virtio device*/ ret = rte_vhost_enqueue_burst(tdev, VIRTIO_RXQ, &m, 1); @@ -1050,8 +1085,8 @@ virtio_tx_local(struct vhost_dev *vdev, struct rte_mbuf *m) rte_atomic64_add( &dev_statistics[tdev->device_fh].rx_atomic, ret); - dev_statistics[tdev->device_fh].tx_total++; - dev_statistics[tdev->device_fh].tx += ret; + dev_statistics[dev->device_fh].tx_total++; + dev_statistics[dev->device_fh].tx += ret; } } @@ -1083,7 +1118,7 @@ find_local_dest(struct virtio_net *dev, struct rte_mbuf *m, * destined for the TX device. */ if (dev_ll->vdev->dev->device_fh == dev->device_fh) { - LOG_DEBUG(VHOST_DATA, + RTE_LOG(DEBUG, VHOST_DATA, "(%"PRIu64") TX: Source and destination" " MAC addresses are the same. Dropping " "packet.\n", @@ -1101,11 +1136,11 @@ find_local_dest(struct virtio_net *dev, struct rte_mbuf *m, (uint16_t) vlan_tags[(uint16_t)dev_ll->vdev->dev->device_fh]; - LOG_DEBUG(VHOST_DATA, + RTE_LOG(DEBUG, VHOST_DATA, "(%"PRIu64") TX: pkt to local VM device id:" "(%"PRIu64") vlan tag: %d.\n", dev->device_fh, dev_ll->vdev->dev->device_fh, - vlan_tag); + (int)*vlan_tag); break; } @@ -1114,6 +1149,34 @@ find_local_dest(struct virtio_net *dev, struct rte_mbuf *m, return 0; } +static uint16_t +get_psd_sum(void *l3_hdr, uint64_t ol_flags) +{ + if (ol_flags & PKT_TX_IPV4) + return rte_ipv4_phdr_cksum(l3_hdr, ol_flags); + else /* assume ethertype == ETHER_TYPE_IPv6 */ + return rte_ipv6_phdr_cksum(l3_hdr, ol_flags); +} + +static void virtio_tx_offload(struct rte_mbuf *m) +{ + void *l3_hdr; + struct ipv4_hdr *ipv4_hdr = NULL; + struct tcp_hdr *tcp_hdr = NULL; + struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *); + + l3_hdr = (char *)eth_hdr + m->l2_len; + + if (m->ol_flags & PKT_TX_IPV4) { + ipv4_hdr = l3_hdr; + ipv4_hdr->hdr_checksum = 0; + m->ol_flags |= PKT_TX_IP_CKSUM; + } + + tcp_hdr = (struct tcp_hdr *)((char *)l3_hdr + m->l3_len); + tcp_hdr->cksum = get_psd_sum(l3_hdr, m->ol_flags); +} + /* * This function routes the TX packet to the correct interface. This may be a local device * or the physical port. @@ -1141,7 +1204,8 @@ virtio_tx_route(struct vhost_dev *vdev, struct rte_mbuf *m, uint16_t vlan_tag) } } - LOG_DEBUG(VHOST_DATA, "(%"PRIu64") TX: MAC address is external\n", dev->device_fh); + RTE_LOG(DEBUG, VHOST_DATA, "(%" PRIu64 ") TX: " + "MAC address is external\n", dev->device_fh); /*Add packet to the port tx queue*/ tx_q = &lcore_tx_queue[lcore_id]; @@ -1156,7 +1220,7 @@ virtio_tx_route(struct vhost_dev *vdev, struct rte_mbuf *m, uint16_t vlan_tag) (vh->vlan_tci != vlan_tag_be)) vh->vlan_tci = vlan_tag_be; } else { - m->ol_flags = PKT_TX_VLAN_PKT; + m->ol_flags |= PKT_TX_VLAN_PKT; /* * Find the right seg to adjust the data len when offset is @@ -1180,6 +1244,9 @@ virtio_tx_route(struct vhost_dev *vdev, struct rte_mbuf *m, uint16_t vlan_tag) m->vlan_tci = vlan_tag; } + if (m->ol_flags & PKT_TX_TCP_SEG) + virtio_tx_offload(m); + tx_q->m_table[len] = m; len++; if (enable_stats) { @@ -1248,7 +1315,9 @@ switch_worker(__attribute__((unused)) void *arg) if (unlikely(diff_tsc > drain_tsc)) { if (tx_q->len) { - LOG_DEBUG(VHOST_DATA, "TX queue drained after timeout with burst size %u \n", tx_q->len); + RTE_LOG(DEBUG, VHOST_DATA, + "TX queue drained after timeout with burst size %u\n", + tx_q->len); /*Tx any packets in the queue*/ ret = rte_eth_tx_burst(ports[0], (uint16_t)tx_q->txq_id, @@ -1334,8 +1403,10 @@ switch_worker(__attribute__((unused)) void *arg) rte_pktmbuf_free(pkts_burst[--tx_count]); } } - while (tx_count) - virtio_tx_route(vdev, pkts_burst[--tx_count], (uint16_t)dev->device_fh); + for (i = 0; i < tx_count; ++i) { + virtio_tx_route(vdev, pkts_burst[i], + vlan_tags[(uint16_t)dev->device_fh]); + } } /*move to the next device in the list*/ @@ -1380,7 +1451,7 @@ get_available_ring_index_zcp(struct virtio_net *dev, avail_idx = *((volatile uint16_t *)&vq->avail->idx); free_entries = (avail_idx - *res_base_idx); - LOG_DEBUG(VHOST_DATA, "(%"PRIu64") in get_available_ring_index_zcp: " + RTE_LOG(DEBUG, VHOST_DATA, "(%" PRIu64 ") in get_available_ring_index_zcp: " "avail idx: %d, " "res base idx:%d, free entries:%d\n", dev->device_fh, avail_idx, *res_base_idx, @@ -1405,7 +1476,7 @@ get_available_ring_index_zcp(struct virtio_net *dev, count = free_entries; if (unlikely(count == 0)) { - LOG_DEBUG(VHOST_DATA, + RTE_LOG(DEBUG, VHOST_DATA, "(%"PRIu64") Fail in get_available_ring_index_zcp: " "avail idx: %d, res base idx:%d, free entries:%d\n", dev->device_fh, avail_idx, @@ -1433,7 +1504,7 @@ put_desc_to_used_list_zcp(struct vhost_virtqueue *vq, uint16_t desc_idx) /* Kick the guest if necessary. */ if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) - eventfd_write((int)vq->callfd, 1); + eventfd_write(vq->callfd, (eventfd_t)1); } /* @@ -1449,7 +1520,8 @@ attach_rxmbuf_zcp(struct virtio_net *dev) uint64_t buff_addr, phys_addr; struct vhost_virtqueue *vq; struct vring_desc *desc; - struct rte_mbuf *mbuf = NULL; + void *obj = NULL; + struct rte_mbuf *mbuf; struct vpool *vpool; hpa_type addr_type; struct vhost_dev *vdev = (struct vhost_dev *)dev->priv; @@ -1500,9 +1572,10 @@ attach_rxmbuf_zcp(struct virtio_net *dev) } } while (unlikely(phys_addr == 0)); - rte_ring_sc_dequeue(vpool->ring, (void **)&mbuf); + rte_ring_sc_dequeue(vpool->ring, &obj); + mbuf = obj; if (unlikely(mbuf == NULL)) { - LOG_DEBUG(VHOST_DATA, + RTE_LOG(DEBUG, VHOST_DATA, "(%"PRIu64") in attach_rxmbuf_zcp: " "ring_sc_dequeue fail.\n", dev->device_fh); @@ -1511,13 +1584,13 @@ attach_rxmbuf_zcp(struct virtio_net *dev) } if (unlikely(vpool->buf_size > desc->len)) { - LOG_DEBUG(VHOST_DATA, + RTE_LOG(DEBUG, VHOST_DATA, "(%"PRIu64") in attach_rxmbuf_zcp: frame buffer " "length(%d) of descriptor idx: %d less than room " "size required: %d\n", dev->device_fh, desc->len, desc_idx, vpool->buf_size); put_desc_to_used_list_zcp(vq, desc_idx); - rte_ring_sp_enqueue(vpool->ring, (void *)mbuf); + rte_ring_sp_enqueue(vpool->ring, obj); return; } @@ -1527,7 +1600,7 @@ attach_rxmbuf_zcp(struct virtio_net *dev) mbuf->data_len = desc->len; MBUF_HEADROOM_UINT32(mbuf) = (uint32_t)desc_idx; - LOG_DEBUG(VHOST_DATA, + RTE_LOG(DEBUG, VHOST_DATA, "(%"PRIu64") in attach_rxmbuf_zcp: res base idx:%d, " "descriptor idx:%d\n", dev->device_fh, res_base_idx, desc_idx); @@ -1578,11 +1651,11 @@ txmbuf_clean_zcp(struct virtio_net *dev, struct vpool *vpool) uint32_t index = 0; uint32_t mbuf_count = rte_mempool_count(vpool->pool); - LOG_DEBUG(VHOST_DATA, + RTE_LOG(DEBUG, VHOST_DATA, "(%"PRIu64") in txmbuf_clean_zcp: mbuf count in mempool before " "clean is: %d\n", dev->device_fh, mbuf_count); - LOG_DEBUG(VHOST_DATA, + RTE_LOG(DEBUG, VHOST_DATA, "(%"PRIu64") in txmbuf_clean_zcp: mbuf count in ring before " "clean is : %d\n", dev->device_fh, rte_ring_count(vpool->ring)); @@ -1600,22 +1673,22 @@ txmbuf_clean_zcp(struct virtio_net *dev, struct vpool *vpool) used_idx = (used_idx + 1) & (vq->size - 1); } - LOG_DEBUG(VHOST_DATA, + RTE_LOG(DEBUG, VHOST_DATA, "(%"PRIu64") in txmbuf_clean_zcp: mbuf count in mempool after " "clean is: %d\n", dev->device_fh, rte_mempool_count(vpool->pool)); - LOG_DEBUG(VHOST_DATA, + RTE_LOG(DEBUG, VHOST_DATA, "(%"PRIu64") in txmbuf_clean_zcp: mbuf count in ring after " "clean is : %d\n", dev->device_fh, rte_ring_count(vpool->ring)); - LOG_DEBUG(VHOST_DATA, + RTE_LOG(DEBUG, VHOST_DATA, "(%"PRIu64") in txmbuf_clean_zcp: before updated " "vq->last_used_idx:%d\n", dev->device_fh, vq->last_used_idx); vq->last_used_idx += mbuf_count; - LOG_DEBUG(VHOST_DATA, + RTE_LOG(DEBUG, VHOST_DATA, "(%"PRIu64") in txmbuf_clean_zcp: after updated " "vq->last_used_idx:%d\n", dev->device_fh, vq->last_used_idx); @@ -1626,7 +1699,7 @@ txmbuf_clean_zcp(struct virtio_net *dev, struct vpool *vpool) /* Kick guest if required. */ if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) - eventfd_write((int)vq->callfd, 1); + eventfd_write(vq->callfd, (eventfd_t)1); return 0; } @@ -1640,11 +1713,11 @@ static void mbuf_destroy_zcp(struct vpool *vpool) struct rte_mbuf *mbuf = NULL; uint32_t index, mbuf_count = rte_mempool_count(vpool->pool); - LOG_DEBUG(VHOST_CONFIG, + RTE_LOG(DEBUG, VHOST_CONFIG, "in mbuf_destroy_zcp: mbuf count in mempool before " "mbuf_destroy_zcp is: %d\n", mbuf_count); - LOG_DEBUG(VHOST_CONFIG, + RTE_LOG(DEBUG, VHOST_CONFIG, "in mbuf_destroy_zcp: mbuf count in ring before " "mbuf_destroy_zcp is : %d\n", rte_ring_count(vpool->ring)); @@ -1658,11 +1731,11 @@ static void mbuf_destroy_zcp(struct vpool *vpool) } } - LOG_DEBUG(VHOST_CONFIG, + RTE_LOG(DEBUG, VHOST_CONFIG, "in mbuf_destroy_zcp: mbuf count in mempool after " "mbuf_destroy_zcp is: %d\n", rte_mempool_count(vpool->pool)); - LOG_DEBUG(VHOST_CONFIG, + RTE_LOG(DEBUG, VHOST_CONFIG, "in mbuf_destroy_zcp: mbuf count in ring after " "mbuf_destroy_zcp is : %d\n", rte_ring_count(vpool->ring)); @@ -1686,7 +1759,8 @@ virtio_dev_rx_zcp(struct virtio_net *dev, struct rte_mbuf **pkts, uint32_t head_idx, packet_success = 0; uint16_t res_cur_idx; - LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_rx()\n", dev->device_fh); + RTE_LOG(DEBUG, VHOST_DATA, "(%" PRIu64 ") virtio_dev_rx()\n", + dev->device_fh); if (count == 0) return 0; @@ -1695,7 +1769,7 @@ virtio_dev_rx_zcp(struct virtio_net *dev, struct rte_mbuf **pkts, count = (count > MAX_PKT_BURST) ? MAX_PKT_BURST : count; res_cur_idx = vq->last_used_idx; - LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Current Index %d| End Index %d\n", + RTE_LOG(DEBUG, VHOST_DATA, "(%" PRIu64 ") Current Index %d| End Index %d\n", dev->device_fh, res_cur_idx, res_cur_idx + count); /* Retrieve all of the head indexes first to avoid caching issues. */ @@ -1710,7 +1784,7 @@ virtio_dev_rx_zcp(struct virtio_net *dev, struct rte_mbuf **pkts, desc = &vq->desc[head[packet_success]]; buff = pkts[packet_success]; - LOG_DEBUG(VHOST_DATA, + RTE_LOG(DEBUG, VHOST_DATA, "(%"PRIu64") in dev_rx_zcp: update the used idx for " "pkt[%d] descriptor idx: %d\n", dev->device_fh, packet_success, @@ -1759,7 +1833,7 @@ virtio_dev_rx_zcp(struct virtio_net *dev, struct rte_mbuf **pkts, rte_compiler_barrier(); - LOG_DEBUG(VHOST_DATA, + RTE_LOG(DEBUG, VHOST_DATA, "(%"PRIu64") in dev_rx_zcp: before update used idx: " "vq.last_used_idx: %d, vq->used->idx: %d\n", dev->device_fh, vq->last_used_idx, vq->used->idx); @@ -1767,14 +1841,14 @@ virtio_dev_rx_zcp(struct virtio_net *dev, struct rte_mbuf **pkts, *(volatile uint16_t *)&vq->used->idx += count; vq->last_used_idx += count; - LOG_DEBUG(VHOST_DATA, + RTE_LOG(DEBUG, VHOST_DATA, "(%"PRIu64") in dev_rx_zcp: after update used idx: " "vq.last_used_idx: %d, vq->used->idx: %d\n", dev->device_fh, vq->last_used_idx, vq->used->idx); /* Kick the guest if necessary. */ if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) - eventfd_write((int)vq->callfd, 1); + eventfd_write(vq->callfd, (eventfd_t)1); return count; } @@ -1789,7 +1863,8 @@ virtio_tx_route_zcp(struct virtio_net *dev, struct rte_mbuf *m, { struct mbuf_table *tx_q; struct rte_mbuf **m_table; - struct rte_mbuf *mbuf = NULL; + void *obj = NULL; + struct rte_mbuf *mbuf; unsigned len, ret, offset = 0; struct vpool *vpool; uint16_t vlan_tag = (uint16_t)vlan_tags[(uint16_t)dev->device_fh]; @@ -1801,7 +1876,8 @@ virtio_tx_route_zcp(struct virtio_net *dev, struct rte_mbuf *m, /* Allocate an mbuf and populate the structure. */ vpool = &vpool_array[MAX_QUEUES + vmdq_rx_q]; - rte_ring_sc_dequeue(vpool->ring, (void **)&mbuf); + rte_ring_sc_dequeue(vpool->ring, &obj); + mbuf = obj; if (unlikely(mbuf == NULL)) { struct vhost_virtqueue *vq = dev->virtqueue[VIRTIO_TXQ]; RTE_LOG(ERR, VHOST_DATA, @@ -1841,7 +1917,7 @@ virtio_tx_route_zcp(struct virtio_net *dev, struct rte_mbuf *m, mbuf->buf_physaddr = m->buf_physaddr; mbuf->buf_addr = m->buf_addr; } - mbuf->ol_flags = PKT_TX_VLAN_PKT; + mbuf->ol_flags |= PKT_TX_VLAN_PKT; mbuf->vlan_tci = vlan_tag; mbuf->l2_len = sizeof(struct ether_hdr); mbuf->l3_len = sizeof(struct ipv4_hdr); @@ -1850,7 +1926,7 @@ virtio_tx_route_zcp(struct virtio_net *dev, struct rte_mbuf *m, tx_q->m_table[len] = mbuf; len++; - LOG_DEBUG(VHOST_DATA, + RTE_LOG(DEBUG, VHOST_DATA, "(%"PRIu64") in tx_route_zcp: pkt: nb_seg: %d, next:%s\n", dev->device_fh, mbuf->nb_segs, @@ -1912,7 +1988,8 @@ virtio_dev_tx_zcp(struct virtio_net *dev) if (vq->last_used_idx_res == avail_idx) return; - LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_tx()\n", dev->device_fh); + RTE_LOG(DEBUG, VHOST_DATA, "(%" PRIu64 ") virtio_dev_tx()\n", + dev->device_fh); /* Prefetch available ring to retrieve head indexes. */ rte_prefetch0(&vq->avail->ring[vq->last_used_idx_res & (vq->size - 1)]); @@ -1924,7 +2001,7 @@ virtio_dev_tx_zcp(struct virtio_net *dev) free_entries = (free_entries > MAX_PKT_BURST) ? MAX_PKT_BURST : free_entries; - LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Buffers available %d\n", + RTE_LOG(DEBUG, VHOST_DATA, "(%" PRIu64 ") Buffers available %d\n", dev->device_fh, free_entries); /* Retrieve all of the head indexes first to avoid caching issues. */ @@ -2061,7 +2138,7 @@ switch_worker_zcp(__attribute__((unused)) void *arg) if (likely(!vdev->remove)) { tx_q = &tx_queue_zcp[(uint16_t)vdev->vmdq_rx_q]; if (tx_q->len) { - LOG_DEBUG(VHOST_DATA, + RTE_LOG(DEBUG, VHOST_DATA, "TX queue drained after timeout" " with burst size %u\n", tx_q->len); @@ -2275,7 +2352,7 @@ alloc_data_ll(uint32_t size) } ll_new[i].next = NULL; - return (ll_new); + return ll_new; } /* @@ -2397,14 +2474,14 @@ destroy_device (volatile struct virtio_net *dev) /* Stop the RX queue. */ if (rte_eth_dev_rx_queue_stop(ports[0], vdev->vmdq_rx_q) != 0) { - LOG_DEBUG(VHOST_CONFIG, + RTE_LOG(DEBUG, VHOST_CONFIG, "(%"PRIu64") In destroy_device: Failed to stop " "rx queue:%d\n", dev->device_fh, vdev->vmdq_rx_q); } - LOG_DEBUG(VHOST_CONFIG, + RTE_LOG(DEBUG, VHOST_CONFIG, "(%"PRIu64") in destroy_device: Start put mbuf in " "mempool back to ring for RX queue: %d\n", dev->device_fh, vdev->vmdq_rx_q); @@ -2413,7 +2490,7 @@ destroy_device (volatile struct virtio_net *dev) /* Stop the TX queue. */ if (rte_eth_dev_tx_queue_stop(ports[0], vdev->vmdq_rx_q) != 0) { - LOG_DEBUG(VHOST_CONFIG, + RTE_LOG(DEBUG, VHOST_CONFIG, "(%"PRIu64") In destroy_device: Failed to " "stop tx queue:%d\n", dev->device_fh, vdev->vmdq_rx_q); @@ -2421,7 +2498,7 @@ destroy_device (volatile struct virtio_net *dev) vpool = &vpool_array[vdev->vmdq_rx_q + MAX_QUEUES]; - LOG_DEBUG(VHOST_CONFIG, + RTE_LOG(DEBUG, VHOST_CONFIG, "(%"PRIu64") destroy_device: Start put mbuf in mempool " "back to ring for TX queue: %d, dev:(%"PRIu64")\n", dev->device_fh, (vdev->vmdq_rx_q + MAX_QUEUES), @@ -2445,14 +2522,14 @@ check_hpa_regions(uint64_t vva_start, uint64_t size) uint32_t i, nregions = 0, page_size = getpagesize(); uint64_t cur_phys_addr = 0, next_phys_addr = 0; if (vva_start % page_size) { - LOG_DEBUG(VHOST_CONFIG, + RTE_LOG(DEBUG, VHOST_CONFIG, "in check_countinous: vva start(%p) mod page_size(%d) " "has remainder\n", (void *)(uintptr_t)vva_start, page_size); return 0; } if (size % page_size) { - LOG_DEBUG(VHOST_CONFIG, + RTE_LOG(DEBUG, VHOST_CONFIG, "in check_countinous: " "size((%"PRIu64")) mod page_size(%d) has remainder\n", size, page_size); @@ -2465,13 +2542,13 @@ check_hpa_regions(uint64_t vva_start, uint64_t size) (void *)(uintptr_t)(vva_start + i + page_size)); if ((cur_phys_addr + page_size) != next_phys_addr) { ++nregions; - LOG_DEBUG(VHOST_CONFIG, + RTE_LOG(DEBUG, VHOST_CONFIG, "in check_continuous: hva addr:(%p) is not " "continuous with hva addr:(%p), diff:%d\n", (void *)(uintptr_t)(vva_start + (uint64_t)i), (void *)(uintptr_t)(vva_start + (uint64_t)i + page_size), page_size); - LOG_DEBUG(VHOST_CONFIG, + RTE_LOG(DEBUG, VHOST_CONFIG, "in check_continuous: hpa addr:(%p) is not " "continuous with hpa addr:(%p), " "diff:(%"PRIu64")\n", @@ -2506,12 +2583,12 @@ fill_hpa_memory_regions(struct virtio_memory_regions_hpa *mem_region_hpa, struct mem_region_hpa[regionidx_hpa].host_phys_addr_offset = rte_mem_virt2phy((void *)(uintptr_t)(vva_start)) - mem_region_hpa[regionidx_hpa].guest_phys_address; - LOG_DEBUG(VHOST_CONFIG, + RTE_LOG(DEBUG, VHOST_CONFIG, "in fill_hpa_regions: guest phys addr start[%d]:(%p)\n", regionidx_hpa, (void *)(uintptr_t) (mem_region_hpa[regionidx_hpa].guest_phys_address)); - LOG_DEBUG(VHOST_CONFIG, + RTE_LOG(DEBUG, VHOST_CONFIG, "in fill_hpa_regions: host phys addr start[%d]:(%p)\n", regionidx_hpa, (void *)(uintptr_t) @@ -2531,12 +2608,12 @@ fill_hpa_memory_regions(struct virtio_memory_regions_hpa *mem_region_hpa, struct k + page_size; mem_region_hpa[regionidx_hpa].memory_size = k + page_size; - LOG_DEBUG(VHOST_CONFIG, "in fill_hpa_regions: guest " + RTE_LOG(DEBUG, VHOST_CONFIG, "in fill_hpa_regions: guest " "phys addr end [%d]:(%p)\n", regionidx_hpa, (void *)(uintptr_t) (mem_region_hpa[regionidx_hpa].guest_phys_address_end)); - LOG_DEBUG(VHOST_CONFIG, + RTE_LOG(DEBUG, VHOST_CONFIG, "in fill_hpa_regions: guest phys addr " "size [%d]:(%p)\n", regionidx_hpa, @@ -2548,12 +2625,12 @@ fill_hpa_memory_regions(struct virtio_memory_regions_hpa *mem_region_hpa, struct mem_region_hpa[regionidx_hpa].host_phys_addr_offset = next_phys_addr - mem_region_hpa[regionidx_hpa].guest_phys_address; - LOG_DEBUG(VHOST_CONFIG, "in fill_hpa_regions: guest" + RTE_LOG(DEBUG, VHOST_CONFIG, "in fill_hpa_regions: guest" " phys addr start[%d]:(%p)\n", regionidx_hpa, (void *)(uintptr_t) (mem_region_hpa[regionidx_hpa].guest_phys_address)); - LOG_DEBUG(VHOST_CONFIG, + RTE_LOG(DEBUG, VHOST_CONFIG, "in fill_hpa_regions: host phys addr " "start[%d]:(%p)\n", regionidx_hpa, @@ -2568,11 +2645,11 @@ fill_hpa_memory_regions(struct virtio_memory_regions_hpa *mem_region_hpa, struct = mem_region_hpa[regionidx_hpa].guest_phys_address + k + page_size; mem_region_hpa[regionidx_hpa].memory_size = k + page_size; - LOG_DEBUG(VHOST_CONFIG, "in fill_hpa_regions: guest phys addr end " + RTE_LOG(DEBUG, VHOST_CONFIG, "in fill_hpa_regions: guest phys addr end " "[%d]:(%p)\n", regionidx_hpa, (void *)(uintptr_t) (mem_region_hpa[regionidx_hpa].guest_phys_address_end)); - LOG_DEBUG(VHOST_CONFIG, "in fill_hpa_regions: guest phys addr size " + RTE_LOG(DEBUG, VHOST_CONFIG, "in fill_hpa_regions: guest phys addr size " "[%d]:(%p)\n", regionidx_hpa, (void *)(uintptr_t) (mem_region_hpa[regionidx_hpa].memory_size)); @@ -2662,12 +2739,12 @@ new_device (struct virtio_net *dev) count_in_ring = rte_ring_count(vpool_array[index].ring); - LOG_DEBUG(VHOST_CONFIG, + RTE_LOG(DEBUG, VHOST_CONFIG, "(%"PRIu64") in new_device: mbuf count in mempool " "before attach is: %d\n", dev->device_fh, rte_mempool_count(vpool_array[index].pool)); - LOG_DEBUG(VHOST_CONFIG, + RTE_LOG(DEBUG, VHOST_CONFIG, "(%"PRIu64") in new_device: mbuf count in ring " "before attach is : %d\n", dev->device_fh, count_in_ring); @@ -2678,12 +2755,12 @@ new_device (struct virtio_net *dev) for (i = 0; i < count_in_ring; i++) attach_rxmbuf_zcp(dev); - LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") in new_device: mbuf count in " - "mempool after attach is: %d\n", + RTE_LOG(DEBUG, VHOST_CONFIG, "(%" PRIu64 ") in new_device: " + "mbuf count in mempool after attach is: %d\n", dev->device_fh, rte_mempool_count(vpool_array[index].pool)); - LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") in new_device: mbuf count in " - "ring after attach is : %d\n", + RTE_LOG(DEBUG, VHOST_CONFIG, "(%" PRIu64 ") in new_device: " + "mbuf count in ring after attach is : %d\n", dev->device_fh, rte_ring_count(vpool_array[index].ring)); @@ -2693,7 +2770,7 @@ new_device (struct virtio_net *dev) if (rte_eth_dev_tx_queue_start(ports[0], vdev->vmdq_rx_q) != 0) { struct vpool *vpool = &vpool_array[vdev->vmdq_rx_q]; - LOG_DEBUG(VHOST_CONFIG, + RTE_LOG(DEBUG, VHOST_CONFIG, "(%"PRIu64") In new_device: Failed to start " "tx queue:%d\n", dev->device_fh, vdev->vmdq_rx_q); @@ -2707,7 +2784,7 @@ new_device (struct virtio_net *dev) if (rte_eth_dev_rx_queue_start(ports[0], vdev->vmdq_rx_q) != 0) { struct vpool *vpool = &vpool_array[vdev->vmdq_rx_q]; - LOG_DEBUG(VHOST_CONFIG, + RTE_LOG(DEBUG, VHOST_CONFIG, "(%"PRIu64") In new_device: Failed to start " "rx queue:%d\n", dev->device_fh, vdev->vmdq_rx_q); @@ -2715,7 +2792,7 @@ new_device (struct virtio_net *dev) /* Stop the TX queue. */ if (rte_eth_dev_tx_queue_stop(ports[0], vdev->vmdq_rx_q) != 0) { - LOG_DEBUG(VHOST_CONFIG, + RTE_LOG(DEBUG, VHOST_CONFIG, "(%"PRIu64") In new_device: Failed to " "stop tx queue:%d\n", dev->device_fh, vdev->vmdq_rx_q); @@ -2851,11 +2928,11 @@ setup_mempool_tbl(int socket, uint32_t index, char *pool_name, rte_align32pow2(nb_mbuf + 1), socket, RING_F_SP_ENQ | RING_F_SC_DEQ); if (likely(vpool_array[index].ring != NULL)) { - LOG_DEBUG(VHOST_CONFIG, + RTE_LOG(DEBUG, VHOST_CONFIG, "in setup_mempool_tbl: mbuf count in " "mempool is: %d\n", rte_mempool_count(vpool_array[index].pool)); - LOG_DEBUG(VHOST_CONFIG, + RTE_LOG(DEBUG, VHOST_CONFIG, "in setup_mempool_tbl: mbuf count in " "ring is: %d\n", rte_ring_count(vpool_array[index].ring)); @@ -2871,6 +2948,16 @@ setup_mempool_tbl(int socket, uint32_t index, char *pool_name, } } +/* When we receive a INT signal, unregister vhost driver */ +static void +sigint_handler(__rte_unused int signum) +{ + /* Unregister vhost driver. */ + int ret = rte_vhost_driver_unregister((char *)&dev_basename); + if (ret != 0) + rte_exit(EXIT_FAILURE, "vhost driver unregister failure.\n"); + exit(0); +} /* * Main function, does initialisation and calls the per-lcore functions. The CUSE @@ -2886,6 +2973,9 @@ main(int argc, char *argv[]) uint8_t portid; uint16_t queue_id; static pthread_t tid; + char thread_name[RTE_MAX_THREAD_NAME_LEN]; + + signal(SIGINT, sigint_handler); /* init EAL */ ret = rte_eal_init(argc, argv); @@ -2940,7 +3030,7 @@ main(int argc, char *argv[]) if (vm2vm_mode == VM2VM_HARDWARE) { /* Enable VT loop back to let L2 switch to do it. */ vmdq_conf_default.rx_adv_conf.vmdq_rx_conf.enable_loop_back = 1; - LOG_DEBUG(VHOST_CONFIG, + RTE_LOG(DEBUG, VHOST_CONFIG, "Enable loop back for L2 switch in vmdq.\n"); } } else { @@ -2978,12 +3068,10 @@ main(int argc, char *argv[]) if (vm2vm_mode == VM2VM_HARDWARE) { /* Enable VT loop back to let L2 switch to do it. */ vmdq_conf_default.rx_adv_conf.vmdq_rx_conf.enable_loop_back = 1; - LOG_DEBUG(VHOST_CONFIG, + RTE_LOG(DEBUG, VHOST_CONFIG, "Enable loop back for L2 switch in vmdq.\n"); } } - /* Set log level. */ - rte_set_log_level(LOG_LEVEL); /* initialize all ports */ for (portid = 0; portid < nb_ports; portid++) { @@ -3006,8 +3094,19 @@ main(int argc, char *argv[]) memset(&dev_statistics, 0, sizeof(dev_statistics)); /* Enable stats if the user option is set. */ - if (enable_stats) - pthread_create(&tid, NULL, (void*)print_stats, NULL ); + if (enable_stats) { + ret = pthread_create(&tid, NULL, (void *)print_stats, NULL); + if (ret != 0) + rte_exit(EXIT_FAILURE, + "Cannot create print-stats thread\n"); + + /* Set thread_name for aid in debugging. */ + snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, "print-stats"); + ret = rte_thread_setname(tid, thread_name); + if (ret != 0) + RTE_LOG(ERR, VHOST_CONFIG, + "Cannot set print-stats name\n"); + } /* Launch all data cores. */ if (zero_copy == 0) { @@ -3034,10 +3133,10 @@ main(int argc, char *argv[]) (void *)mbuf); } - LOG_DEBUG(VHOST_CONFIG, + RTE_LOG(DEBUG, VHOST_CONFIG, "in main: mbuf count in mempool at initial " "is: %d\n", count_in_mempool); - LOG_DEBUG(VHOST_CONFIG, + RTE_LOG(DEBUG, VHOST_CONFIG, "in main: mbuf count in ring at initial is :" " %d\n", rte_ring_count(vpool_array[index].ring)); @@ -3051,10 +3150,10 @@ main(int argc, char *argv[]) if (mergeable == 0) rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_MRG_RXBUF); - /* Register CUSE device to handle IOCTLs. */ + /* Register vhost(cuse or user) driver to handle vhost messages. */ ret = rte_vhost_driver_register((char *)&dev_basename); if (ret != 0) - rte_exit(EXIT_FAILURE,"CUSE device setup failure.\n"); + rte_exit(EXIT_FAILURE, "vhost driver register failure.\n"); rte_vhost_driver_callback_register(&virtio_net_device_ops); @@ -3063,4 +3162,3 @@ main(int argc, char *argv[]) return 0; } -