From: Jianfeng Tan Date: Wed, 15 Jun 2016 09:03:22 +0000 (+0000) Subject: net/virtio-user: add vhost-user adapter layer X-Git-Url: http://git.droids-corp.org/?a=commitdiff_plain;h=6a84c37e3975;p=dpdk.git net/virtio-user: add vhost-user adapter layer This patch provides vhost adapter layer implementation. Two main help functions are provided to upper layer (device emulation): - vhost_user_setup(), to set up vhost user backend; - vhost_user_sock(), to talk with vhost user backend. ---------------------- | ------------------ | | | virtio driver | | | ------------------ | | | | | ------------------ | ------> virtio-user PMD | | device emulate | | | | | | | | vhost adapter |-|----> (vhost_user.c) | ------------------ | ---------------------- | | -------------- --> (vhost-user protocol) | ------------------ | vhost backend | ------------------ Signed-off-by: Huawei Xie Signed-off-by: Jianfeng Tan Acked-by: Neil Horman Acked-by: Yuanhan Liu --- diff --git a/config/common_linuxapp b/config/common_linuxapp index 7e698e2726..2483dfa518 100644 --- a/config/common_linuxapp +++ b/config/common_linuxapp @@ -43,3 +43,4 @@ CONFIG_RTE_LIBRTE_VHOST=y CONFIG_RTE_LIBRTE_PMD_VHOST=y CONFIG_RTE_LIBRTE_PMD_AF_PACKET=y CONFIG_RTE_LIBRTE_POWER=y +CONFIG_RTE_VIRTIO_USER=y diff --git a/drivers/net/virtio/Makefile b/drivers/net/virtio/Makefile index ef84f60436..c2ed0fad88 100644 --- a/drivers/net/virtio/Makefile +++ b/drivers/net/virtio/Makefile @@ -55,6 +55,10 @@ ifeq ($(findstring RTE_MACHINE_CPUFLAG_SSSE3,$(CFLAGS)),RTE_MACHINE_CPUFLAG_SSSE SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_rxtx_simple.c endif +ifeq ($(CONFIG_RTE_VIRTIO_USER),y) +SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/vhost_user.c +endif + # this lib depends upon: DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_eal lib/librte_ether DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_mempool lib/librte_mbuf diff --git a/drivers/net/virtio/virtio_user/vhost.h b/drivers/net/virtio/virtio_user/vhost.h new file mode 100644 index 0000000000..4e04ede47a --- /dev/null +++ b/drivers/net/virtio/virtio_user/vhost.h @@ -0,0 +1,141 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _VHOST_NET_USER_H +#define _VHOST_NET_USER_H + +#include +#include +#include + +#include "../virtio_pci.h" +#include "../virtio_logs.h" +#include "../virtqueue.h" + +#define VHOST_MEMORY_MAX_NREGIONS 8 + +struct vhost_vring_state { + unsigned int index; + unsigned int num; +}; + +struct vhost_vring_file { + unsigned int index; + int fd; +}; + +struct vhost_vring_addr { + unsigned int index; + /* Option flags. */ + unsigned int flags; + /* Flag values: */ + /* Whether log address is valid. If set enables logging. */ +#define VHOST_VRING_F_LOG 0 + + /* Start of array of descriptors (virtually contiguous) */ + uint64_t desc_user_addr; + /* Used structure address. Must be 32 bit aligned */ + uint64_t used_user_addr; + /* Available structure address. Must be 16 bit aligned */ + uint64_t avail_user_addr; + /* Logging support. */ + /* Log writes to used structure, at offset calculated from specified + * address. Address must be 32 bit aligned. + */ + uint64_t log_guest_addr; +}; + +enum vhost_user_request { + VHOST_USER_NONE = 0, + VHOST_USER_GET_FEATURES = 1, + VHOST_USER_SET_FEATURES = 2, + VHOST_USER_SET_OWNER = 3, + VHOST_USER_RESET_OWNER = 4, + VHOST_USER_SET_MEM_TABLE = 5, + VHOST_USER_SET_LOG_BASE = 6, + VHOST_USER_SET_LOG_FD = 7, + VHOST_USER_SET_VRING_NUM = 8, + VHOST_USER_SET_VRING_ADDR = 9, + VHOST_USER_SET_VRING_BASE = 10, + VHOST_USER_GET_VRING_BASE = 11, + VHOST_USER_SET_VRING_KICK = 12, + VHOST_USER_SET_VRING_CALL = 13, + VHOST_USER_SET_VRING_ERR = 14, + VHOST_USER_GET_PROTOCOL_FEATURES = 15, + VHOST_USER_SET_PROTOCOL_FEATURES = 16, + VHOST_USER_GET_QUEUE_NUM = 17, + VHOST_USER_SET_VRING_ENABLE = 18, + VHOST_USER_MAX +}; + +struct vhost_memory_region { + uint64_t guest_phys_addr; + uint64_t memory_size; /* bytes */ + uint64_t userspace_addr; + uint64_t mmap_offset; +}; + +struct vhost_memory { + uint32_t nregions; + uint32_t padding; + struct vhost_memory_region regions[VHOST_MEMORY_MAX_NREGIONS]; +}; + +struct vhost_user_msg { + enum vhost_user_request request; + +#define VHOST_USER_VERSION_MASK 0x3 +#define VHOST_USER_REPLY_MASK (0x1 << 2) + uint32_t flags; + uint32_t size; /* the following payload size */ + union { +#define VHOST_USER_VRING_IDX_MASK 0xff +#define VHOST_USER_VRING_NOFD_MASK (0x1 << 8) + uint64_t u64; + struct vhost_vring_state state; + struct vhost_vring_addr addr; + struct vhost_memory memory; + } payload; + int fds[VHOST_MEMORY_MAX_NREGIONS]; +} __attribute((packed)); + +#define VHOST_USER_HDR_SIZE offsetof(struct vhost_user_msg, payload.u64) +#define VHOST_USER_PAYLOAD_SIZE \ + (sizeof(struct vhost_user_msg) - VHOST_USER_HDR_SIZE) + +/* The version of the protocol we support */ +#define VHOST_USER_VERSION 0x1 + +int vhost_user_sock(int vhostfd, uint64_t req, void *arg); +int vhost_user_setup(const char *path); +#endif diff --git a/drivers/net/virtio/virtio_user/vhost_user.c b/drivers/net/virtio/virtio_user/vhost_user.c new file mode 100644 index 0000000000..2b34cac13c --- /dev/null +++ b/drivers/net/virtio/virtio_user/vhost_user.c @@ -0,0 +1,405 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "vhost.h" + +static int +vhost_user_write(int fd, void *buf, int len, int *fds, int fd_num) +{ + int r; + struct msghdr msgh; + struct iovec iov; + size_t fd_size = fd_num * sizeof(int); + char control[CMSG_SPACE(fd_size)]; + struct cmsghdr *cmsg; + + memset(&msgh, 0, sizeof(msgh)); + memset(control, 0, sizeof(control)); + + iov.iov_base = (uint8_t *)buf; + iov.iov_len = len; + + msgh.msg_iov = &iov; + msgh.msg_iovlen = 1; + msgh.msg_control = control; + msgh.msg_controllen = sizeof(control); + + cmsg = CMSG_FIRSTHDR(&msgh); + cmsg->cmsg_len = CMSG_LEN(fd_size); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + memcpy(CMSG_DATA(cmsg), fds, fd_size); + + do { + r = sendmsg(fd, &msgh, 0); + } while (r < 0 && errno == EINTR); + + return r; +} + +static int +vhost_user_read(int fd, struct vhost_user_msg *msg) +{ + uint32_t valid_flags = VHOST_USER_REPLY_MASK | VHOST_USER_VERSION; + int ret, sz_hdr = VHOST_USER_HDR_SIZE, sz_payload; + + ret = recv(fd, (void *)msg, sz_hdr, 0); + if (ret < sz_hdr) { + PMD_DRV_LOG(ERR, "Failed to recv msg hdr: %d instead of %d.", + ret, sz_hdr); + goto fail; + } + + /* validate msg flags */ + if (msg->flags != (valid_flags)) { + PMD_DRV_LOG(ERR, "Failed to recv msg: flags %x instead of %x.", + msg->flags, valid_flags); + goto fail; + } + + sz_payload = msg->size; + if (sz_payload) { + ret = recv(fd, (void *)((char *)msg + sz_hdr), sz_payload, 0); + if (ret < sz_payload) { + PMD_DRV_LOG(ERR, + "Failed to recv msg payload: %d instead of %d.", + ret, msg->size); + goto fail; + } + } + + return 0; + +fail: + return -1; +} + +struct hugepage_file_info { + uint64_t addr; /**< virtual addr */ + size_t size; /**< the file size */ + char path[PATH_MAX]; /**< path to backing file */ +}; + +/* Two possible options: + * 1. Match HUGEPAGE_INFO_FMT to find the file storing struct hugepage_file + * array. This is simple but cannot be used in secondary process because + * secondary process will close and munmap that file. + * 2. Match HUGEFILE_FMT to find hugepage files directly. + * + * We choose option 2. + */ +static int +get_hugepage_file_info(struct hugepage_file_info huges[], int max) +{ + int idx; + FILE *f; + char buf[BUFSIZ], *tmp, *tail; + char *str_underline, *str_start; + int huge_index; + uint64_t v_start, v_end; + + f = fopen("/proc/self/maps", "r"); + if (!f) { + PMD_DRV_LOG(ERR, "cannot open /proc/self/maps"); + return -1; + } + + idx = 0; + while (fgets(buf, sizeof(buf), f) != NULL) { + if (sscanf(buf, "%" PRIx64 "-%" PRIx64, &v_start, &v_end) < 2) { + PMD_DRV_LOG(ERR, "Failed to parse address"); + goto error; + } + + tmp = strchr(buf, ' ') + 1; /** skip address */ + tmp = strchr(tmp, ' ') + 1; /** skip perm */ + tmp = strchr(tmp, ' ') + 1; /** skip offset */ + tmp = strchr(tmp, ' ') + 1; /** skip dev */ + tmp = strchr(tmp, ' ') + 1; /** skip inode */ + while (*tmp == ' ') /** skip spaces */ + tmp++; + tail = strrchr(tmp, '\n'); /** remove newline if exists */ + if (tail) + *tail = '\0'; + + /* Match HUGEFILE_FMT, aka "%s/%smap_%d", + * which is defined in eal_filesystem.h + */ + str_underline = strrchr(tmp, '_'); + if (!str_underline) + continue; + + str_start = str_underline - strlen("map"); + if (str_start < tmp) + continue; + + if (sscanf(str_start, "map_%d", &huge_index) != 1) + continue; + + if (idx >= max) { + PMD_DRV_LOG(ERR, "Exceed maximum of %d", max); + goto error; + } + huges[idx].addr = v_start; + huges[idx].size = v_end - v_start; + strcpy(huges[idx].path, tmp); + idx++; + } + + fclose(f); + return idx; + +error: + fclose(f); + return -1; +} + +static int +prepare_vhost_memory_user(struct vhost_user_msg *msg, int fds[]) +{ + int i, num; + struct hugepage_file_info huges[VHOST_MEMORY_MAX_NREGIONS]; + struct vhost_memory_region *mr; + + num = get_hugepage_file_info(huges, VHOST_MEMORY_MAX_NREGIONS); + if (num < 0) { + PMD_INIT_LOG(ERR, "Failed to prepare memory for vhost-user"); + return -1; + } + + for (i = 0; i < num; ++i) { + mr = &msg->payload.memory.regions[i]; + mr->guest_phys_addr = huges[i].addr; /* use vaddr! */ + mr->userspace_addr = huges[i].addr; + mr->memory_size = huges[i].size; + mr->mmap_offset = 0; + fds[i] = open(huges[i].path, O_RDWR); + } + + msg->payload.memory.nregions = num; + msg->payload.memory.padding = 0; + + return 0; +} + +static struct vhost_user_msg m; + +static const char * const vhost_msg_strings[] = { + [VHOST_USER_SET_OWNER] = "VHOST_USER_SET_OWNER", + [VHOST_USER_RESET_OWNER] = "VHOST_USER_RESET_OWNER", + [VHOST_USER_SET_FEATURES] = "VHOST_USER_SET_FEATURES", + [VHOST_USER_GET_FEATURES] = "VHOST_USER_GET_FEATURES", + [VHOST_USER_SET_VRING_CALL] = "VHOST_USER_SET_VRING_CALL", + [VHOST_USER_SET_VRING_NUM] = "VHOST_USER_SET_VRING_NUM", + [VHOST_USER_SET_VRING_BASE] = "VHOST_USER_SET_VRING_BASE", + [VHOST_USER_GET_VRING_BASE] = "VHOST_USER_GET_VRING_BASE", + [VHOST_USER_SET_VRING_ADDR] = "VHOST_USER_SET_VRING_ADDR", + [VHOST_USER_SET_VRING_KICK] = "VHOST_USER_SET_VRING_KICK", + [VHOST_USER_SET_MEM_TABLE] = "VHOST_USER_SET_MEM_TABLE", + NULL, +}; + +int +vhost_user_sock(int vhostfd, uint64_t req, void *arg) +{ + struct vhost_user_msg msg; + struct vhost_vring_file *file = 0; + int need_reply = 0; + int fds[VHOST_MEMORY_MAX_NREGIONS]; + int fd_num = 0; + int i, len; + + RTE_SET_USED(m); + RTE_SET_USED(vhost_msg_strings); + + PMD_DRV_LOG(INFO, "%s", vhost_msg_strings[req]); + + msg.request = req; + msg.flags = VHOST_USER_VERSION; + msg.size = 0; + + switch (req) { + case VHOST_USER_GET_FEATURES: + need_reply = 1; + break; + + case VHOST_USER_SET_FEATURES: + case VHOST_USER_SET_LOG_BASE: + msg.payload.u64 = *((__u64 *)arg); + msg.size = sizeof(m.payload.u64); + break; + + case VHOST_USER_SET_OWNER: + case VHOST_USER_RESET_OWNER: + break; + + case VHOST_USER_SET_MEM_TABLE: + if (prepare_vhost_memory_user(&msg, fds) < 0) + return -1; + fd_num = msg.payload.memory.nregions; + msg.size = sizeof(m.payload.memory.nregions); + msg.size += sizeof(m.payload.memory.padding); + msg.size += fd_num * sizeof(struct vhost_memory_region); + break; + + case VHOST_USER_SET_LOG_FD: + fds[fd_num++] = *((int *)arg); + break; + + case VHOST_USER_SET_VRING_NUM: + case VHOST_USER_SET_VRING_BASE: + memcpy(&msg.payload.state, arg, sizeof(msg.payload.state)); + msg.size = sizeof(m.payload.state); + break; + + case VHOST_USER_GET_VRING_BASE: + memcpy(&msg.payload.state, arg, sizeof(msg.payload.state)); + msg.size = sizeof(m.payload.state); + need_reply = 1; + break; + + case VHOST_USER_SET_VRING_ADDR: + memcpy(&msg.payload.addr, arg, sizeof(msg.payload.addr)); + msg.size = sizeof(m.payload.addr); + break; + + case VHOST_USER_SET_VRING_KICK: + case VHOST_USER_SET_VRING_CALL: + case VHOST_USER_SET_VRING_ERR: + file = arg; + msg.payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK; + msg.size = sizeof(m.payload.u64); + if (file->fd > 0) + fds[fd_num++] = file->fd; + else + msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK; + break; + + default: + PMD_DRV_LOG(ERR, "trying to send unhandled msg type"); + return -1; + } + + len = VHOST_USER_HDR_SIZE + msg.size; + if (vhost_user_write(vhostfd, &msg, len, fds, fd_num) < 0) { + PMD_DRV_LOG(ERR, "%s failed: %s", + vhost_msg_strings[req], strerror(errno)); + return -1; + } + + if (req == VHOST_USER_SET_MEM_TABLE) + for (i = 0; i < fd_num; ++i) + close(fds[i]); + + if (need_reply) { + if (vhost_user_read(vhostfd, &msg) < 0) { + PMD_DRV_LOG(ERR, "Received msg failed: %s", + strerror(errno)); + return -1; + } + + if (req != msg.request) { + PMD_DRV_LOG(ERR, "Received unexpected msg type"); + return -1; + } + + switch (req) { + case VHOST_USER_GET_FEATURES: + if (msg.size != sizeof(m.payload.u64)) { + PMD_DRV_LOG(ERR, "Received bad msg size"); + return -1; + } + *((__u64 *)arg) = msg.payload.u64; + break; + case VHOST_USER_GET_VRING_BASE: + if (msg.size != sizeof(m.payload.state)) { + PMD_DRV_LOG(ERR, "Received bad msg size"); + return -1; + } + memcpy(arg, &msg.payload.state, + sizeof(struct vhost_vring_state)); + break; + default: + PMD_DRV_LOG(ERR, "Received unexpected msg type"); + return -1; + } + } + + return 0; +} + +/** + * Set up environment to talk with a vhost user backend. + * @param path + * - The path to vhost user unix socket file. + * + * @return + * - (-1) if fail to set up; + * - (>=0) if successful, and it is the fd to vhostfd. + */ +int +vhost_user_setup(const char *path) +{ + int fd; + int flag; + struct sockaddr_un un; + + fd = socket(AF_UNIX, SOCK_STREAM, 0); + if (fd < 0) { + PMD_DRV_LOG(ERR, "socket() error, %s", strerror(errno)); + return -1; + } + + flag = fcntl(fd, F_GETFD); + fcntl(fd, F_SETFD, flag | FD_CLOEXEC); + + memset(&un, 0, sizeof(un)); + un.sun_family = AF_UNIX; + snprintf(un.sun_path, sizeof(un.sun_path), "%s", path); + if (connect(fd, (struct sockaddr *)&un, sizeof(un)) < 0) { + PMD_DRV_LOG(ERR, "connect error, %s", strerror(errno)); + close(fd); + return -1; + } + + return fd; +}