We now have one vhost implementation; no sub source dir is needed.
Remove it by move them to upper dir.
Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
# all source are stored in SRCS-y
SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := virtio-net.c vhost_rxtx.c
-SRCS-$(CONFIG_RTE_LIBRTE_VHOST) += vhost_user/vhost-net-user.c
-SRCS-$(CONFIG_RTE_LIBRTE_VHOST) += vhost_user/virtio-net-user.c
-SRCS-$(CONFIG_RTE_LIBRTE_VHOST) += vhost_user/fd_man.c
+SRCS-$(CONFIG_RTE_LIBRTE_VHOST) += vhost-net-user.c
+SRCS-$(CONFIG_RTE_LIBRTE_VHOST) += virtio-net-user.c
+SRCS-$(CONFIG_RTE_LIBRTE_VHOST) += fd_man.c
# install includes
SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_virtio_net.h
--- /dev/null
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/socket.h>
+#include <sys/select.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <rte_common.h>
+#include <rte_log.h>
+
+#include "fd_man.h"
+
+/**
+ * Returns the index in the fdset for a given fd.
+ * If fd is -1, it means to search for a free entry.
+ * @return
+ * index for the fd, or -1 if fd isn't in the fdset.
+ */
+static int
+fdset_find_fd(struct fdset *pfdset, int fd)
+{
+ int i;
+
+ if (pfdset == NULL)
+ return -1;
+
+ for (i = 0; i < MAX_FDS && pfdset->fd[i].fd != fd; i++)
+ ;
+
+ return i == MAX_FDS ? -1 : i;
+}
+
+static int
+fdset_find_free_slot(struct fdset *pfdset)
+{
+ return fdset_find_fd(pfdset, -1);
+}
+
+static int
+fdset_add_fd(struct fdset *pfdset, int idx, int fd,
+ fd_cb rcb, fd_cb wcb, void *dat)
+{
+ struct fdentry *pfdentry;
+
+ if (pfdset == NULL || idx >= MAX_FDS || fd >= FD_SETSIZE)
+ return -1;
+
+ pfdentry = &pfdset->fd[idx];
+ pfdentry->fd = fd;
+ pfdentry->rcb = rcb;
+ pfdentry->wcb = wcb;
+ pfdentry->dat = dat;
+
+ return 0;
+}
+
+/**
+ * Fill the read/write fd_set with the fds in the fdset.
+ * @return
+ * the maximum fds filled in the read/write fd_set.
+ */
+static int
+fdset_fill(fd_set *rfset, fd_set *wfset, struct fdset *pfdset)
+{
+ struct fdentry *pfdentry;
+ int i, maxfds = -1;
+ int num = MAX_FDS;
+
+ if (pfdset == NULL)
+ return -1;
+
+ for (i = 0; i < num; i++) {
+ pfdentry = &pfdset->fd[i];
+ if (pfdentry->fd != -1) {
+ int added = 0;
+ if (pfdentry->rcb && rfset) {
+ FD_SET(pfdentry->fd, rfset);
+ added = 1;
+ }
+ if (pfdentry->wcb && wfset) {
+ FD_SET(pfdentry->fd, wfset);
+ added = 1;
+ }
+ if (added)
+ maxfds = pfdentry->fd < maxfds ?
+ maxfds : pfdentry->fd;
+ }
+ }
+ return maxfds;
+}
+
+void
+fdset_init(struct fdset *pfdset)
+{
+ int i;
+
+ if (pfdset == NULL)
+ return;
+
+ for (i = 0; i < MAX_FDS; i++) {
+ pfdset->fd[i].fd = -1;
+ pfdset->fd[i].dat = NULL;
+ }
+ pfdset->num = 0;
+}
+
+/**
+ * Register the fd in the fdset with read/write handler and context.
+ */
+int
+fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, void *dat)
+{
+ int i;
+
+ if (pfdset == NULL || fd == -1)
+ return -1;
+
+ pthread_mutex_lock(&pfdset->fd_mutex);
+
+ /* Find a free slot in the list. */
+ i = fdset_find_free_slot(pfdset);
+ if (i == -1 || fdset_add_fd(pfdset, i, fd, rcb, wcb, dat) < 0) {
+ pthread_mutex_unlock(&pfdset->fd_mutex);
+ return -2;
+ }
+
+ pfdset->num++;
+
+ pthread_mutex_unlock(&pfdset->fd_mutex);
+
+ return 0;
+}
+
+/**
+ * Unregister the fd from the fdset.
+ * Returns context of a given fd or NULL.
+ */
+void *
+fdset_del(struct fdset *pfdset, int fd)
+{
+ int i;
+ void *dat = NULL;
+
+ if (pfdset == NULL || fd == -1)
+ return NULL;
+
+ do {
+ pthread_mutex_lock(&pfdset->fd_mutex);
+
+ i = fdset_find_fd(pfdset, fd);
+ if (i != -1 && pfdset->fd[i].busy == 0) {
+ /* busy indicates r/wcb is executing! */
+ dat = pfdset->fd[i].dat;
+ pfdset->fd[i].fd = -1;
+ pfdset->fd[i].rcb = pfdset->fd[i].wcb = NULL;
+ pfdset->fd[i].dat = NULL;
+ pfdset->num--;
+ i = -1;
+ }
+ pthread_mutex_unlock(&pfdset->fd_mutex);
+ } while (i != -1);
+
+ return dat;
+}
+
+/**
+ * Unregister the fd at the specified slot from the fdset.
+ */
+static void
+fdset_del_slot(struct fdset *pfdset, int index)
+{
+ if (pfdset == NULL || index < 0 || index >= MAX_FDS)
+ return;
+
+ pthread_mutex_lock(&pfdset->fd_mutex);
+
+ pfdset->fd[index].fd = -1;
+ pfdset->fd[index].rcb = pfdset->fd[index].wcb = NULL;
+ pfdset->fd[index].dat = NULL;
+ pfdset->num--;
+
+ pthread_mutex_unlock(&pfdset->fd_mutex);
+}
+
+/**
+ * This functions runs in infinite blocking loop until there is no fd in
+ * pfdset. It calls corresponding r/w handler if there is event on the fd.
+ *
+ * Before the callback is called, we set the flag to busy status; If other
+ * thread(now rte_vhost_driver_unregister) calls fdset_del concurrently, it
+ * will wait until the flag is reset to zero(which indicates the callback is
+ * finished), then it could free the context after fdset_del.
+ */
+void
+fdset_event_dispatch(struct fdset *pfdset)
+{
+ fd_set rfds, wfds;
+ int i, maxfds;
+ struct fdentry *pfdentry;
+ int num = MAX_FDS;
+ fd_cb rcb, wcb;
+ void *dat;
+ int fd;
+ int remove1, remove2;
+ int ret;
+
+ if (pfdset == NULL)
+ return;
+
+ while (1) {
+ struct timeval tv;
+ tv.tv_sec = 1;
+ tv.tv_usec = 0;
+ FD_ZERO(&rfds);
+ FD_ZERO(&wfds);
+ pthread_mutex_lock(&pfdset->fd_mutex);
+
+ maxfds = fdset_fill(&rfds, &wfds, pfdset);
+
+ pthread_mutex_unlock(&pfdset->fd_mutex);
+
+ /*
+ * When select is blocked, other threads might unregister
+ * listenfds from and register new listenfds into fdset.
+ * When select returns, the entries for listenfds in the fdset
+ * might have been updated. It is ok if there is unwanted call
+ * for new listenfds.
+ */
+ ret = select(maxfds + 1, &rfds, &wfds, NULL, &tv);
+ if (ret <= 0)
+ continue;
+
+ for (i = 0; i < num; i++) {
+ remove1 = remove2 = 0;
+ pthread_mutex_lock(&pfdset->fd_mutex);
+ pfdentry = &pfdset->fd[i];
+ fd = pfdentry->fd;
+ rcb = pfdentry->rcb;
+ wcb = pfdentry->wcb;
+ dat = pfdentry->dat;
+ pfdentry->busy = 1;
+ pthread_mutex_unlock(&pfdset->fd_mutex);
+ if (fd >= 0 && FD_ISSET(fd, &rfds) && rcb)
+ rcb(fd, dat, &remove1);
+ if (fd >= 0 && FD_ISSET(fd, &wfds) && wcb)
+ wcb(fd, dat, &remove2);
+ pfdentry->busy = 0;
+ /*
+ * fdset_del needs to check busy flag.
+ * We don't allow fdset_del to be called in callback
+ * directly.
+ */
+ /*
+ * When we are to clean up the fd from fdset,
+ * because the fd is closed in the cb,
+ * the old fd val could be reused by when creates new
+ * listen fd in another thread, we couldn't call
+ * fd_set_del.
+ */
+ if (remove1 || remove2)
+ fdset_del_slot(pfdset, i);
+ }
+ }
+}
--- /dev/null
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _FD_MAN_H_
+#define _FD_MAN_H_
+#include <stdint.h>
+#include <pthread.h>
+
+#define MAX_FDS 1024
+
+typedef void (*fd_cb)(int fd, void *dat, int *remove);
+
+struct fdentry {
+ int fd; /* -1 indicates this entry is empty */
+ fd_cb rcb; /* callback when this fd is readable. */
+ fd_cb wcb; /* callback when this fd is writeable.*/
+ void *dat; /* fd context */
+ int busy; /* whether this entry is being used in cb. */
+};
+
+struct fdset {
+ struct fdentry fd[MAX_FDS];
+ pthread_mutex_t fd_mutex;
+ int num; /* current fd number of this fdset */
+};
+
+
+void fdset_init(struct fdset *pfdset);
+
+int fdset_add(struct fdset *pfdset, int fd,
+ fd_cb rcb, fd_cb wcb, void *dat);
+
+void *fdset_del(struct fdset *pfdset, int fd);
+
+void fdset_event_dispatch(struct fdset *pfdset);
+
+#endif
--- /dev/null
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <sys/queue.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <pthread.h>
+
+#include <rte_log.h>
+#include <rte_virtio_net.h>
+
+#include "fd_man.h"
+#include "vhost-net-user.h"
+#include "vhost-net.h"
+#include "virtio-net-user.h"
+
+/*
+ * Every time rte_vhost_driver_register() is invoked, an associated
+ * vhost_user_socket struct will be created.
+ */
+struct vhost_user_socket {
+ char *path;
+ int listenfd;
+ int connfd;
+ bool is_server;
+ bool reconnect;
+};
+
+struct vhost_user_connection {
+ struct vhost_user_socket *vsocket;
+ int vid;
+};
+
+#define MAX_VHOST_SOCKET 1024
+struct vhost_user {
+ struct vhost_user_socket *vsockets[MAX_VHOST_SOCKET];
+ struct fdset fdset;
+ int vsocket_cnt;
+ pthread_mutex_t mutex;
+};
+
+#define MAX_VIRTIO_BACKLOG 128
+
+static void vhost_user_server_new_connection(int fd, void *data, int *remove);
+static void vhost_user_msg_handler(int fd, void *dat, int *remove);
+static int vhost_user_create_client(struct vhost_user_socket *vsocket);
+
+static struct vhost_user vhost_user = {
+ .fdset = {
+ .fd = { [0 ... MAX_FDS - 1] = {-1, NULL, NULL, NULL, 0} },
+ .fd_mutex = PTHREAD_MUTEX_INITIALIZER,
+ .num = 0
+ },
+ .vsocket_cnt = 0,
+ .mutex = PTHREAD_MUTEX_INITIALIZER,
+};
+
+static const char *vhost_message_str[VHOST_USER_MAX] = {
+ [VHOST_USER_NONE] = "VHOST_USER_NONE",
+ [VHOST_USER_GET_FEATURES] = "VHOST_USER_GET_FEATURES",
+ [VHOST_USER_SET_FEATURES] = "VHOST_USER_SET_FEATURES",
+ [VHOST_USER_SET_OWNER] = "VHOST_USER_SET_OWNER",
+ [VHOST_USER_RESET_OWNER] = "VHOST_USER_RESET_OWNER",
+ [VHOST_USER_SET_MEM_TABLE] = "VHOST_USER_SET_MEM_TABLE",
+ [VHOST_USER_SET_LOG_BASE] = "VHOST_USER_SET_LOG_BASE",
+ [VHOST_USER_SET_LOG_FD] = "VHOST_USER_SET_LOG_FD",
+ [VHOST_USER_SET_VRING_NUM] = "VHOST_USER_SET_VRING_NUM",
+ [VHOST_USER_SET_VRING_ADDR] = "VHOST_USER_SET_VRING_ADDR",
+ [VHOST_USER_SET_VRING_BASE] = "VHOST_USER_SET_VRING_BASE",
+ [VHOST_USER_GET_VRING_BASE] = "VHOST_USER_GET_VRING_BASE",
+ [VHOST_USER_SET_VRING_KICK] = "VHOST_USER_SET_VRING_KICK",
+ [VHOST_USER_SET_VRING_CALL] = "VHOST_USER_SET_VRING_CALL",
+ [VHOST_USER_SET_VRING_ERR] = "VHOST_USER_SET_VRING_ERR",
+ [VHOST_USER_GET_PROTOCOL_FEATURES] = "VHOST_USER_GET_PROTOCOL_FEATURES",
+ [VHOST_USER_SET_PROTOCOL_FEATURES] = "VHOST_USER_SET_PROTOCOL_FEATURES",
+ [VHOST_USER_GET_QUEUE_NUM] = "VHOST_USER_GET_QUEUE_NUM",
+ [VHOST_USER_SET_VRING_ENABLE] = "VHOST_USER_SET_VRING_ENABLE",
+ [VHOST_USER_SEND_RARP] = "VHOST_USER_SEND_RARP",
+};
+
+/* return bytes# of read on success or negative val on failure. */
+static int
+read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
+{
+ struct iovec iov;
+ struct msghdr msgh;
+ size_t fdsize = fd_num * sizeof(int);
+ char control[CMSG_SPACE(fdsize)];
+ struct cmsghdr *cmsg;
+ int ret;
+
+ memset(&msgh, 0, sizeof(msgh));
+ iov.iov_base = buf;
+ iov.iov_len = buflen;
+
+ msgh.msg_iov = &iov;
+ msgh.msg_iovlen = 1;
+ msgh.msg_control = control;
+ msgh.msg_controllen = sizeof(control);
+
+ ret = recvmsg(sockfd, &msgh, 0);
+ if (ret <= 0) {
+ RTE_LOG(ERR, VHOST_CONFIG, "recvmsg failed\n");
+ return ret;
+ }
+
+ if (msgh.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) {
+ RTE_LOG(ERR, VHOST_CONFIG, "truncted msg\n");
+ return -1;
+ }
+
+ for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL;
+ cmsg = CMSG_NXTHDR(&msgh, cmsg)) {
+ if ((cmsg->cmsg_level == SOL_SOCKET) &&
+ (cmsg->cmsg_type == SCM_RIGHTS)) {
+ memcpy(fds, CMSG_DATA(cmsg), fdsize);
+ break;
+ }
+ }
+
+ return ret;
+}
+
+/* return bytes# of read on success or negative val on failure. */
+static int
+read_vhost_message(int sockfd, struct VhostUserMsg *msg)
+{
+ int ret;
+
+ ret = read_fd_message(sockfd, (char *)msg, VHOST_USER_HDR_SIZE,
+ msg->fds, VHOST_MEMORY_MAX_NREGIONS);
+ if (ret <= 0)
+ return ret;
+
+ if (msg && msg->size) {
+ if (msg->size > sizeof(msg->payload)) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "invalid msg size: %d\n", msg->size);
+ return -1;
+ }
+ ret = read(sockfd, &msg->payload, msg->size);
+ if (ret <= 0)
+ return ret;
+ if (ret != (int)msg->size) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "read control message failed\n");
+ return -1;
+ }
+ }
+
+ return ret;
+}
+
+static int
+send_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
+{
+
+ struct iovec iov;
+ struct msghdr msgh;
+ size_t fdsize = fd_num * sizeof(int);
+ char control[CMSG_SPACE(fdsize)];
+ struct cmsghdr *cmsg;
+ int ret;
+
+ memset(&msgh, 0, sizeof(msgh));
+ iov.iov_base = buf;
+ iov.iov_len = buflen;
+
+ msgh.msg_iov = &iov;
+ msgh.msg_iovlen = 1;
+
+ if (fds && fd_num > 0) {
+ msgh.msg_control = control;
+ msgh.msg_controllen = sizeof(control);
+ cmsg = CMSG_FIRSTHDR(&msgh);
+ cmsg->cmsg_len = CMSG_LEN(fdsize);
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_RIGHTS;
+ memcpy(CMSG_DATA(cmsg), fds, fdsize);
+ } else {
+ msgh.msg_control = NULL;
+ msgh.msg_controllen = 0;
+ }
+
+ do {
+ ret = sendmsg(sockfd, &msgh, 0);
+ } while (ret < 0 && errno == EINTR);
+
+ if (ret < 0) {
+ RTE_LOG(ERR, VHOST_CONFIG, "sendmsg error\n");
+ return ret;
+ }
+
+ return ret;
+}
+
+static int
+send_vhost_message(int sockfd, struct VhostUserMsg *msg)
+{
+ int ret;
+
+ if (!msg)
+ return 0;
+
+ msg->flags &= ~VHOST_USER_VERSION_MASK;
+ msg->flags |= VHOST_USER_VERSION;
+ msg->flags |= VHOST_USER_REPLY_MASK;
+
+ ret = send_fd_message(sockfd, (char *)msg,
+ VHOST_USER_HDR_SIZE + msg->size, NULL, 0);
+
+ return ret;
+}
+
+
+static void
+vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket)
+{
+ int vid;
+ size_t size;
+ struct vhost_user_connection *conn;
+ int ret;
+
+ conn = malloc(sizeof(*conn));
+ if (conn == NULL) {
+ close(fd);
+ return;
+ }
+
+ vid = vhost_new_device();
+ if (vid == -1) {
+ close(fd);
+ free(conn);
+ return;
+ }
+
+ size = strnlen(vsocket->path, PATH_MAX);
+ vhost_set_ifname(vid, vsocket->path, size);
+
+ RTE_LOG(INFO, VHOST_CONFIG, "new device, handle is %d\n", vid);
+
+ vsocket->connfd = fd;
+ conn->vsocket = vsocket;
+ conn->vid = vid;
+ ret = fdset_add(&vhost_user.fdset, fd, vhost_user_msg_handler,
+ NULL, conn);
+ if (ret < 0) {
+ vsocket->connfd = -1;
+ free(conn);
+ close(fd);
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "failed to add fd %d into vhost server fdset\n",
+ fd);
+ }
+}
+
+/* call back when there is new vhost-user connection from client */
+static void
+vhost_user_server_new_connection(int fd, void *dat, int *remove __rte_unused)
+{
+ struct vhost_user_socket *vsocket = dat;
+
+ fd = accept(fd, NULL, NULL);
+ if (fd < 0)
+ return;
+
+ RTE_LOG(INFO, VHOST_CONFIG, "new vhost user connection is %d\n", fd);
+ vhost_user_add_connection(fd, vsocket);
+}
+
+/* callback when there is message on the connfd */
+static void
+vhost_user_msg_handler(int connfd, void *dat, int *remove)
+{
+ int vid;
+ struct vhost_user_connection *conn = dat;
+ struct VhostUserMsg msg;
+ uint64_t features;
+ int ret;
+
+ vid = conn->vid;
+ ret = read_vhost_message(connfd, &msg);
+ if (ret <= 0 || msg.request >= VHOST_USER_MAX) {
+ struct vhost_user_socket *vsocket = conn->vsocket;
+
+ if (ret < 0)
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "vhost read message failed\n");
+ else if (ret == 0)
+ RTE_LOG(INFO, VHOST_CONFIG,
+ "vhost peer closed\n");
+ else
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "vhost read incorrect message\n");
+
+ vsocket->connfd = -1;
+ close(connfd);
+ *remove = 1;
+ free(conn);
+ vhost_destroy_device(vid);
+
+ if (vsocket->reconnect)
+ vhost_user_create_client(vsocket);
+
+ return;
+ }
+
+ RTE_LOG(INFO, VHOST_CONFIG, "read message %s\n",
+ vhost_message_str[msg.request]);
+ switch (msg.request) {
+ case VHOST_USER_GET_FEATURES:
+ ret = vhost_get_features(vid, &features);
+ msg.payload.u64 = features;
+ msg.size = sizeof(msg.payload.u64);
+ send_vhost_message(connfd, &msg);
+ break;
+ case VHOST_USER_SET_FEATURES:
+ features = msg.payload.u64;
+ vhost_set_features(vid, &features);
+ break;
+
+ case VHOST_USER_GET_PROTOCOL_FEATURES:
+ msg.payload.u64 = VHOST_USER_PROTOCOL_FEATURES;
+ msg.size = sizeof(msg.payload.u64);
+ send_vhost_message(connfd, &msg);
+ break;
+ case VHOST_USER_SET_PROTOCOL_FEATURES:
+ user_set_protocol_features(vid, msg.payload.u64);
+ break;
+
+ case VHOST_USER_SET_OWNER:
+ vhost_set_owner(vid);
+ break;
+ case VHOST_USER_RESET_OWNER:
+ vhost_reset_owner(vid);
+ break;
+
+ case VHOST_USER_SET_MEM_TABLE:
+ user_set_mem_table(vid, &msg);
+ break;
+
+ case VHOST_USER_SET_LOG_BASE:
+ user_set_log_base(vid, &msg);
+
+ /* it needs a reply */
+ msg.size = sizeof(msg.payload.u64);
+ send_vhost_message(connfd, &msg);
+ break;
+ case VHOST_USER_SET_LOG_FD:
+ close(msg.fds[0]);
+ RTE_LOG(INFO, VHOST_CONFIG, "not implemented.\n");
+ break;
+
+ case VHOST_USER_SET_VRING_NUM:
+ vhost_set_vring_num(vid, &msg.payload.state);
+ break;
+ case VHOST_USER_SET_VRING_ADDR:
+ vhost_set_vring_addr(vid, &msg.payload.addr);
+ break;
+ case VHOST_USER_SET_VRING_BASE:
+ vhost_set_vring_base(vid, &msg.payload.state);
+ break;
+
+ case VHOST_USER_GET_VRING_BASE:
+ ret = user_get_vring_base(vid, &msg.payload.state);
+ msg.size = sizeof(msg.payload.state);
+ send_vhost_message(connfd, &msg);
+ break;
+
+ case VHOST_USER_SET_VRING_KICK:
+ user_set_vring_kick(vid, &msg);
+ break;
+ case VHOST_USER_SET_VRING_CALL:
+ user_set_vring_call(vid, &msg);
+ break;
+
+ case VHOST_USER_SET_VRING_ERR:
+ if (!(msg.payload.u64 & VHOST_USER_VRING_NOFD_MASK))
+ close(msg.fds[0]);
+ RTE_LOG(INFO, VHOST_CONFIG, "not implemented\n");
+ break;
+
+ case VHOST_USER_GET_QUEUE_NUM:
+ msg.payload.u64 = VHOST_MAX_QUEUE_PAIRS;
+ msg.size = sizeof(msg.payload.u64);
+ send_vhost_message(connfd, &msg);
+ break;
+
+ case VHOST_USER_SET_VRING_ENABLE:
+ user_set_vring_enable(vid, &msg.payload.state);
+ break;
+ case VHOST_USER_SEND_RARP:
+ user_send_rarp(vid, &msg);
+ break;
+
+ default:
+ break;
+
+ }
+}
+
+static int
+create_unix_socket(const char *path, struct sockaddr_un *un, bool is_server)
+{
+ int fd;
+
+ fd = socket(AF_UNIX, SOCK_STREAM, 0);
+ if (fd < 0)
+ return -1;
+ RTE_LOG(INFO, VHOST_CONFIG, "vhost-user %s: socket created, fd: %d\n",
+ is_server ? "server" : "client", fd);
+
+ if (!is_server && fcntl(fd, F_SETFL, O_NONBLOCK)) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "vhost-user: can't set nonblocking mode for socket, fd: "
+ "%d (%s)\n", fd, strerror(errno));
+ close(fd);
+ return -1;
+ }
+
+ memset(un, 0, sizeof(*un));
+ un->sun_family = AF_UNIX;
+ strncpy(un->sun_path, path, sizeof(un->sun_path));
+ un->sun_path[sizeof(un->sun_path) - 1] = '\0';
+
+ return fd;
+}
+
+static int
+vhost_user_create_server(struct vhost_user_socket *vsocket)
+{
+ int fd;
+ int ret;
+ struct sockaddr_un un;
+ const char *path = vsocket->path;
+
+ fd = create_unix_socket(path, &un, vsocket->is_server);
+ if (fd < 0)
+ return -1;
+
+ ret = bind(fd, (struct sockaddr *)&un, sizeof(un));
+ if (ret < 0) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "failed to bind to %s: %s; remove it and try again\n",
+ path, strerror(errno));
+ goto err;
+ }
+ RTE_LOG(INFO, VHOST_CONFIG, "bind to %s\n", path);
+
+ ret = listen(fd, MAX_VIRTIO_BACKLOG);
+ if (ret < 0)
+ goto err;
+
+ vsocket->listenfd = fd;
+ ret = fdset_add(&vhost_user.fdset, fd, vhost_user_server_new_connection,
+ NULL, vsocket);
+ if (ret < 0) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "failed to add listen fd %d to vhost server fdset\n",
+ fd);
+ goto err;
+ }
+
+ return 0;
+
+err:
+ close(fd);
+ return -1;
+}
+
+struct vhost_user_reconnect {
+ struct sockaddr_un un;
+ int fd;
+ struct vhost_user_socket *vsocket;
+
+ TAILQ_ENTRY(vhost_user_reconnect) next;
+};
+
+TAILQ_HEAD(vhost_user_reconnect_tailq_list, vhost_user_reconnect);
+struct vhost_user_reconnect_list {
+ struct vhost_user_reconnect_tailq_list head;
+ pthread_mutex_t mutex;
+};
+
+static struct vhost_user_reconnect_list reconn_list;
+static pthread_t reconn_tid;
+
+static int
+vhost_user_connect_nonblock(int fd, struct sockaddr *un, size_t sz)
+{
+ int ret, flags;
+
+ ret = connect(fd, un, sz);
+ if (ret < 0 && errno != EISCONN)
+ return -1;
+
+ flags = fcntl(fd, F_GETFL, 0);
+ if (flags < 0) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "can't get flags for connfd %d\n", fd);
+ return -2;
+ }
+ if ((flags & O_NONBLOCK) && fcntl(fd, F_SETFL, flags & ~O_NONBLOCK)) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "can't disable nonblocking on fd %d\n", fd);
+ return -2;
+ }
+ return 0;
+}
+
+static void *
+vhost_user_client_reconnect(void *arg __rte_unused)
+{
+ int ret;
+ struct vhost_user_reconnect *reconn, *next;
+
+ while (1) {
+ pthread_mutex_lock(&reconn_list.mutex);
+
+ /*
+ * An equal implementation of TAILQ_FOREACH_SAFE,
+ * which does not exist on all platforms.
+ */
+ for (reconn = TAILQ_FIRST(&reconn_list.head);
+ reconn != NULL; reconn = next) {
+ next = TAILQ_NEXT(reconn, next);
+
+ ret = vhost_user_connect_nonblock(reconn->fd,
+ (struct sockaddr *)&reconn->un,
+ sizeof(reconn->un));
+ if (ret == -2) {
+ close(reconn->fd);
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "reconnection for fd %d failed\n",
+ reconn->fd);
+ goto remove_fd;
+ }
+ if (ret == -1)
+ continue;
+
+ RTE_LOG(INFO, VHOST_CONFIG,
+ "%s: connected\n", reconn->vsocket->path);
+ vhost_user_add_connection(reconn->fd, reconn->vsocket);
+remove_fd:
+ TAILQ_REMOVE(&reconn_list.head, reconn, next);
+ free(reconn);
+ }
+
+ pthread_mutex_unlock(&reconn_list.mutex);
+ sleep(1);
+ }
+
+ return NULL;
+}
+
+static int
+vhost_user_reconnect_init(void)
+{
+ int ret;
+
+ pthread_mutex_init(&reconn_list.mutex, NULL);
+ TAILQ_INIT(&reconn_list.head);
+
+ ret = pthread_create(&reconn_tid, NULL,
+ vhost_user_client_reconnect, NULL);
+ if (ret < 0)
+ RTE_LOG(ERR, VHOST_CONFIG, "failed to create reconnect thread");
+
+ return ret;
+}
+
+static int
+vhost_user_create_client(struct vhost_user_socket *vsocket)
+{
+ int fd;
+ int ret;
+ struct sockaddr_un un;
+ const char *path = vsocket->path;
+ struct vhost_user_reconnect *reconn;
+
+ fd = create_unix_socket(path, &un, vsocket->is_server);
+ if (fd < 0)
+ return -1;
+
+ ret = vhost_user_connect_nonblock(fd, (struct sockaddr *)&un,
+ sizeof(un));
+ if (ret == 0) {
+ vhost_user_add_connection(fd, vsocket);
+ return 0;
+ }
+
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "failed to connect to %s: %s\n",
+ path, strerror(errno));
+
+ if (ret == -2 || !vsocket->reconnect) {
+ close(fd);
+ return -1;
+ }
+
+ RTE_LOG(ERR, VHOST_CONFIG, "%s: reconnecting...\n", path);
+ reconn = malloc(sizeof(*reconn));
+ if (reconn == NULL) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "failed to allocate memory for reconnect\n");
+ close(fd);
+ return -1;
+ }
+ reconn->un = un;
+ reconn->fd = fd;
+ reconn->vsocket = vsocket;
+ pthread_mutex_lock(&reconn_list.mutex);
+ TAILQ_INSERT_TAIL(&reconn_list.head, reconn, next);
+ pthread_mutex_unlock(&reconn_list.mutex);
+
+ return 0;
+}
+
+/*
+ * Register a new vhost-user socket; here we could act as server
+ * (the default case), or client (when RTE_VHOST_USER_CLIENT) flag
+ * is set.
+ */
+int
+rte_vhost_driver_register(const char *path, uint64_t flags)
+{
+ int ret = -1;
+ struct vhost_user_socket *vsocket;
+
+ if (!path)
+ return -1;
+
+ pthread_mutex_lock(&vhost_user.mutex);
+
+ if (vhost_user.vsocket_cnt == MAX_VHOST_SOCKET) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "error: the number of vhost sockets reaches maximum\n");
+ goto out;
+ }
+
+ vsocket = malloc(sizeof(struct vhost_user_socket));
+ if (!vsocket)
+ goto out;
+ memset(vsocket, 0, sizeof(struct vhost_user_socket));
+ vsocket->path = strdup(path);
+ vsocket->connfd = -1;
+
+ if ((flags & RTE_VHOST_USER_CLIENT) != 0) {
+ vsocket->reconnect = !(flags & RTE_VHOST_USER_NO_RECONNECT);
+ if (vsocket->reconnect && reconn_tid == 0) {
+ if (vhost_user_reconnect_init() < 0) {
+ free(vsocket->path);
+ free(vsocket);
+ goto out;
+ }
+ }
+ ret = vhost_user_create_client(vsocket);
+ } else {
+ vsocket->is_server = true;
+ ret = vhost_user_create_server(vsocket);
+ }
+ if (ret < 0) {
+ free(vsocket->path);
+ free(vsocket);
+ goto out;
+ }
+
+ vhost_user.vsockets[vhost_user.vsocket_cnt++] = vsocket;
+
+out:
+ pthread_mutex_unlock(&vhost_user.mutex);
+
+ return ret;
+}
+
+static bool
+vhost_user_remove_reconnect(struct vhost_user_socket *vsocket)
+{
+ int found = false;
+ struct vhost_user_reconnect *reconn, *next;
+
+ pthread_mutex_lock(&reconn_list.mutex);
+
+ for (reconn = TAILQ_FIRST(&reconn_list.head);
+ reconn != NULL; reconn = next) {
+ next = TAILQ_NEXT(reconn, next);
+
+ if (reconn->vsocket == vsocket) {
+ TAILQ_REMOVE(&reconn_list.head, reconn, next);
+ close(reconn->fd);
+ free(reconn);
+ found = true;
+ break;
+ }
+ }
+ pthread_mutex_unlock(&reconn_list.mutex);
+ return found;
+}
+
+/**
+ * Unregister the specified vhost socket
+ */
+int
+rte_vhost_driver_unregister(const char *path)
+{
+ int i;
+ int count;
+ struct vhost_user_connection *conn;
+
+ pthread_mutex_lock(&vhost_user.mutex);
+
+ for (i = 0; i < vhost_user.vsocket_cnt; i++) {
+ struct vhost_user_socket *vsocket = vhost_user.vsockets[i];
+
+ if (!strcmp(vsocket->path, path)) {
+ if (vsocket->is_server) {
+ fdset_del(&vhost_user.fdset, vsocket->listenfd);
+ close(vsocket->listenfd);
+ unlink(path);
+ } else if (vsocket->reconnect) {
+ vhost_user_remove_reconnect(vsocket);
+ }
+
+ conn = fdset_del(&vhost_user.fdset, vsocket->connfd);
+ if (conn) {
+ RTE_LOG(INFO, VHOST_CONFIG,
+ "free connfd = %d for device '%s'\n",
+ vsocket->connfd, path);
+ close(vsocket->connfd);
+ vhost_destroy_device(conn->vid);
+ free(conn);
+ }
+
+ free(vsocket->path);
+ free(vsocket);
+
+ count = --vhost_user.vsocket_cnt;
+ vhost_user.vsockets[i] = vhost_user.vsockets[count];
+ vhost_user.vsockets[count] = NULL;
+ pthread_mutex_unlock(&vhost_user.mutex);
+
+ return 0;
+ }
+ }
+ pthread_mutex_unlock(&vhost_user.mutex);
+
+ return -1;
+}
+
+int
+rte_vhost_driver_session_start(void)
+{
+ fdset_event_dispatch(&vhost_user.fdset);
+ return 0;
+}
--- /dev/null
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _VHOST_NET_USER_H
+#define _VHOST_NET_USER_H
+
+#include <stdint.h>
+#include <linux/vhost.h>
+
+#include "rte_virtio_net.h"
+
+/* refer to hw/virtio/vhost-user.c */
+
+#define VHOST_MEMORY_MAX_NREGIONS 8
+
+typedef enum VhostUserRequest {
+ VHOST_USER_NONE = 0,
+ VHOST_USER_GET_FEATURES = 1,
+ VHOST_USER_SET_FEATURES = 2,
+ VHOST_USER_SET_OWNER = 3,
+ VHOST_USER_RESET_OWNER = 4,
+ VHOST_USER_SET_MEM_TABLE = 5,
+ VHOST_USER_SET_LOG_BASE = 6,
+ VHOST_USER_SET_LOG_FD = 7,
+ VHOST_USER_SET_VRING_NUM = 8,
+ VHOST_USER_SET_VRING_ADDR = 9,
+ VHOST_USER_SET_VRING_BASE = 10,
+ VHOST_USER_GET_VRING_BASE = 11,
+ VHOST_USER_SET_VRING_KICK = 12,
+ VHOST_USER_SET_VRING_CALL = 13,
+ VHOST_USER_SET_VRING_ERR = 14,
+ VHOST_USER_GET_PROTOCOL_FEATURES = 15,
+ VHOST_USER_SET_PROTOCOL_FEATURES = 16,
+ VHOST_USER_GET_QUEUE_NUM = 17,
+ VHOST_USER_SET_VRING_ENABLE = 18,
+ VHOST_USER_SEND_RARP = 19,
+ VHOST_USER_MAX
+} VhostUserRequest;
+
+typedef struct VhostUserMemoryRegion {
+ uint64_t guest_phys_addr;
+ uint64_t memory_size;
+ uint64_t userspace_addr;
+ uint64_t mmap_offset;
+} VhostUserMemoryRegion;
+
+typedef struct VhostUserMemory {
+ uint32_t nregions;
+ uint32_t padding;
+ VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
+} VhostUserMemory;
+
+typedef struct VhostUserLog {
+ uint64_t mmap_size;
+ uint64_t mmap_offset;
+} VhostUserLog;
+
+typedef struct VhostUserMsg {
+ VhostUserRequest request;
+
+#define VHOST_USER_VERSION_MASK 0x3
+#define VHOST_USER_REPLY_MASK (0x1 << 2)
+ uint32_t flags;
+ uint32_t size; /* the following payload size */
+ union {
+#define VHOST_USER_VRING_IDX_MASK 0xff
+#define VHOST_USER_VRING_NOFD_MASK (0x1<<8)
+ uint64_t u64;
+ struct vhost_vring_state state;
+ struct vhost_vring_addr addr;
+ VhostUserMemory memory;
+ VhostUserLog log;
+ } payload;
+ int fds[VHOST_MEMORY_MAX_NREGIONS];
+} __attribute((packed)) VhostUserMsg;
+
+#define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64)
+
+/* The version of the protocol we support */
+#define VHOST_USER_VERSION 0x1
+
+/*****************************************************************************/
+#endif
+++ /dev/null
-/*-
- * BSD LICENSE
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/socket.h>
-#include <sys/select.h>
-#include <sys/time.h>
-#include <sys/types.h>
-#include <unistd.h>
-
-#include <rte_common.h>
-#include <rte_log.h>
-
-#include "fd_man.h"
-
-/**
- * Returns the index in the fdset for a given fd.
- * If fd is -1, it means to search for a free entry.
- * @return
- * index for the fd, or -1 if fd isn't in the fdset.
- */
-static int
-fdset_find_fd(struct fdset *pfdset, int fd)
-{
- int i;
-
- if (pfdset == NULL)
- return -1;
-
- for (i = 0; i < MAX_FDS && pfdset->fd[i].fd != fd; i++)
- ;
-
- return i == MAX_FDS ? -1 : i;
-}
-
-static int
-fdset_find_free_slot(struct fdset *pfdset)
-{
- return fdset_find_fd(pfdset, -1);
-}
-
-static int
-fdset_add_fd(struct fdset *pfdset, int idx, int fd,
- fd_cb rcb, fd_cb wcb, void *dat)
-{
- struct fdentry *pfdentry;
-
- if (pfdset == NULL || idx >= MAX_FDS || fd >= FD_SETSIZE)
- return -1;
-
- pfdentry = &pfdset->fd[idx];
- pfdentry->fd = fd;
- pfdentry->rcb = rcb;
- pfdentry->wcb = wcb;
- pfdentry->dat = dat;
-
- return 0;
-}
-
-/**
- * Fill the read/write fd_set with the fds in the fdset.
- * @return
- * the maximum fds filled in the read/write fd_set.
- */
-static int
-fdset_fill(fd_set *rfset, fd_set *wfset, struct fdset *pfdset)
-{
- struct fdentry *pfdentry;
- int i, maxfds = -1;
- int num = MAX_FDS;
-
- if (pfdset == NULL)
- return -1;
-
- for (i = 0; i < num; i++) {
- pfdentry = &pfdset->fd[i];
- if (pfdentry->fd != -1) {
- int added = 0;
- if (pfdentry->rcb && rfset) {
- FD_SET(pfdentry->fd, rfset);
- added = 1;
- }
- if (pfdentry->wcb && wfset) {
- FD_SET(pfdentry->fd, wfset);
- added = 1;
- }
- if (added)
- maxfds = pfdentry->fd < maxfds ?
- maxfds : pfdentry->fd;
- }
- }
- return maxfds;
-}
-
-void
-fdset_init(struct fdset *pfdset)
-{
- int i;
-
- if (pfdset == NULL)
- return;
-
- for (i = 0; i < MAX_FDS; i++) {
- pfdset->fd[i].fd = -1;
- pfdset->fd[i].dat = NULL;
- }
- pfdset->num = 0;
-}
-
-/**
- * Register the fd in the fdset with read/write handler and context.
- */
-int
-fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, void *dat)
-{
- int i;
-
- if (pfdset == NULL || fd == -1)
- return -1;
-
- pthread_mutex_lock(&pfdset->fd_mutex);
-
- /* Find a free slot in the list. */
- i = fdset_find_free_slot(pfdset);
- if (i == -1 || fdset_add_fd(pfdset, i, fd, rcb, wcb, dat) < 0) {
- pthread_mutex_unlock(&pfdset->fd_mutex);
- return -2;
- }
-
- pfdset->num++;
-
- pthread_mutex_unlock(&pfdset->fd_mutex);
-
- return 0;
-}
-
-/**
- * Unregister the fd from the fdset.
- * Returns context of a given fd or NULL.
- */
-void *
-fdset_del(struct fdset *pfdset, int fd)
-{
- int i;
- void *dat = NULL;
-
- if (pfdset == NULL || fd == -1)
- return NULL;
-
- do {
- pthread_mutex_lock(&pfdset->fd_mutex);
-
- i = fdset_find_fd(pfdset, fd);
- if (i != -1 && pfdset->fd[i].busy == 0) {
- /* busy indicates r/wcb is executing! */
- dat = pfdset->fd[i].dat;
- pfdset->fd[i].fd = -1;
- pfdset->fd[i].rcb = pfdset->fd[i].wcb = NULL;
- pfdset->fd[i].dat = NULL;
- pfdset->num--;
- i = -1;
- }
- pthread_mutex_unlock(&pfdset->fd_mutex);
- } while (i != -1);
-
- return dat;
-}
-
-/**
- * Unregister the fd at the specified slot from the fdset.
- */
-static void
-fdset_del_slot(struct fdset *pfdset, int index)
-{
- if (pfdset == NULL || index < 0 || index >= MAX_FDS)
- return;
-
- pthread_mutex_lock(&pfdset->fd_mutex);
-
- pfdset->fd[index].fd = -1;
- pfdset->fd[index].rcb = pfdset->fd[index].wcb = NULL;
- pfdset->fd[index].dat = NULL;
- pfdset->num--;
-
- pthread_mutex_unlock(&pfdset->fd_mutex);
-}
-
-/**
- * This functions runs in infinite blocking loop until there is no fd in
- * pfdset. It calls corresponding r/w handler if there is event on the fd.
- *
- * Before the callback is called, we set the flag to busy status; If other
- * thread(now rte_vhost_driver_unregister) calls fdset_del concurrently, it
- * will wait until the flag is reset to zero(which indicates the callback is
- * finished), then it could free the context after fdset_del.
- */
-void
-fdset_event_dispatch(struct fdset *pfdset)
-{
- fd_set rfds, wfds;
- int i, maxfds;
- struct fdentry *pfdentry;
- int num = MAX_FDS;
- fd_cb rcb, wcb;
- void *dat;
- int fd;
- int remove1, remove2;
- int ret;
-
- if (pfdset == NULL)
- return;
-
- while (1) {
- struct timeval tv;
- tv.tv_sec = 1;
- tv.tv_usec = 0;
- FD_ZERO(&rfds);
- FD_ZERO(&wfds);
- pthread_mutex_lock(&pfdset->fd_mutex);
-
- maxfds = fdset_fill(&rfds, &wfds, pfdset);
-
- pthread_mutex_unlock(&pfdset->fd_mutex);
-
- /*
- * When select is blocked, other threads might unregister
- * listenfds from and register new listenfds into fdset.
- * When select returns, the entries for listenfds in the fdset
- * might have been updated. It is ok if there is unwanted call
- * for new listenfds.
- */
- ret = select(maxfds + 1, &rfds, &wfds, NULL, &tv);
- if (ret <= 0)
- continue;
-
- for (i = 0; i < num; i++) {
- remove1 = remove2 = 0;
- pthread_mutex_lock(&pfdset->fd_mutex);
- pfdentry = &pfdset->fd[i];
- fd = pfdentry->fd;
- rcb = pfdentry->rcb;
- wcb = pfdentry->wcb;
- dat = pfdentry->dat;
- pfdentry->busy = 1;
- pthread_mutex_unlock(&pfdset->fd_mutex);
- if (fd >= 0 && FD_ISSET(fd, &rfds) && rcb)
- rcb(fd, dat, &remove1);
- if (fd >= 0 && FD_ISSET(fd, &wfds) && wcb)
- wcb(fd, dat, &remove2);
- pfdentry->busy = 0;
- /*
- * fdset_del needs to check busy flag.
- * We don't allow fdset_del to be called in callback
- * directly.
- */
- /*
- * When we are to clean up the fd from fdset,
- * because the fd is closed in the cb,
- * the old fd val could be reused by when creates new
- * listen fd in another thread, we couldn't call
- * fd_set_del.
- */
- if (remove1 || remove2)
- fdset_del_slot(pfdset, i);
- }
- }
-}
+++ /dev/null
-/*-
- * BSD LICENSE
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _FD_MAN_H_
-#define _FD_MAN_H_
-#include <stdint.h>
-#include <pthread.h>
-
-#define MAX_FDS 1024
-
-typedef void (*fd_cb)(int fd, void *dat, int *remove);
-
-struct fdentry {
- int fd; /* -1 indicates this entry is empty */
- fd_cb rcb; /* callback when this fd is readable. */
- fd_cb wcb; /* callback when this fd is writeable.*/
- void *dat; /* fd context */
- int busy; /* whether this entry is being used in cb. */
-};
-
-struct fdset {
- struct fdentry fd[MAX_FDS];
- pthread_mutex_t fd_mutex;
- int num; /* current fd number of this fdset */
-};
-
-
-void fdset_init(struct fdset *pfdset);
-
-int fdset_add(struct fdset *pfdset, int fd,
- fd_cb rcb, fd_cb wcb, void *dat);
-
-void *fdset_del(struct fdset *pfdset, int fd);
-
-void fdset_event_dispatch(struct fdset *pfdset);
-
-#endif
+++ /dev/null
-/*-
- * BSD LICENSE
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <stdint.h>
-#include <stdio.h>
-#include <stdbool.h>
-#include <limits.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/socket.h>
-#include <sys/un.h>
-#include <sys/queue.h>
-#include <errno.h>
-#include <fcntl.h>
-#include <pthread.h>
-
-#include <rte_log.h>
-#include <rte_virtio_net.h>
-
-#include "fd_man.h"
-#include "vhost-net-user.h"
-#include "vhost-net.h"
-#include "virtio-net-user.h"
-
-/*
- * Every time rte_vhost_driver_register() is invoked, an associated
- * vhost_user_socket struct will be created.
- */
-struct vhost_user_socket {
- char *path;
- int listenfd;
- int connfd;
- bool is_server;
- bool reconnect;
-};
-
-struct vhost_user_connection {
- struct vhost_user_socket *vsocket;
- int vid;
-};
-
-#define MAX_VHOST_SOCKET 1024
-struct vhost_user {
- struct vhost_user_socket *vsockets[MAX_VHOST_SOCKET];
- struct fdset fdset;
- int vsocket_cnt;
- pthread_mutex_t mutex;
-};
-
-#define MAX_VIRTIO_BACKLOG 128
-
-static void vhost_user_server_new_connection(int fd, void *data, int *remove);
-static void vhost_user_msg_handler(int fd, void *dat, int *remove);
-static int vhost_user_create_client(struct vhost_user_socket *vsocket);
-
-static struct vhost_user vhost_user = {
- .fdset = {
- .fd = { [0 ... MAX_FDS - 1] = {-1, NULL, NULL, NULL, 0} },
- .fd_mutex = PTHREAD_MUTEX_INITIALIZER,
- .num = 0
- },
- .vsocket_cnt = 0,
- .mutex = PTHREAD_MUTEX_INITIALIZER,
-};
-
-static const char *vhost_message_str[VHOST_USER_MAX] = {
- [VHOST_USER_NONE] = "VHOST_USER_NONE",
- [VHOST_USER_GET_FEATURES] = "VHOST_USER_GET_FEATURES",
- [VHOST_USER_SET_FEATURES] = "VHOST_USER_SET_FEATURES",
- [VHOST_USER_SET_OWNER] = "VHOST_USER_SET_OWNER",
- [VHOST_USER_RESET_OWNER] = "VHOST_USER_RESET_OWNER",
- [VHOST_USER_SET_MEM_TABLE] = "VHOST_USER_SET_MEM_TABLE",
- [VHOST_USER_SET_LOG_BASE] = "VHOST_USER_SET_LOG_BASE",
- [VHOST_USER_SET_LOG_FD] = "VHOST_USER_SET_LOG_FD",
- [VHOST_USER_SET_VRING_NUM] = "VHOST_USER_SET_VRING_NUM",
- [VHOST_USER_SET_VRING_ADDR] = "VHOST_USER_SET_VRING_ADDR",
- [VHOST_USER_SET_VRING_BASE] = "VHOST_USER_SET_VRING_BASE",
- [VHOST_USER_GET_VRING_BASE] = "VHOST_USER_GET_VRING_BASE",
- [VHOST_USER_SET_VRING_KICK] = "VHOST_USER_SET_VRING_KICK",
- [VHOST_USER_SET_VRING_CALL] = "VHOST_USER_SET_VRING_CALL",
- [VHOST_USER_SET_VRING_ERR] = "VHOST_USER_SET_VRING_ERR",
- [VHOST_USER_GET_PROTOCOL_FEATURES] = "VHOST_USER_GET_PROTOCOL_FEATURES",
- [VHOST_USER_SET_PROTOCOL_FEATURES] = "VHOST_USER_SET_PROTOCOL_FEATURES",
- [VHOST_USER_GET_QUEUE_NUM] = "VHOST_USER_GET_QUEUE_NUM",
- [VHOST_USER_SET_VRING_ENABLE] = "VHOST_USER_SET_VRING_ENABLE",
- [VHOST_USER_SEND_RARP] = "VHOST_USER_SEND_RARP",
-};
-
-/* return bytes# of read on success or negative val on failure. */
-static int
-read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
-{
- struct iovec iov;
- struct msghdr msgh;
- size_t fdsize = fd_num * sizeof(int);
- char control[CMSG_SPACE(fdsize)];
- struct cmsghdr *cmsg;
- int ret;
-
- memset(&msgh, 0, sizeof(msgh));
- iov.iov_base = buf;
- iov.iov_len = buflen;
-
- msgh.msg_iov = &iov;
- msgh.msg_iovlen = 1;
- msgh.msg_control = control;
- msgh.msg_controllen = sizeof(control);
-
- ret = recvmsg(sockfd, &msgh, 0);
- if (ret <= 0) {
- RTE_LOG(ERR, VHOST_CONFIG, "recvmsg failed\n");
- return ret;
- }
-
- if (msgh.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) {
- RTE_LOG(ERR, VHOST_CONFIG, "truncted msg\n");
- return -1;
- }
-
- for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL;
- cmsg = CMSG_NXTHDR(&msgh, cmsg)) {
- if ((cmsg->cmsg_level == SOL_SOCKET) &&
- (cmsg->cmsg_type == SCM_RIGHTS)) {
- memcpy(fds, CMSG_DATA(cmsg), fdsize);
- break;
- }
- }
-
- return ret;
-}
-
-/* return bytes# of read on success or negative val on failure. */
-static int
-read_vhost_message(int sockfd, struct VhostUserMsg *msg)
-{
- int ret;
-
- ret = read_fd_message(sockfd, (char *)msg, VHOST_USER_HDR_SIZE,
- msg->fds, VHOST_MEMORY_MAX_NREGIONS);
- if (ret <= 0)
- return ret;
-
- if (msg && msg->size) {
- if (msg->size > sizeof(msg->payload)) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "invalid msg size: %d\n", msg->size);
- return -1;
- }
- ret = read(sockfd, &msg->payload, msg->size);
- if (ret <= 0)
- return ret;
- if (ret != (int)msg->size) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "read control message failed\n");
- return -1;
- }
- }
-
- return ret;
-}
-
-static int
-send_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
-{
-
- struct iovec iov;
- struct msghdr msgh;
- size_t fdsize = fd_num * sizeof(int);
- char control[CMSG_SPACE(fdsize)];
- struct cmsghdr *cmsg;
- int ret;
-
- memset(&msgh, 0, sizeof(msgh));
- iov.iov_base = buf;
- iov.iov_len = buflen;
-
- msgh.msg_iov = &iov;
- msgh.msg_iovlen = 1;
-
- if (fds && fd_num > 0) {
- msgh.msg_control = control;
- msgh.msg_controllen = sizeof(control);
- cmsg = CMSG_FIRSTHDR(&msgh);
- cmsg->cmsg_len = CMSG_LEN(fdsize);
- cmsg->cmsg_level = SOL_SOCKET;
- cmsg->cmsg_type = SCM_RIGHTS;
- memcpy(CMSG_DATA(cmsg), fds, fdsize);
- } else {
- msgh.msg_control = NULL;
- msgh.msg_controllen = 0;
- }
-
- do {
- ret = sendmsg(sockfd, &msgh, 0);
- } while (ret < 0 && errno == EINTR);
-
- if (ret < 0) {
- RTE_LOG(ERR, VHOST_CONFIG, "sendmsg error\n");
- return ret;
- }
-
- return ret;
-}
-
-static int
-send_vhost_message(int sockfd, struct VhostUserMsg *msg)
-{
- int ret;
-
- if (!msg)
- return 0;
-
- msg->flags &= ~VHOST_USER_VERSION_MASK;
- msg->flags |= VHOST_USER_VERSION;
- msg->flags |= VHOST_USER_REPLY_MASK;
-
- ret = send_fd_message(sockfd, (char *)msg,
- VHOST_USER_HDR_SIZE + msg->size, NULL, 0);
-
- return ret;
-}
-
-
-static void
-vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket)
-{
- int vid;
- size_t size;
- struct vhost_user_connection *conn;
- int ret;
-
- conn = malloc(sizeof(*conn));
- if (conn == NULL) {
- close(fd);
- return;
- }
-
- vid = vhost_new_device();
- if (vid == -1) {
- close(fd);
- free(conn);
- return;
- }
-
- size = strnlen(vsocket->path, PATH_MAX);
- vhost_set_ifname(vid, vsocket->path, size);
-
- RTE_LOG(INFO, VHOST_CONFIG, "new device, handle is %d\n", vid);
-
- vsocket->connfd = fd;
- conn->vsocket = vsocket;
- conn->vid = vid;
- ret = fdset_add(&vhost_user.fdset, fd, vhost_user_msg_handler,
- NULL, conn);
- if (ret < 0) {
- vsocket->connfd = -1;
- free(conn);
- close(fd);
- RTE_LOG(ERR, VHOST_CONFIG,
- "failed to add fd %d into vhost server fdset\n",
- fd);
- }
-}
-
-/* call back when there is new vhost-user connection from client */
-static void
-vhost_user_server_new_connection(int fd, void *dat, int *remove __rte_unused)
-{
- struct vhost_user_socket *vsocket = dat;
-
- fd = accept(fd, NULL, NULL);
- if (fd < 0)
- return;
-
- RTE_LOG(INFO, VHOST_CONFIG, "new vhost user connection is %d\n", fd);
- vhost_user_add_connection(fd, vsocket);
-}
-
-/* callback when there is message on the connfd */
-static void
-vhost_user_msg_handler(int connfd, void *dat, int *remove)
-{
- int vid;
- struct vhost_user_connection *conn = dat;
- struct VhostUserMsg msg;
- uint64_t features;
- int ret;
-
- vid = conn->vid;
- ret = read_vhost_message(connfd, &msg);
- if (ret <= 0 || msg.request >= VHOST_USER_MAX) {
- struct vhost_user_socket *vsocket = conn->vsocket;
-
- if (ret < 0)
- RTE_LOG(ERR, VHOST_CONFIG,
- "vhost read message failed\n");
- else if (ret == 0)
- RTE_LOG(INFO, VHOST_CONFIG,
- "vhost peer closed\n");
- else
- RTE_LOG(ERR, VHOST_CONFIG,
- "vhost read incorrect message\n");
-
- vsocket->connfd = -1;
- close(connfd);
- *remove = 1;
- free(conn);
- vhost_destroy_device(vid);
-
- if (vsocket->reconnect)
- vhost_user_create_client(vsocket);
-
- return;
- }
-
- RTE_LOG(INFO, VHOST_CONFIG, "read message %s\n",
- vhost_message_str[msg.request]);
- switch (msg.request) {
- case VHOST_USER_GET_FEATURES:
- ret = vhost_get_features(vid, &features);
- msg.payload.u64 = features;
- msg.size = sizeof(msg.payload.u64);
- send_vhost_message(connfd, &msg);
- break;
- case VHOST_USER_SET_FEATURES:
- features = msg.payload.u64;
- vhost_set_features(vid, &features);
- break;
-
- case VHOST_USER_GET_PROTOCOL_FEATURES:
- msg.payload.u64 = VHOST_USER_PROTOCOL_FEATURES;
- msg.size = sizeof(msg.payload.u64);
- send_vhost_message(connfd, &msg);
- break;
- case VHOST_USER_SET_PROTOCOL_FEATURES:
- user_set_protocol_features(vid, msg.payload.u64);
- break;
-
- case VHOST_USER_SET_OWNER:
- vhost_set_owner(vid);
- break;
- case VHOST_USER_RESET_OWNER:
- vhost_reset_owner(vid);
- break;
-
- case VHOST_USER_SET_MEM_TABLE:
- user_set_mem_table(vid, &msg);
- break;
-
- case VHOST_USER_SET_LOG_BASE:
- user_set_log_base(vid, &msg);
-
- /* it needs a reply */
- msg.size = sizeof(msg.payload.u64);
- send_vhost_message(connfd, &msg);
- break;
- case VHOST_USER_SET_LOG_FD:
- close(msg.fds[0]);
- RTE_LOG(INFO, VHOST_CONFIG, "not implemented.\n");
- break;
-
- case VHOST_USER_SET_VRING_NUM:
- vhost_set_vring_num(vid, &msg.payload.state);
- break;
- case VHOST_USER_SET_VRING_ADDR:
- vhost_set_vring_addr(vid, &msg.payload.addr);
- break;
- case VHOST_USER_SET_VRING_BASE:
- vhost_set_vring_base(vid, &msg.payload.state);
- break;
-
- case VHOST_USER_GET_VRING_BASE:
- ret = user_get_vring_base(vid, &msg.payload.state);
- msg.size = sizeof(msg.payload.state);
- send_vhost_message(connfd, &msg);
- break;
-
- case VHOST_USER_SET_VRING_KICK:
- user_set_vring_kick(vid, &msg);
- break;
- case VHOST_USER_SET_VRING_CALL:
- user_set_vring_call(vid, &msg);
- break;
-
- case VHOST_USER_SET_VRING_ERR:
- if (!(msg.payload.u64 & VHOST_USER_VRING_NOFD_MASK))
- close(msg.fds[0]);
- RTE_LOG(INFO, VHOST_CONFIG, "not implemented\n");
- break;
-
- case VHOST_USER_GET_QUEUE_NUM:
- msg.payload.u64 = VHOST_MAX_QUEUE_PAIRS;
- msg.size = sizeof(msg.payload.u64);
- send_vhost_message(connfd, &msg);
- break;
-
- case VHOST_USER_SET_VRING_ENABLE:
- user_set_vring_enable(vid, &msg.payload.state);
- break;
- case VHOST_USER_SEND_RARP:
- user_send_rarp(vid, &msg);
- break;
-
- default:
- break;
-
- }
-}
-
-static int
-create_unix_socket(const char *path, struct sockaddr_un *un, bool is_server)
-{
- int fd;
-
- fd = socket(AF_UNIX, SOCK_STREAM, 0);
- if (fd < 0)
- return -1;
- RTE_LOG(INFO, VHOST_CONFIG, "vhost-user %s: socket created, fd: %d\n",
- is_server ? "server" : "client", fd);
-
- if (!is_server && fcntl(fd, F_SETFL, O_NONBLOCK)) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "vhost-user: can't set nonblocking mode for socket, fd: "
- "%d (%s)\n", fd, strerror(errno));
- close(fd);
- return -1;
- }
-
- memset(un, 0, sizeof(*un));
- un->sun_family = AF_UNIX;
- strncpy(un->sun_path, path, sizeof(un->sun_path));
- un->sun_path[sizeof(un->sun_path) - 1] = '\0';
-
- return fd;
-}
-
-static int
-vhost_user_create_server(struct vhost_user_socket *vsocket)
-{
- int fd;
- int ret;
- struct sockaddr_un un;
- const char *path = vsocket->path;
-
- fd = create_unix_socket(path, &un, vsocket->is_server);
- if (fd < 0)
- return -1;
-
- ret = bind(fd, (struct sockaddr *)&un, sizeof(un));
- if (ret < 0) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "failed to bind to %s: %s; remove it and try again\n",
- path, strerror(errno));
- goto err;
- }
- RTE_LOG(INFO, VHOST_CONFIG, "bind to %s\n", path);
-
- ret = listen(fd, MAX_VIRTIO_BACKLOG);
- if (ret < 0)
- goto err;
-
- vsocket->listenfd = fd;
- ret = fdset_add(&vhost_user.fdset, fd, vhost_user_server_new_connection,
- NULL, vsocket);
- if (ret < 0) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "failed to add listen fd %d to vhost server fdset\n",
- fd);
- goto err;
- }
-
- return 0;
-
-err:
- close(fd);
- return -1;
-}
-
-struct vhost_user_reconnect {
- struct sockaddr_un un;
- int fd;
- struct vhost_user_socket *vsocket;
-
- TAILQ_ENTRY(vhost_user_reconnect) next;
-};
-
-TAILQ_HEAD(vhost_user_reconnect_tailq_list, vhost_user_reconnect);
-struct vhost_user_reconnect_list {
- struct vhost_user_reconnect_tailq_list head;
- pthread_mutex_t mutex;
-};
-
-static struct vhost_user_reconnect_list reconn_list;
-static pthread_t reconn_tid;
-
-static int
-vhost_user_connect_nonblock(int fd, struct sockaddr *un, size_t sz)
-{
- int ret, flags;
-
- ret = connect(fd, un, sz);
- if (ret < 0 && errno != EISCONN)
- return -1;
-
- flags = fcntl(fd, F_GETFL, 0);
- if (flags < 0) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "can't get flags for connfd %d\n", fd);
- return -2;
- }
- if ((flags & O_NONBLOCK) && fcntl(fd, F_SETFL, flags & ~O_NONBLOCK)) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "can't disable nonblocking on fd %d\n", fd);
- return -2;
- }
- return 0;
-}
-
-static void *
-vhost_user_client_reconnect(void *arg __rte_unused)
-{
- int ret;
- struct vhost_user_reconnect *reconn, *next;
-
- while (1) {
- pthread_mutex_lock(&reconn_list.mutex);
-
- /*
- * An equal implementation of TAILQ_FOREACH_SAFE,
- * which does not exist on all platforms.
- */
- for (reconn = TAILQ_FIRST(&reconn_list.head);
- reconn != NULL; reconn = next) {
- next = TAILQ_NEXT(reconn, next);
-
- ret = vhost_user_connect_nonblock(reconn->fd,
- (struct sockaddr *)&reconn->un,
- sizeof(reconn->un));
- if (ret == -2) {
- close(reconn->fd);
- RTE_LOG(ERR, VHOST_CONFIG,
- "reconnection for fd %d failed\n",
- reconn->fd);
- goto remove_fd;
- }
- if (ret == -1)
- continue;
-
- RTE_LOG(INFO, VHOST_CONFIG,
- "%s: connected\n", reconn->vsocket->path);
- vhost_user_add_connection(reconn->fd, reconn->vsocket);
-remove_fd:
- TAILQ_REMOVE(&reconn_list.head, reconn, next);
- free(reconn);
- }
-
- pthread_mutex_unlock(&reconn_list.mutex);
- sleep(1);
- }
-
- return NULL;
-}
-
-static int
-vhost_user_reconnect_init(void)
-{
- int ret;
-
- pthread_mutex_init(&reconn_list.mutex, NULL);
- TAILQ_INIT(&reconn_list.head);
-
- ret = pthread_create(&reconn_tid, NULL,
- vhost_user_client_reconnect, NULL);
- if (ret < 0)
- RTE_LOG(ERR, VHOST_CONFIG, "failed to create reconnect thread");
-
- return ret;
-}
-
-static int
-vhost_user_create_client(struct vhost_user_socket *vsocket)
-{
- int fd;
- int ret;
- struct sockaddr_un un;
- const char *path = vsocket->path;
- struct vhost_user_reconnect *reconn;
-
- fd = create_unix_socket(path, &un, vsocket->is_server);
- if (fd < 0)
- return -1;
-
- ret = vhost_user_connect_nonblock(fd, (struct sockaddr *)&un,
- sizeof(un));
- if (ret == 0) {
- vhost_user_add_connection(fd, vsocket);
- return 0;
- }
-
- RTE_LOG(ERR, VHOST_CONFIG,
- "failed to connect to %s: %s\n",
- path, strerror(errno));
-
- if (ret == -2 || !vsocket->reconnect) {
- close(fd);
- return -1;
- }
-
- RTE_LOG(ERR, VHOST_CONFIG, "%s: reconnecting...\n", path);
- reconn = malloc(sizeof(*reconn));
- if (reconn == NULL) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "failed to allocate memory for reconnect\n");
- close(fd);
- return -1;
- }
- reconn->un = un;
- reconn->fd = fd;
- reconn->vsocket = vsocket;
- pthread_mutex_lock(&reconn_list.mutex);
- TAILQ_INSERT_TAIL(&reconn_list.head, reconn, next);
- pthread_mutex_unlock(&reconn_list.mutex);
-
- return 0;
-}
-
-/*
- * Register a new vhost-user socket; here we could act as server
- * (the default case), or client (when RTE_VHOST_USER_CLIENT) flag
- * is set.
- */
-int
-rte_vhost_driver_register(const char *path, uint64_t flags)
-{
- int ret = -1;
- struct vhost_user_socket *vsocket;
-
- if (!path)
- return -1;
-
- pthread_mutex_lock(&vhost_user.mutex);
-
- if (vhost_user.vsocket_cnt == MAX_VHOST_SOCKET) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "error: the number of vhost sockets reaches maximum\n");
- goto out;
- }
-
- vsocket = malloc(sizeof(struct vhost_user_socket));
- if (!vsocket)
- goto out;
- memset(vsocket, 0, sizeof(struct vhost_user_socket));
- vsocket->path = strdup(path);
- vsocket->connfd = -1;
-
- if ((flags & RTE_VHOST_USER_CLIENT) != 0) {
- vsocket->reconnect = !(flags & RTE_VHOST_USER_NO_RECONNECT);
- if (vsocket->reconnect && reconn_tid == 0) {
- if (vhost_user_reconnect_init() < 0) {
- free(vsocket->path);
- free(vsocket);
- goto out;
- }
- }
- ret = vhost_user_create_client(vsocket);
- } else {
- vsocket->is_server = true;
- ret = vhost_user_create_server(vsocket);
- }
- if (ret < 0) {
- free(vsocket->path);
- free(vsocket);
- goto out;
- }
-
- vhost_user.vsockets[vhost_user.vsocket_cnt++] = vsocket;
-
-out:
- pthread_mutex_unlock(&vhost_user.mutex);
-
- return ret;
-}
-
-static bool
-vhost_user_remove_reconnect(struct vhost_user_socket *vsocket)
-{
- int found = false;
- struct vhost_user_reconnect *reconn, *next;
-
- pthread_mutex_lock(&reconn_list.mutex);
-
- for (reconn = TAILQ_FIRST(&reconn_list.head);
- reconn != NULL; reconn = next) {
- next = TAILQ_NEXT(reconn, next);
-
- if (reconn->vsocket == vsocket) {
- TAILQ_REMOVE(&reconn_list.head, reconn, next);
- close(reconn->fd);
- free(reconn);
- found = true;
- break;
- }
- }
- pthread_mutex_unlock(&reconn_list.mutex);
- return found;
-}
-
-/**
- * Unregister the specified vhost socket
- */
-int
-rte_vhost_driver_unregister(const char *path)
-{
- int i;
- int count;
- struct vhost_user_connection *conn;
-
- pthread_mutex_lock(&vhost_user.mutex);
-
- for (i = 0; i < vhost_user.vsocket_cnt; i++) {
- struct vhost_user_socket *vsocket = vhost_user.vsockets[i];
-
- if (!strcmp(vsocket->path, path)) {
- if (vsocket->is_server) {
- fdset_del(&vhost_user.fdset, vsocket->listenfd);
- close(vsocket->listenfd);
- unlink(path);
- } else if (vsocket->reconnect) {
- vhost_user_remove_reconnect(vsocket);
- }
-
- conn = fdset_del(&vhost_user.fdset, vsocket->connfd);
- if (conn) {
- RTE_LOG(INFO, VHOST_CONFIG,
- "free connfd = %d for device '%s'\n",
- vsocket->connfd, path);
- close(vsocket->connfd);
- vhost_destroy_device(conn->vid);
- free(conn);
- }
-
- free(vsocket->path);
- free(vsocket);
-
- count = --vhost_user.vsocket_cnt;
- vhost_user.vsockets[i] = vhost_user.vsockets[count];
- vhost_user.vsockets[count] = NULL;
- pthread_mutex_unlock(&vhost_user.mutex);
-
- return 0;
- }
- }
- pthread_mutex_unlock(&vhost_user.mutex);
-
- return -1;
-}
-
-int
-rte_vhost_driver_session_start(void)
-{
- fdset_event_dispatch(&vhost_user.fdset);
- return 0;
-}
+++ /dev/null
-/*-
- * BSD LICENSE
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _VHOST_NET_USER_H
-#define _VHOST_NET_USER_H
-
-#include <stdint.h>
-#include <linux/vhost.h>
-
-#include "rte_virtio_net.h"
-
-/* refer to hw/virtio/vhost-user.c */
-
-#define VHOST_MEMORY_MAX_NREGIONS 8
-
-typedef enum VhostUserRequest {
- VHOST_USER_NONE = 0,
- VHOST_USER_GET_FEATURES = 1,
- VHOST_USER_SET_FEATURES = 2,
- VHOST_USER_SET_OWNER = 3,
- VHOST_USER_RESET_OWNER = 4,
- VHOST_USER_SET_MEM_TABLE = 5,
- VHOST_USER_SET_LOG_BASE = 6,
- VHOST_USER_SET_LOG_FD = 7,
- VHOST_USER_SET_VRING_NUM = 8,
- VHOST_USER_SET_VRING_ADDR = 9,
- VHOST_USER_SET_VRING_BASE = 10,
- VHOST_USER_GET_VRING_BASE = 11,
- VHOST_USER_SET_VRING_KICK = 12,
- VHOST_USER_SET_VRING_CALL = 13,
- VHOST_USER_SET_VRING_ERR = 14,
- VHOST_USER_GET_PROTOCOL_FEATURES = 15,
- VHOST_USER_SET_PROTOCOL_FEATURES = 16,
- VHOST_USER_GET_QUEUE_NUM = 17,
- VHOST_USER_SET_VRING_ENABLE = 18,
- VHOST_USER_SEND_RARP = 19,
- VHOST_USER_MAX
-} VhostUserRequest;
-
-typedef struct VhostUserMemoryRegion {
- uint64_t guest_phys_addr;
- uint64_t memory_size;
- uint64_t userspace_addr;
- uint64_t mmap_offset;
-} VhostUserMemoryRegion;
-
-typedef struct VhostUserMemory {
- uint32_t nregions;
- uint32_t padding;
- VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
-} VhostUserMemory;
-
-typedef struct VhostUserLog {
- uint64_t mmap_size;
- uint64_t mmap_offset;
-} VhostUserLog;
-
-typedef struct VhostUserMsg {
- VhostUserRequest request;
-
-#define VHOST_USER_VERSION_MASK 0x3
-#define VHOST_USER_REPLY_MASK (0x1 << 2)
- uint32_t flags;
- uint32_t size; /* the following payload size */
- union {
-#define VHOST_USER_VRING_IDX_MASK 0xff
-#define VHOST_USER_VRING_NOFD_MASK (0x1<<8)
- uint64_t u64;
- struct vhost_vring_state state;
- struct vhost_vring_addr addr;
- VhostUserMemory memory;
- VhostUserLog log;
- } payload;
- int fds[VHOST_MEMORY_MAX_NREGIONS];
-} __attribute((packed)) VhostUserMsg;
-
-#define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64)
-
-/* The version of the protocol we support */
-#define VHOST_USER_VERSION 0x1
-
-/*****************************************************************************/
-#endif
+++ /dev/null
-/*-
- * BSD LICENSE
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-#include <sys/mman.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include <rte_common.h>
-#include <rte_log.h>
-
-#include "virtio-net-user.h"
-#include "vhost-net-user.h"
-#include "vhost-net.h"
-
-struct orig_region_map {
- int fd;
- uint64_t mapped_address;
- uint64_t mapped_size;
- uint64_t blksz;
-};
-
-#define orig_region(ptr, nregions) \
- ((struct orig_region_map *)RTE_PTR_ADD((ptr), \
- sizeof(struct virtio_memory) + \
- sizeof(struct virtio_memory_regions) * (nregions)))
-
-static uint64_t
-get_blk_size(int fd)
-{
- struct stat stat;
- int ret;
-
- ret = fstat(fd, &stat);
- return ret == -1 ? (uint64_t)-1 : (uint64_t)stat.st_blksize;
-}
-
-static void
-free_mem_region(struct virtio_net *dev)
-{
- struct orig_region_map *region;
- unsigned int idx;
-
- if (!dev || !dev->mem)
- return;
-
- region = orig_region(dev->mem, dev->mem->nregions);
- for (idx = 0; idx < dev->mem->nregions; idx++) {
- if (region[idx].mapped_address) {
- munmap((void *)(uintptr_t)region[idx].mapped_address,
- region[idx].mapped_size);
- close(region[idx].fd);
- }
- }
-}
-
-void
-vhost_backend_cleanup(struct virtio_net *dev)
-{
- if (dev->mem) {
- free_mem_region(dev);
- free(dev->mem);
- dev->mem = NULL;
- }
- if (dev->log_addr) {
- munmap((void *)(uintptr_t)dev->log_addr, dev->log_size);
- dev->log_addr = 0;
- }
-}
-
-int
-user_set_mem_table(int vid, struct VhostUserMsg *pmsg)
-{
- struct VhostUserMemory memory = pmsg->payload.memory;
- struct virtio_memory_regions *pregion;
- uint64_t mapped_address, mapped_size;
- struct virtio_net *dev;
- unsigned int idx = 0;
- struct orig_region_map *pregion_orig;
- uint64_t alignment;
-
- /* unmap old memory regions one by one*/
- dev = get_device(vid);
- if (dev == NULL)
- return -1;
-
- /* Remove from the data plane. */
- if (dev->flags & VIRTIO_DEV_RUNNING) {
- dev->flags &= ~VIRTIO_DEV_RUNNING;
- notify_ops->destroy_device(vid);
- }
-
- if (dev->mem) {
- free_mem_region(dev);
- free(dev->mem);
- dev->mem = NULL;
- }
-
- dev->mem = calloc(1,
- sizeof(struct virtio_memory) +
- sizeof(struct virtio_memory_regions) * memory.nregions +
- sizeof(struct orig_region_map) * memory.nregions);
- if (dev->mem == NULL) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "(%d) failed to allocate memory for dev->mem\n",
- dev->vid);
- return -1;
- }
- dev->mem->nregions = memory.nregions;
-
- pregion_orig = orig_region(dev->mem, memory.nregions);
- for (idx = 0; idx < memory.nregions; idx++) {
- pregion = &dev->mem->regions[idx];
- pregion->guest_phys_address =
- memory.regions[idx].guest_phys_addr;
- pregion->guest_phys_address_end =
- memory.regions[idx].guest_phys_addr +
- memory.regions[idx].memory_size;
- pregion->memory_size =
- memory.regions[idx].memory_size;
- pregion->userspace_address =
- memory.regions[idx].userspace_addr;
-
- /* This is ugly */
- mapped_size = memory.regions[idx].memory_size +
- memory.regions[idx].mmap_offset;
-
- /* mmap() without flag of MAP_ANONYMOUS, should be called
- * with length argument aligned with hugepagesz at older
- * longterm version Linux, like 2.6.32 and 3.2.72, or
- * mmap() will fail with EINVAL.
- *
- * to avoid failure, make sure in caller to keep length
- * aligned.
- */
- alignment = get_blk_size(pmsg->fds[idx]);
- if (alignment == (uint64_t)-1) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "couldn't get hugepage size through fstat\n");
- goto err_mmap;
- }
- mapped_size = RTE_ALIGN_CEIL(mapped_size, alignment);
-
- mapped_address = (uint64_t)(uintptr_t)mmap(NULL,
- mapped_size,
- PROT_READ | PROT_WRITE, MAP_SHARED,
- pmsg->fds[idx],
- 0);
-
- RTE_LOG(INFO, VHOST_CONFIG,
- "mapped region %d fd:%d to:%p sz:0x%"PRIx64" "
- "off:0x%"PRIx64" align:0x%"PRIx64"\n",
- idx, pmsg->fds[idx], (void *)(uintptr_t)mapped_address,
- mapped_size, memory.regions[idx].mmap_offset,
- alignment);
-
- if (mapped_address == (uint64_t)(uintptr_t)MAP_FAILED) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "mmap qemu guest failed.\n");
- goto err_mmap;
- }
-
- pregion_orig[idx].mapped_address = mapped_address;
- pregion_orig[idx].mapped_size = mapped_size;
- pregion_orig[idx].blksz = alignment;
- pregion_orig[idx].fd = pmsg->fds[idx];
-
- mapped_address += memory.regions[idx].mmap_offset;
-
- pregion->address_offset = mapped_address -
- pregion->guest_phys_address;
-
- if (memory.regions[idx].guest_phys_addr == 0) {
- dev->mem->base_address =
- memory.regions[idx].userspace_addr;
- dev->mem->mapped_address =
- pregion->address_offset;
- }
-
- LOG_DEBUG(VHOST_CONFIG,
- "REGION: %u GPA: %p QEMU VA: %p SIZE (%"PRIu64")\n",
- idx,
- (void *)(uintptr_t)pregion->guest_phys_address,
- (void *)(uintptr_t)pregion->userspace_address,
- pregion->memory_size);
- }
-
- return 0;
-
-err_mmap:
- while (idx--) {
- munmap((void *)(uintptr_t)pregion_orig[idx].mapped_address,
- pregion_orig[idx].mapped_size);
- close(pregion_orig[idx].fd);
- }
- free(dev->mem);
- dev->mem = NULL;
- return -1;
-}
-
-static int
-vq_is_ready(struct vhost_virtqueue *vq)
-{
- return vq && vq->desc &&
- vq->kickfd != VIRTIO_UNINITIALIZED_EVENTFD &&
- vq->callfd != VIRTIO_UNINITIALIZED_EVENTFD;
-}
-
-static int
-virtio_is_ready(struct virtio_net *dev)
-{
- struct vhost_virtqueue *rvq, *tvq;
- uint32_t i;
-
- for (i = 0; i < dev->virt_qp_nb; i++) {
- rvq = dev->virtqueue[i * VIRTIO_QNUM + VIRTIO_RXQ];
- tvq = dev->virtqueue[i * VIRTIO_QNUM + VIRTIO_TXQ];
-
- if (!vq_is_ready(rvq) || !vq_is_ready(tvq)) {
- RTE_LOG(INFO, VHOST_CONFIG,
- "virtio is not ready for processing.\n");
- return 0;
- }
- }
-
- RTE_LOG(INFO, VHOST_CONFIG,
- "virtio is now ready for processing.\n");
- return 1;
-}
-
-void
-user_set_vring_call(int vid, struct VhostUserMsg *pmsg)
-{
- struct vhost_vring_file file;
-
- file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
- if (pmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK)
- file.fd = VIRTIO_INVALID_EVENTFD;
- else
- file.fd = pmsg->fds[0];
- RTE_LOG(INFO, VHOST_CONFIG,
- "vring call idx:%d file:%d\n", file.index, file.fd);
- vhost_set_vring_call(vid, &file);
-}
-
-
-/*
- * In vhost-user, when we receive kick message, will test whether virtio
- * device is ready for packet processing.
- */
-void
-user_set_vring_kick(int vid, struct VhostUserMsg *pmsg)
-{
- struct vhost_vring_file file;
- struct virtio_net *dev = get_device(vid);
-
- if (!dev)
- return;
-
- file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
- if (pmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK)
- file.fd = VIRTIO_INVALID_EVENTFD;
- else
- file.fd = pmsg->fds[0];
- RTE_LOG(INFO, VHOST_CONFIG,
- "vring kick idx:%d file:%d\n", file.index, file.fd);
- vhost_set_vring_kick(vid, &file);
-
- if (virtio_is_ready(dev) && !(dev->flags & VIRTIO_DEV_RUNNING)) {
- if (notify_ops->new_device(vid) == 0)
- dev->flags |= VIRTIO_DEV_RUNNING;
- }
-}
-
-/*
- * when virtio is stopped, qemu will send us the GET_VRING_BASE message.
- */
-int
-user_get_vring_base(int vid, struct vhost_vring_state *state)
-{
- struct virtio_net *dev = get_device(vid);
-
- if (dev == NULL)
- return -1;
- /* We have to stop the queue (virtio) if it is running. */
- if (dev->flags & VIRTIO_DEV_RUNNING) {
- dev->flags &= ~VIRTIO_DEV_RUNNING;
- notify_ops->destroy_device(vid);
- }
-
- /* Here we are safe to get the last used index */
- vhost_get_vring_base(vid, state->index, state);
-
- RTE_LOG(INFO, VHOST_CONFIG,
- "vring base idx:%d file:%d\n", state->index, state->num);
- /*
- * Based on current qemu vhost-user implementation, this message is
- * sent and only sent in vhost_vring_stop.
- * TODO: cleanup the vring, it isn't usable since here.
- */
- if (dev->virtqueue[state->index]->kickfd >= 0)
- close(dev->virtqueue[state->index]->kickfd);
-
- dev->virtqueue[state->index]->kickfd = VIRTIO_UNINITIALIZED_EVENTFD;
-
- return 0;
-}
-
-/*
- * when virtio queues are ready to work, qemu will send us to
- * enable the virtio queue pair.
- */
-int
-user_set_vring_enable(int vid, struct vhost_vring_state *state)
-{
- struct virtio_net *dev;
- int enable = (int)state->num;
-
- dev = get_device(vid);
- if (dev == NULL)
- return -1;
-
- RTE_LOG(INFO, VHOST_CONFIG,
- "set queue enable: %d to qp idx: %d\n",
- enable, state->index);
-
- if (notify_ops->vring_state_changed)
- notify_ops->vring_state_changed(vid, state->index, enable);
-
- dev->virtqueue[state->index]->enabled = enable;
-
- return 0;
-}
-
-void
-user_set_protocol_features(int vid, uint64_t protocol_features)
-{
- struct virtio_net *dev;
-
- dev = get_device(vid);
- if (dev == NULL || protocol_features & ~VHOST_USER_PROTOCOL_FEATURES)
- return;
-
- dev->protocol_features = protocol_features;
-}
-
-int
-user_set_log_base(int vid, struct VhostUserMsg *msg)
-{
- struct virtio_net *dev;
- int fd = msg->fds[0];
- uint64_t size, off;
- void *addr;
-
- dev = get_device(vid);
- if (!dev)
- return -1;
-
- if (fd < 0) {
- RTE_LOG(ERR, VHOST_CONFIG, "invalid log fd: %d\n", fd);
- return -1;
- }
-
- if (msg->size != sizeof(VhostUserLog)) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "invalid log base msg size: %"PRId32" != %d\n",
- msg->size, (int)sizeof(VhostUserLog));
- return -1;
- }
-
- size = msg->payload.log.mmap_size;
- off = msg->payload.log.mmap_offset;
- RTE_LOG(INFO, VHOST_CONFIG,
- "log mmap size: %"PRId64", offset: %"PRId64"\n",
- size, off);
-
- /*
- * mmap from 0 to workaround a hugepage mmap bug: mmap will
- * fail when offset is not page size aligned.
- */
- addr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
- close(fd);
- if (addr == MAP_FAILED) {
- RTE_LOG(ERR, VHOST_CONFIG, "mmap log base failed!\n");
- return -1;
- }
-
- /*
- * Free previously mapped log memory on occasionally
- * multiple VHOST_USER_SET_LOG_BASE.
- */
- if (dev->log_addr) {
- munmap((void *)(uintptr_t)dev->log_addr, dev->log_size);
- }
- dev->log_addr = (uint64_t)(uintptr_t)addr;
- dev->log_base = dev->log_addr + off;
- dev->log_size = size;
-
- return 0;
-}
-
-/*
- * An rarp packet is constructed and broadcasted to notify switches about
- * the new location of the migrated VM, so that packets from outside will
- * not be lost after migration.
- *
- * However, we don't actually "send" a rarp packet here, instead, we set
- * a flag 'broadcast_rarp' to let rte_vhost_dequeue_burst() inject it.
- */
-int
-user_send_rarp(int vid, struct VhostUserMsg *msg)
-{
- struct virtio_net *dev;
- uint8_t *mac = (uint8_t *)&msg->payload.u64;
-
- dev = get_device(vid);
- if (!dev)
- return -1;
-
- RTE_LOG(DEBUG, VHOST_CONFIG,
- ":: mac: %02x:%02x:%02x:%02x:%02x:%02x\n",
- mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);
- memcpy(dev->mac.addr_bytes, mac, 6);
-
- /*
- * Set the flag to inject a RARP broadcast packet at
- * rte_vhost_dequeue_burst().
- *
- * rte_smp_wmb() is for making sure the mac is copied
- * before the flag is set.
- */
- rte_smp_wmb();
- rte_atomic16_set(&dev->broadcast_rarp, 1);
-
- return 0;
-}
+++ /dev/null
-/*-
- * BSD LICENSE
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _VIRTIO_NET_USER_H
-#define _VIRTIO_NET_USER_H
-
-#include "vhost-net.h"
-#include "vhost-net-user.h"
-
-#define VHOST_USER_PROTOCOL_F_MQ 0
-#define VHOST_USER_PROTOCOL_F_LOG_SHMFD 1
-#define VHOST_USER_PROTOCOL_F_RARP 2
-
-#define VHOST_USER_PROTOCOL_FEATURES ((1ULL << VHOST_USER_PROTOCOL_F_MQ) | \
- (1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD) |\
- (1ULL << VHOST_USER_PROTOCOL_F_RARP))
-
-int user_set_mem_table(int, struct VhostUserMsg *);
-
-void user_set_vring_call(int, struct VhostUserMsg *);
-
-void user_set_vring_kick(int, struct VhostUserMsg *);
-
-void user_set_protocol_features(int vid, uint64_t protocol_features);
-int user_set_log_base(int vid, struct VhostUserMsg *);
-int user_send_rarp(int vid, struct VhostUserMsg *);
-
-int user_get_vring_base(int, struct vhost_vring_state *);
-
-int user_set_vring_enable(int vid, struct vhost_vring_state *state);
-
-#endif
--- /dev/null
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <rte_common.h>
+#include <rte_log.h>
+
+#include "virtio-net-user.h"
+#include "vhost-net-user.h"
+#include "vhost-net.h"
+
+struct orig_region_map {
+ int fd;
+ uint64_t mapped_address;
+ uint64_t mapped_size;
+ uint64_t blksz;
+};
+
+#define orig_region(ptr, nregions) \
+ ((struct orig_region_map *)RTE_PTR_ADD((ptr), \
+ sizeof(struct virtio_memory) + \
+ sizeof(struct virtio_memory_regions) * (nregions)))
+
+static uint64_t
+get_blk_size(int fd)
+{
+ struct stat stat;
+ int ret;
+
+ ret = fstat(fd, &stat);
+ return ret == -1 ? (uint64_t)-1 : (uint64_t)stat.st_blksize;
+}
+
+static void
+free_mem_region(struct virtio_net *dev)
+{
+ struct orig_region_map *region;
+ unsigned int idx;
+
+ if (!dev || !dev->mem)
+ return;
+
+ region = orig_region(dev->mem, dev->mem->nregions);
+ for (idx = 0; idx < dev->mem->nregions; idx++) {
+ if (region[idx].mapped_address) {
+ munmap((void *)(uintptr_t)region[idx].mapped_address,
+ region[idx].mapped_size);
+ close(region[idx].fd);
+ }
+ }
+}
+
+void
+vhost_backend_cleanup(struct virtio_net *dev)
+{
+ if (dev->mem) {
+ free_mem_region(dev);
+ free(dev->mem);
+ dev->mem = NULL;
+ }
+ if (dev->log_addr) {
+ munmap((void *)(uintptr_t)dev->log_addr, dev->log_size);
+ dev->log_addr = 0;
+ }
+}
+
+int
+user_set_mem_table(int vid, struct VhostUserMsg *pmsg)
+{
+ struct VhostUserMemory memory = pmsg->payload.memory;
+ struct virtio_memory_regions *pregion;
+ uint64_t mapped_address, mapped_size;
+ struct virtio_net *dev;
+ unsigned int idx = 0;
+ struct orig_region_map *pregion_orig;
+ uint64_t alignment;
+
+ /* unmap old memory regions one by one*/
+ dev = get_device(vid);
+ if (dev == NULL)
+ return -1;
+
+ /* Remove from the data plane. */
+ if (dev->flags & VIRTIO_DEV_RUNNING) {
+ dev->flags &= ~VIRTIO_DEV_RUNNING;
+ notify_ops->destroy_device(vid);
+ }
+
+ if (dev->mem) {
+ free_mem_region(dev);
+ free(dev->mem);
+ dev->mem = NULL;
+ }
+
+ dev->mem = calloc(1,
+ sizeof(struct virtio_memory) +
+ sizeof(struct virtio_memory_regions) * memory.nregions +
+ sizeof(struct orig_region_map) * memory.nregions);
+ if (dev->mem == NULL) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "(%d) failed to allocate memory for dev->mem\n",
+ dev->vid);
+ return -1;
+ }
+ dev->mem->nregions = memory.nregions;
+
+ pregion_orig = orig_region(dev->mem, memory.nregions);
+ for (idx = 0; idx < memory.nregions; idx++) {
+ pregion = &dev->mem->regions[idx];
+ pregion->guest_phys_address =
+ memory.regions[idx].guest_phys_addr;
+ pregion->guest_phys_address_end =
+ memory.regions[idx].guest_phys_addr +
+ memory.regions[idx].memory_size;
+ pregion->memory_size =
+ memory.regions[idx].memory_size;
+ pregion->userspace_address =
+ memory.regions[idx].userspace_addr;
+
+ /* This is ugly */
+ mapped_size = memory.regions[idx].memory_size +
+ memory.regions[idx].mmap_offset;
+
+ /* mmap() without flag of MAP_ANONYMOUS, should be called
+ * with length argument aligned with hugepagesz at older
+ * longterm version Linux, like 2.6.32 and 3.2.72, or
+ * mmap() will fail with EINVAL.
+ *
+ * to avoid failure, make sure in caller to keep length
+ * aligned.
+ */
+ alignment = get_blk_size(pmsg->fds[idx]);
+ if (alignment == (uint64_t)-1) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "couldn't get hugepage size through fstat\n");
+ goto err_mmap;
+ }
+ mapped_size = RTE_ALIGN_CEIL(mapped_size, alignment);
+
+ mapped_address = (uint64_t)(uintptr_t)mmap(NULL,
+ mapped_size,
+ PROT_READ | PROT_WRITE, MAP_SHARED,
+ pmsg->fds[idx],
+ 0);
+
+ RTE_LOG(INFO, VHOST_CONFIG,
+ "mapped region %d fd:%d to:%p sz:0x%"PRIx64" "
+ "off:0x%"PRIx64" align:0x%"PRIx64"\n",
+ idx, pmsg->fds[idx], (void *)(uintptr_t)mapped_address,
+ mapped_size, memory.regions[idx].mmap_offset,
+ alignment);
+
+ if (mapped_address == (uint64_t)(uintptr_t)MAP_FAILED) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "mmap qemu guest failed.\n");
+ goto err_mmap;
+ }
+
+ pregion_orig[idx].mapped_address = mapped_address;
+ pregion_orig[idx].mapped_size = mapped_size;
+ pregion_orig[idx].blksz = alignment;
+ pregion_orig[idx].fd = pmsg->fds[idx];
+
+ mapped_address += memory.regions[idx].mmap_offset;
+
+ pregion->address_offset = mapped_address -
+ pregion->guest_phys_address;
+
+ if (memory.regions[idx].guest_phys_addr == 0) {
+ dev->mem->base_address =
+ memory.regions[idx].userspace_addr;
+ dev->mem->mapped_address =
+ pregion->address_offset;
+ }
+
+ LOG_DEBUG(VHOST_CONFIG,
+ "REGION: %u GPA: %p QEMU VA: %p SIZE (%"PRIu64")\n",
+ idx,
+ (void *)(uintptr_t)pregion->guest_phys_address,
+ (void *)(uintptr_t)pregion->userspace_address,
+ pregion->memory_size);
+ }
+
+ return 0;
+
+err_mmap:
+ while (idx--) {
+ munmap((void *)(uintptr_t)pregion_orig[idx].mapped_address,
+ pregion_orig[idx].mapped_size);
+ close(pregion_orig[idx].fd);
+ }
+ free(dev->mem);
+ dev->mem = NULL;
+ return -1;
+}
+
+static int
+vq_is_ready(struct vhost_virtqueue *vq)
+{
+ return vq && vq->desc &&
+ vq->kickfd != VIRTIO_UNINITIALIZED_EVENTFD &&
+ vq->callfd != VIRTIO_UNINITIALIZED_EVENTFD;
+}
+
+static int
+virtio_is_ready(struct virtio_net *dev)
+{
+ struct vhost_virtqueue *rvq, *tvq;
+ uint32_t i;
+
+ for (i = 0; i < dev->virt_qp_nb; i++) {
+ rvq = dev->virtqueue[i * VIRTIO_QNUM + VIRTIO_RXQ];
+ tvq = dev->virtqueue[i * VIRTIO_QNUM + VIRTIO_TXQ];
+
+ if (!vq_is_ready(rvq) || !vq_is_ready(tvq)) {
+ RTE_LOG(INFO, VHOST_CONFIG,
+ "virtio is not ready for processing.\n");
+ return 0;
+ }
+ }
+
+ RTE_LOG(INFO, VHOST_CONFIG,
+ "virtio is now ready for processing.\n");
+ return 1;
+}
+
+void
+user_set_vring_call(int vid, struct VhostUserMsg *pmsg)
+{
+ struct vhost_vring_file file;
+
+ file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
+ if (pmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK)
+ file.fd = VIRTIO_INVALID_EVENTFD;
+ else
+ file.fd = pmsg->fds[0];
+ RTE_LOG(INFO, VHOST_CONFIG,
+ "vring call idx:%d file:%d\n", file.index, file.fd);
+ vhost_set_vring_call(vid, &file);
+}
+
+
+/*
+ * In vhost-user, when we receive kick message, will test whether virtio
+ * device is ready for packet processing.
+ */
+void
+user_set_vring_kick(int vid, struct VhostUserMsg *pmsg)
+{
+ struct vhost_vring_file file;
+ struct virtio_net *dev = get_device(vid);
+
+ if (!dev)
+ return;
+
+ file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
+ if (pmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK)
+ file.fd = VIRTIO_INVALID_EVENTFD;
+ else
+ file.fd = pmsg->fds[0];
+ RTE_LOG(INFO, VHOST_CONFIG,
+ "vring kick idx:%d file:%d\n", file.index, file.fd);
+ vhost_set_vring_kick(vid, &file);
+
+ if (virtio_is_ready(dev) && !(dev->flags & VIRTIO_DEV_RUNNING)) {
+ if (notify_ops->new_device(vid) == 0)
+ dev->flags |= VIRTIO_DEV_RUNNING;
+ }
+}
+
+/*
+ * when virtio is stopped, qemu will send us the GET_VRING_BASE message.
+ */
+int
+user_get_vring_base(int vid, struct vhost_vring_state *state)
+{
+ struct virtio_net *dev = get_device(vid);
+
+ if (dev == NULL)
+ return -1;
+ /* We have to stop the queue (virtio) if it is running. */
+ if (dev->flags & VIRTIO_DEV_RUNNING) {
+ dev->flags &= ~VIRTIO_DEV_RUNNING;
+ notify_ops->destroy_device(vid);
+ }
+
+ /* Here we are safe to get the last used index */
+ vhost_get_vring_base(vid, state->index, state);
+
+ RTE_LOG(INFO, VHOST_CONFIG,
+ "vring base idx:%d file:%d\n", state->index, state->num);
+ /*
+ * Based on current qemu vhost-user implementation, this message is
+ * sent and only sent in vhost_vring_stop.
+ * TODO: cleanup the vring, it isn't usable since here.
+ */
+ if (dev->virtqueue[state->index]->kickfd >= 0)
+ close(dev->virtqueue[state->index]->kickfd);
+
+ dev->virtqueue[state->index]->kickfd = VIRTIO_UNINITIALIZED_EVENTFD;
+
+ return 0;
+}
+
+/*
+ * when virtio queues are ready to work, qemu will send us to
+ * enable the virtio queue pair.
+ */
+int
+user_set_vring_enable(int vid, struct vhost_vring_state *state)
+{
+ struct virtio_net *dev;
+ int enable = (int)state->num;
+
+ dev = get_device(vid);
+ if (dev == NULL)
+ return -1;
+
+ RTE_LOG(INFO, VHOST_CONFIG,
+ "set queue enable: %d to qp idx: %d\n",
+ enable, state->index);
+
+ if (notify_ops->vring_state_changed)
+ notify_ops->vring_state_changed(vid, state->index, enable);
+
+ dev->virtqueue[state->index]->enabled = enable;
+
+ return 0;
+}
+
+void
+user_set_protocol_features(int vid, uint64_t protocol_features)
+{
+ struct virtio_net *dev;
+
+ dev = get_device(vid);
+ if (dev == NULL || protocol_features & ~VHOST_USER_PROTOCOL_FEATURES)
+ return;
+
+ dev->protocol_features = protocol_features;
+}
+
+int
+user_set_log_base(int vid, struct VhostUserMsg *msg)
+{
+ struct virtio_net *dev;
+ int fd = msg->fds[0];
+ uint64_t size, off;
+ void *addr;
+
+ dev = get_device(vid);
+ if (!dev)
+ return -1;
+
+ if (fd < 0) {
+ RTE_LOG(ERR, VHOST_CONFIG, "invalid log fd: %d\n", fd);
+ return -1;
+ }
+
+ if (msg->size != sizeof(VhostUserLog)) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "invalid log base msg size: %"PRId32" != %d\n",
+ msg->size, (int)sizeof(VhostUserLog));
+ return -1;
+ }
+
+ size = msg->payload.log.mmap_size;
+ off = msg->payload.log.mmap_offset;
+ RTE_LOG(INFO, VHOST_CONFIG,
+ "log mmap size: %"PRId64", offset: %"PRId64"\n",
+ size, off);
+
+ /*
+ * mmap from 0 to workaround a hugepage mmap bug: mmap will
+ * fail when offset is not page size aligned.
+ */
+ addr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+ close(fd);
+ if (addr == MAP_FAILED) {
+ RTE_LOG(ERR, VHOST_CONFIG, "mmap log base failed!\n");
+ return -1;
+ }
+
+ /*
+ * Free previously mapped log memory on occasionally
+ * multiple VHOST_USER_SET_LOG_BASE.
+ */
+ if (dev->log_addr) {
+ munmap((void *)(uintptr_t)dev->log_addr, dev->log_size);
+ }
+ dev->log_addr = (uint64_t)(uintptr_t)addr;
+ dev->log_base = dev->log_addr + off;
+ dev->log_size = size;
+
+ return 0;
+}
+
+/*
+ * An rarp packet is constructed and broadcasted to notify switches about
+ * the new location of the migrated VM, so that packets from outside will
+ * not be lost after migration.
+ *
+ * However, we don't actually "send" a rarp packet here, instead, we set
+ * a flag 'broadcast_rarp' to let rte_vhost_dequeue_burst() inject it.
+ */
+int
+user_send_rarp(int vid, struct VhostUserMsg *msg)
+{
+ struct virtio_net *dev;
+ uint8_t *mac = (uint8_t *)&msg->payload.u64;
+
+ dev = get_device(vid);
+ if (!dev)
+ return -1;
+
+ RTE_LOG(DEBUG, VHOST_CONFIG,
+ ":: mac: %02x:%02x:%02x:%02x:%02x:%02x\n",
+ mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);
+ memcpy(dev->mac.addr_bytes, mac, 6);
+
+ /*
+ * Set the flag to inject a RARP broadcast packet at
+ * rte_vhost_dequeue_burst().
+ *
+ * rte_smp_wmb() is for making sure the mac is copied
+ * before the flag is set.
+ */
+ rte_smp_wmb();
+ rte_atomic16_set(&dev->broadcast_rarp, 1);
+
+ return 0;
+}
--- /dev/null
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _VIRTIO_NET_USER_H
+#define _VIRTIO_NET_USER_H
+
+#include "vhost-net.h"
+#include "vhost-net-user.h"
+
+#define VHOST_USER_PROTOCOL_F_MQ 0
+#define VHOST_USER_PROTOCOL_F_LOG_SHMFD 1
+#define VHOST_USER_PROTOCOL_F_RARP 2
+
+#define VHOST_USER_PROTOCOL_FEATURES ((1ULL << VHOST_USER_PROTOCOL_F_MQ) | \
+ (1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD) |\
+ (1ULL << VHOST_USER_PROTOCOL_F_RARP))
+
+int user_set_mem_table(int, struct VhostUserMsg *);
+
+void user_set_vring_call(int, struct VhostUserMsg *);
+
+void user_set_vring_kick(int, struct VhostUserMsg *);
+
+void user_set_protocol_features(int vid, uint64_t protocol_features);
+int user_set_log_base(int vid, struct VhostUserMsg *);
+int user_send_rarp(int vid, struct VhostUserMsg *);
+
+int user_get_vring_base(int, struct vhost_vring_state *);
+
+int user_set_vring_enable(int vid, struct vhost_vring_state *state);
+
+#endif