From: Tonghao Zhang Date: Wed, 28 Mar 2018 05:49:25 +0000 (-0700) Subject: vhost: add pipe event for optimizing negotiation X-Git-Url: http://git.droids-corp.org/?a=commitdiff_plain;h=d64c43773abe9326cc028ecda81b8f9e4117b551;p=dpdk.git vhost: add pipe event for optimizing negotiation When vhost-user connects to qemu successfully, dpdk calls vhost_user_add_connection to add the unix socket fd to the poll set. However, fdset_add only stores the socket fd in a fdentry, while poll may already be sleeping, so the new fd is not watched until poll wakes up and rebuilds its wait list. In the general case, this is not a problem. But if we use hot update for vhost-user, the longest downtime of the VMs' network is 750+ms. This patch adds a pipe event, so that once a connection is established, dpdk rebuilds the poll wait list immediately. With this patch, the longest downtime is 20~30ms. Signed-off-by: Tonghao Zhang Reviewed-by: Maxime Coquelin --- diff --git a/lib/librte_vhost/fd_man.c b/lib/librte_vhost/fd_man.c index 181711c2ab..8590ee5074 100644 --- a/lib/librte_vhost/fd_man.c +++ b/lib/librte_vhost/fd_man.c @@ -16,6 +16,9 @@ #include "fd_man.h" + +#define RTE_LOGTYPE_VHOST_FDMAN RTE_LOGTYPE_USER1 + #define FDPOLLERR (POLLERR | POLLHUP | POLLNVAL) static int @@ -272,3 +275,64 @@ fdset_event_dispatch(void *arg) return NULL; } + +static void +fdset_pipe_read_cb(int readfd, void *dat __rte_unused, + int *remove __rte_unused) +{ + char charbuf[16]; + int r = read(readfd, charbuf, sizeof(charbuf)); + /* + * Just an optimization, we don't care if read() failed + * so ignore explicitly its return value to make the + * compiler happy + */ + RTE_SET_USED(r); +} + +void +fdset_pipe_uninit(struct fdset *fdset) +{ + fdset_del(fdset, fdset->u.readfd); + close(fdset->u.readfd); + close(fdset->u.writefd); +} + +int +fdset_pipe_init(struct fdset *fdset) +{ + int ret; + + if (pipe(fdset->u.pipefd) < 0) { + RTE_LOG(ERR, VHOST_FDMAN, + "failed to create pipe for vhost fdset\n"); + return -1; + } + + ret = fdset_add(fdset, fdset->u.readfd, + fdset_pipe_read_cb, NULL, NULL); + + if (ret < 0) { + RTE_LOG(ERR, VHOST_FDMAN, + "failed to add 
pipe readfd %d into vhost server fdset\n", + fdset->u.readfd); + + fdset_pipe_uninit(fdset); + return -1; + } + + return 0; +} + +void +fdset_pipe_notify(struct fdset *fdset) +{ + int r = write(fdset->u.writefd, "1", 1); + /* + * Just an optimization, we don't care if write() failed + * so ignore explicitly its return value to make the + * compiler happy + */ + RTE_SET_USED(r); + +} diff --git a/lib/librte_vhost/fd_man.h b/lib/librte_vhost/fd_man.h index 3a9276c3cf..76a42fb3df 100644 --- a/lib/librte_vhost/fd_man.h +++ b/lib/librte_vhost/fd_man.h @@ -25,6 +25,16 @@ struct fdset { struct fdentry fd[MAX_FDS]; pthread_mutex_t fd_mutex; int num; /* current fd number of this fdset */ + + union pipefds { + struct { + int pipefd[2]; + }; + struct { + int readfd; + int writefd; + }; + } u; }; @@ -37,4 +47,10 @@ void *fdset_del(struct fdset *pfdset, int fd); void *fdset_event_dispatch(void *arg); +int fdset_pipe_init(struct fdset *fdset); + +void fdset_pipe_uninit(struct fdset *fdset); + +void fdset_pipe_notify(struct fdset *fdset); + #endif diff --git a/lib/librte_vhost/socket.c b/lib/librte_vhost/socket.c index ca9bfcdd2a..a0a95f9506 100644 --- a/lib/librte_vhost/socket.c +++ b/lib/librte_vhost/socket.c @@ -242,6 +242,8 @@ vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket) pthread_mutex_lock(&vsocket->conn_mutex); TAILQ_INSERT_TAIL(&vsocket->conn_list, conn, next); pthread_mutex_unlock(&vsocket->conn_mutex); + + fdset_pipe_notify(&vhost_user.fdset); return; err: @@ -850,11 +852,23 @@ rte_vhost_driver_start(const char *path) return -1; if (fdset_tid == 0) { + /** + * create a pipe which will be waited by poll and notified to + * rebuild the wait list of poll. 
+ */ + if (fdset_pipe_init(&vhost_user.fdset) < 0) { + RTE_LOG(ERR, VHOST_CONFIG, + "failed to create pipe for vhost fdset\n"); + return -1; + } + int ret = pthread_create(&fdset_tid, NULL, fdset_event_dispatch, &vhost_user.fdset); if (ret != 0) { RTE_LOG(ERR, VHOST_CONFIG, "failed to create fdset handling thread"); + + fdset_pipe_uninit(&vhost_user.fdset); return -1; } else { snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN,