From: Tonghao Zhang Date: Fri, 27 Apr 2018 15:19:43 +0000 (-0700) Subject: vhost: fix dead lock on closing in server mode X-Git-Url: http://git.droids-corp.org/?a=commitdiff_plain;h=8b4b949144b8;p=dpdk.git vhost: fix dead lock on closing in server mode When qemu close the unix socket fd of the vhostuser as a server, and then immediately delete the vhostuser port on openvswitch. There will be a deadlock. A thread (fdset event thread): B thread: 1. fdset_event_dispatch rte_vhost_driver_unregister 2. set the fd busy to 1. lock vsocket->conn_mutex 3. vhost_user_read_cb fdset_del waits busy changed to 0. 4. vhost peer closed, remove the conn from vsocket->conn_list: lock vsocket->conn_mutex 5. set the fd busy to 0 Fixes: 65388b43f592 ("vhost: fix fd leaks for vhost-user server mode") Cc: stable@dpdk.org Signed-off-by: Tonghao Zhang Acked-by: Maxime Coquelin --- diff --git a/lib/librte_vhost/fd_man.c b/lib/librte_vhost/fd_man.c index 8590ee5074..b24c27dd70 100644 --- a/lib/librte_vhost/fd_man.c +++ b/lib/librte_vhost/fd_man.c @@ -174,6 +174,38 @@ fdset_del(struct fdset *pfdset, int fd) return dat; } +/** + * Unregister the fd from the fdset. + * + * If parameters are invalid, return directly -2. + * And check whether fd is busy, if yes, return -1. + * Otherwise, try to delete the fd from fdset and + * return true. + */ +int +fdset_try_del(struct fdset *pfdset, int fd) +{ + int i; + + if (pfdset == NULL || fd == -1) + return -2; + + pthread_mutex_lock(&pfdset->fd_mutex); + i = fdset_find_fd(pfdset, fd); + if (i != -1 && pfdset->fd[i].busy) { + pthread_mutex_unlock(&pfdset->fd_mutex); + return -1; + } + + if (i != -1) { + pfdset->fd[i].fd = -1; + pfdset->fd[i].rcb = pfdset->fd[i].wcb = NULL; + pfdset->fd[i].dat = NULL; + } + + pthread_mutex_unlock(&pfdset->fd_mutex); + return 0; +} /** * This functions runs in infinite blocking loop until there is no fd in diff --git a/lib/librte_vhost/fd_man.h b/lib/librte_vhost/fd_man.h index 76a42fb3df..3331bcd97a 100644 --- a/lib/librte_vhost/fd_man.h +++ b/lib/librte_vhost/fd_man.h @@ -44,6 +44,7 @@ int fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, void *dat); void *fdset_del(struct fdset *pfdset, int fd); +int fdset_try_del(struct fdset *pfdset, int fd); void *fdset_event_dispatch(void *arg); diff --git a/lib/librte_vhost/socket.c b/lib/librte_vhost/socket.c index 4a561add94..4ce916dc2f 100644 --- a/lib/librte_vhost/socket.c +++ b/lib/librte_vhost/socket.c @@ -922,13 +922,25 @@ rte_vhost_driver_unregister(const char *path) vhost_user_remove_reconnect(vsocket); } +again: pthread_mutex_lock(&vsocket->conn_mutex); for (conn = TAILQ_FIRST(&vsocket->conn_list); conn != NULL; conn = next) { next = TAILQ_NEXT(conn, next); - fdset_del(&vhost_user.fdset, conn->connfd); + /* + * If r/wcb is executing, release the + * conn_mutex lock, and try again since + * the r/wcb may use the conn_mutex lock. + */ + if (fdset_try_del(&vhost_user.fdset, + conn->connfd) == -1) { + pthread_mutex_unlock( + &vsocket->conn_mutex); + goto again; + } + RTE_LOG(INFO, VHOST_CONFIG, "free connfd = %d for device '%s'\n", conn->connfd, path);