From 81b5d22f1d52c1ad4f6d069c14199c91f608435d Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Thu, 21 Jul 2016 16:19:35 +0300 Subject: [PATCH] vhost: fix connect hang in client mode If something abnormal happened to QEMU, 'connect()' can block the calling thread (e.g. the main thread of OVS) forever or for a really long time. This can break the whole application or block the reconnection thread. Example with OVS: ovs_rcu(urcu2)|WARN|blocked 512000 ms waiting for main to quiesce (gdb) bt #0 connect () from /lib64/libpthread.so.0 #1 vhost_user_create_client (vsocket=0xa816e0) #2 rte_vhost_driver_register #3 netdev_dpdk_vhost_user_construct #4 netdev_open (name=0xa664b0 "vhost1") [...] #11 main Fix that by setting non-blocking mode for client sockets for connection. Fixes: 64ab701c3d1e ("vhost: add vhost-user client mode") Signed-off-by: Ilya Maximets Acked-by: Yuanhan Liu --- lib/librte_vhost/vhost_user/vhost-net-user.c | 52 ++++++++++++++++++-- 1 file changed, 48 insertions(+), 4 deletions(-) diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.c b/lib/librte_vhost/vhost_user/vhost-net-user.c index f0ea3a2744..f60cb13663 100644 --- a/lib/librte_vhost/vhost_user/vhost-net-user.c +++ b/lib/librte_vhost/vhost_user/vhost-net-user.c @@ -43,6 +43,7 @@ #include #include #include +#include <fcntl.h> #include #include @@ -445,6 +446,14 @@ create_unix_socket(const char *path, struct sockaddr_un *un, bool is_server) RTE_LOG(INFO, VHOST_CONFIG, "vhost-user %s: socket created, fd: %d\n", is_server ? 
"server" : "client", fd); + if (!is_server && fcntl(fd, F_SETFL, O_NONBLOCK)) { + RTE_LOG(ERR, VHOST_CONFIG, + "vhost-user: can't set nonblocking mode for socket, fd: " + "%d (%s)\n", fd, strerror(errno)); + close(fd); + return -1; + } + memset(un, 0, sizeof(*un)); un->sun_family = AF_UNIX; strncpy(un->sun_path, path, sizeof(un->sun_path)); @@ -512,9 +521,33 @@ struct vhost_user_reconnect_list { static struct vhost_user_reconnect_list reconn_list; static pthread_t reconn_tid; +static int +vhost_user_connect_nonblock(int fd, struct sockaddr *un, size_t sz) +{ + int ret, flags; + + ret = connect(fd, un, sz); + if (ret < 0 && errno != EISCONN) + return -1; + + flags = fcntl(fd, F_GETFL, 0); + if (flags < 0) { + RTE_LOG(ERR, VHOST_CONFIG, + "can't get flags for connfd %d\n", fd); + return -2; + } + if ((flags & O_NONBLOCK) && fcntl(fd, F_SETFL, flags & ~O_NONBLOCK)) { + RTE_LOG(ERR, VHOST_CONFIG, + "can't disable nonblocking on fd %d\n", fd); + return -2; + } + return 0; +} + static void * vhost_user_client_reconnect(void *arg __rte_unused) { + int ret; struct vhost_user_reconnect *reconn, *next; while (1) { @@ -528,13 +561,23 @@ vhost_user_client_reconnect(void *arg __rte_unused) reconn != NULL; reconn = next) { next = TAILQ_NEXT(reconn, next); - if (connect(reconn->fd, (struct sockaddr *)&reconn->un, - sizeof(reconn->un)) < 0) + ret = vhost_user_connect_nonblock(reconn->fd, + (struct sockaddr *)&reconn->un, + sizeof(reconn->un)); + if (ret == -2) { + close(reconn->fd); + RTE_LOG(ERR, VHOST_CONFIG, + "reconnection for fd %d failed\n", + reconn->fd); + goto remove_fd; + } + if (ret == -1) continue; RTE_LOG(INFO, VHOST_CONFIG, "%s: connected\n", reconn->vsocket->path); vhost_user_add_connection(reconn->fd, reconn->vsocket); +remove_fd: TAILQ_REMOVE(&reconn_list.head, reconn, next); free(reconn); } @@ -575,7 +618,8 @@ vhost_user_create_client(struct vhost_user_socket *vsocket) if (fd < 0) return -1; - ret = connect(fd, (struct sockaddr *)&un, sizeof(un)); + ret = 
vhost_user_connect_nonblock(fd, (struct sockaddr *)&un, + sizeof(un)); if (ret == 0) { vhost_user_add_connection(fd, vsocket); return 0; @@ -585,7 +629,7 @@ vhost_user_create_client(struct vhost_user_socket *vsocket) "failed to connect to %s: %s\n", path, strerror(errno)); - if (!vsocket->reconnect) { + if (ret == -2 || !vsocket->reconnect) { close(fd); return -1; } -- 2.20.1