If something abnormal happens to QEMU, 'connect()' can block the calling
thread (e.g. the main thread of OVS) forever or for a really long time.
This can break the whole application or block the reconnection thread.
Example with OVS:
ovs_rcu(urcu2)|WARN|blocked 512000 ms waiting for main to quiesce
(gdb) bt
#0 connect () from /lib64/libpthread.so.0
#1 vhost_user_create_client (vsocket=0xa816e0)
#2 rte_vhost_driver_register
#3 netdev_dpdk_vhost_user_construct
#4 netdev_open (name=0xa664b0 "vhost1")
[...]
#11 main
Fix that by putting client sockets into non-blocking mode while connecting.
Fixes: 64ab701c3d1e ("vhost: add vhost-user client mode")
Signed-off-by: Ilya Maximets <i.maximets@samsung.com>
Acked-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
#include <sys/un.h>
#include <sys/queue.h>
#include <errno.h>
#include <sys/un.h>
#include <sys/queue.h>
#include <errno.h>
#include <pthread.h>
#include <rte_log.h>
#include <pthread.h>
#include <rte_log.h>
RTE_LOG(INFO, VHOST_CONFIG, "vhost-user %s: socket created, fd: %d\n",
is_server ? "server" : "client", fd);
RTE_LOG(INFO, VHOST_CONFIG, "vhost-user %s: socket created, fd: %d\n",
is_server ? "server" : "client", fd);
+ if (!is_server && fcntl(fd, F_SETFL, O_NONBLOCK)) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "vhost-user: can't set nonblocking mode for socket, fd: "
+ "%d (%s)\n", fd, strerror(errno));
+ close(fd);
+ return -1;
+ }
+
memset(un, 0, sizeof(*un));
un->sun_family = AF_UNIX;
strncpy(un->sun_path, path, sizeof(un->sun_path));
memset(un, 0, sizeof(*un));
un->sun_family = AF_UNIX;
strncpy(un->sun_path, path, sizeof(un->sun_path));
static struct vhost_user_reconnect_list reconn_list;
static pthread_t reconn_tid;
static struct vhost_user_reconnect_list reconn_list;
static pthread_t reconn_tid;
+/*
+ * Try to connect a socket that may be in non-blocking mode and, once the
+ * connection exists, put the descriptor back into blocking mode.
+ *
+ * Returns:
+ *    0 - connect() succeeded or reported EISCONN, and O_NONBLOCK is clear
+ *   -1 - connect() failed with an errno other than EISCONN
+ *   -2 - reading or updating the descriptor flags failed
+ */
+static int
+vhost_user_connect_nonblock(int fd, struct sockaddr *un, size_t sz)
+{
+	int fl;
+
+	/* EISCONN means the socket is already connected: treat it as success. */
+	if (connect(fd, un, sz) < 0 && errno != EISCONN)
+		return -1;
+
+	fl = fcntl(fd, F_GETFL, 0);
+	if (fl < 0) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"can't get flags for connfd %d\n", fd);
+		return -2;
+	}
+
+	/* Nothing to undo when the descriptor is already blocking. */
+	if (!(fl & O_NONBLOCK))
+		return 0;
+
+	if (fcntl(fd, F_SETFL, fl & ~O_NONBLOCK)) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"can't disable nonblocking on fd %d\n", fd);
+		return -2;
+	}
+
+	return 0;
+}
+
static void *
vhost_user_client_reconnect(void *arg __rte_unused)
{
static void *
vhost_user_client_reconnect(void *arg __rte_unused)
{
struct vhost_user_reconnect *reconn, *next;
while (1) {
struct vhost_user_reconnect *reconn, *next;
while (1) {
reconn != NULL; reconn = next) {
next = TAILQ_NEXT(reconn, next);
reconn != NULL; reconn = next) {
next = TAILQ_NEXT(reconn, next);
- if (connect(reconn->fd, (struct sockaddr *)&reconn->un,
- sizeof(reconn->un)) < 0)
+ ret = vhost_user_connect_nonblock(reconn->fd,
+ (struct sockaddr *)&reconn->un,
+ sizeof(reconn->un));
+ if (ret == -2) {
+ close(reconn->fd);
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "reconnection for fd %d failed\n",
+ reconn->fd);
+ goto remove_fd;
+ }
+ if (ret == -1)
continue;
RTE_LOG(INFO, VHOST_CONFIG,
"%s: connected\n", reconn->vsocket->path);
vhost_user_add_connection(reconn->fd, reconn->vsocket);
continue;
RTE_LOG(INFO, VHOST_CONFIG,
"%s: connected\n", reconn->vsocket->path);
vhost_user_add_connection(reconn->fd, reconn->vsocket);
TAILQ_REMOVE(&reconn_list.head, reconn, next);
free(reconn);
}
TAILQ_REMOVE(&reconn_list.head, reconn, next);
free(reconn);
}
- ret = connect(fd, (struct sockaddr *)&un, sizeof(un));
+ ret = vhost_user_connect_nonblock(fd, (struct sockaddr *)&un,
+ sizeof(un));
if (ret == 0) {
vhost_user_add_connection(fd, vsocket);
return 0;
if (ret == 0) {
vhost_user_add_connection(fd, vsocket);
return 0;
"failed to connect to %s: %s\n",
path, strerror(errno));
"failed to connect to %s: %s\n",
path, strerror(errno));
- if (!vsocket->reconnect) {
+ if (ret == -2 || !vsocket->reconnect) {