1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2010-2016 Intel Corporation
11 #include <sys/types.h>
12 #include <sys/socket.h>
14 #include <sys/queue.h>
23 #include "vhost_user.h"
/* List-head type for a socket's connection list (see conn_list in struct vhost_user_socket). */
26 TAILQ_HEAD(vhost_user_connection_list, vhost_user_connection);
/*
 * Per-socket state for one registered vhost-user socket path.
 *
 * NOTE(review): this listing does not show every member. Code elsewhere in
 * the file also uses path, socket_fd, is_server, reconnect, features and
 * vdpa_dev_id on this struct.
 */
29 * Every time rte_vhost_driver_register() is invoked, an associated
30 * vhost_user_socket struct will be created.
32 struct vhost_user_socket {
/* Connections on this socket; conn_mutex is taken around the list inserts/removals in this file. */
33 struct vhost_user_connection_list conn_list;
34 pthread_mutex_t conn_mutex;
/* AF_UNIX address for the socket; filled in by create_unix_socket(). */
37 struct sockaddr_un un;
/* Set at registration from the RTE_VHOST_USER_DEQUEUE_ZERO_COPY flag. */
40 bool dequeue_zero_copy;
/* Set to true at registration; cleared by rte_vhost_driver_set_features(). */
42 bool use_builtin_virtio_net;
45 * The "supported_features" indicates the feature bits the
46 * vhost driver supports. The "features" indicates the feature
47 * bits after the rte_vhost_driver_features_disable/enable().
48 * It is also the final feature bits used for vhost-user
49 * features negotiation.
51 uint64_t supported_features;
55 * Device id to identify a specific backend device.
56 * It's set to -1 for the default software implementation.
57 * If valid, one socket can have 1 connection only.
/* Callback table; assigned by rte_vhost_driver_callback_register(). */
61 struct vhost_device_ops const *notify_ops;
/*
 * One connection on a vhost-user socket, linked into the owning socket's
 * conn_list.
 *
 * NOTE(review): code in this file also uses conn->connfd and conn->vid;
 * those members are not visible in this listing.
 */
64 struct vhost_user_connection {
/* Back-pointer to the socket this connection belongs to. */
65 struct vhost_user_socket *vsocket;
/* Linkage for the socket's conn_list. */
69 TAILQ_ENTRY(vhost_user_connection) next;
/* Capacity of the vsockets table; rte_vhost_driver_register() fails once vsocket_cnt equals it. */
72 #define MAX_VHOST_SOCKET 1024
/*
 * Visible members of the global state struct (its header line is not shown
 * in this listing): the table of registered sockets and the mutex taken
 * around lookups and updates of that table throughout this file.
 */
74 struct vhost_user_socket *vsockets[MAX_VHOST_SOCKET];
77 pthread_mutex_t mutex;
/* Backlog value passed to listen() in vhost_user_start_server(). */
80 #define MAX_VIRTIO_BACKLOG 128
/* Forward declarations for static functions that are referenced before their definitions. */
82 static void vhost_user_server_new_connection(int fd, void *data, int *remove);
83 static void vhost_user_read_cb(int fd, void *dat, int *remove);
84 static int create_unix_socket(struct vhost_user_socket *vsocket);
85 static int vhost_user_start_client(struct vhost_user_socket *vsocket);
/*
 * Static initializer for the file-wide vhost_user state.
 * Every one of the MAX_FDS fd entries starts as {-1, NULL, NULL, NULL, 0};
 * the [0 ... N] range designator is a GNU C extension. Both mutexes are
 * statically initialized.
 */
87 static struct vhost_user vhost_user = {
89 .fd = { [0 ... MAX_FDS - 1] = {-1, NULL, NULL, NULL, 0} },
90 .fd_mutex = PTHREAD_MUTEX_INITIALIZER,
94 .mutex = PTHREAD_MUTEX_INITIALIZER,
/*
 * Receive one message from sockfd into buf (iov length buflen) and collect
 * any file descriptors delivered as SOL_SOCKET/SCM_RIGHTS ancillary data
 * into fds.
 *
 * The control buffer is sized for exactly fd_num descriptors. A message
 * flagged MSG_TRUNC or MSG_CTRUNC is logged as truncated. After the
 * ancillary scan, the trailing loop walks the fds slots from got_fds up to
 * fd_num (per its comment, to clear the unused ones).
 *
 * NOTE(review): several short lines of this function (declarations, error
 * returns, the body of the final loop) are not visible in this listing.
 */
97 /* Return the number of bytes read on success, or a negative value on failure. */
99 read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
103 size_t fdsize = fd_num * sizeof(int);
104 char control[CMSG_SPACE(fdsize)];
105 struct cmsghdr *cmsg;
109 memset(&msgh, 0, sizeof(msgh));
111 iov.iov_len = buflen;
115 msgh.msg_control = control;
116 msgh.msg_controllen = sizeof(control);
118 ret = recvmsg(sockfd, &msgh, 0);
120 RTE_LOG(ERR, VHOST_CONFIG, "recvmsg failed\n");
/* Either the payload or the ancillary data did not fit the supplied buffers. */
124 if (msgh.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) {
125 RTE_LOG(ERR, VHOST_CONFIG, "truncated msg\n");
129 for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL;
130 cmsg = CMSG_NXTHDR(&msgh, cmsg)) {
131 if ((cmsg->cmsg_level == SOL_SOCKET) &&
132 (cmsg->cmsg_type == SCM_RIGHTS)) {
/* Number of descriptors = payload length of this cmsg / sizeof(int). */
133 got_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
134 memcpy(fds, CMSG_DATA(cmsg), got_fds * sizeof(int));
139 /* Clear out unused file descriptors */
140 while (got_fds < fd_num)
/*
 * Send buf (iov length buflen) on sockfd, optionally passing file
 * descriptors.
 *
 * When fds is non-NULL and fd_num > 0, the descriptors are attached as a
 * single SOL_SOCKET/SCM_RIGHTS control message; otherwise the message is
 * sent with no control data. sendmsg() is called with MSG_NOSIGNAL and is
 * retried while it fails with EINTR.
 *
 * NOTE(review): return statements are not visible in this listing.
 */
147 send_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
152 size_t fdsize = fd_num * sizeof(int);
153 char control[CMSG_SPACE(fdsize)];
154 struct cmsghdr *cmsg;
157 memset(&msgh, 0, sizeof(msgh));
159 iov.iov_len = buflen;
164 if (fds && fd_num > 0) {
165 msgh.msg_control = control;
166 msgh.msg_controllen = sizeof(control);
167 cmsg = CMSG_FIRSTHDR(&msgh);
169 RTE_LOG(ERR, VHOST_CONFIG, "cmsg == NULL\n");
173 cmsg->cmsg_len = CMSG_LEN(fdsize);
174 cmsg->cmsg_level = SOL_SOCKET;
175 cmsg->cmsg_type = SCM_RIGHTS;
176 memcpy(CMSG_DATA(cmsg), fds, fdsize);
/* No descriptors to pass: send without ancillary data. */
178 msgh.msg_control = NULL;
179 msgh.msg_controllen = 0;
/* Retry the send if it is interrupted by a signal. */
183 ret = sendmsg(sockfd, &msgh, MSG_NOSIGNAL);
184 } while (ret < 0 && errno == EINTR);
187 RTE_LOG(ERR, VHOST_CONFIG, "sendmsg error\n");
/*
 * Create a vhost device for the connected descriptor fd and attach it to
 * vsocket.
 *
 * Visible steps: allocate a connection object, create a device with
 * vhost_new_device(), record the socket path as the interface name, apply
 * the socket's builtin-virtio-net and dequeue-zero-copy settings, invoke the
 * new_connection callback when one is set, register fd with the fdset using
 * vhost_user_read_cb, append the connection to the socket's list under
 * conn_mutex, and notify the fdset pipe.
 *
 * NOTE(review): the error-handling lines are not visible in this listing.
 * Confirm that a device created by vhost_new_device() is released on the
 * failure paths that follow it.
 * NOTE(review): vsocket->notify_ops is dereferenced without a visible NULL
 * check, and it is only assigned by rte_vhost_driver_callback_register().
 * Confirm callers always register callbacks before connections can arrive.
 */
195 vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket)
199 struct vhost_user_connection *conn;
202 conn = malloc(sizeof(*conn));
208 vid = vhost_new_device();
/* The length passed as the interface name length is capped at PATH_MAX. */
213 size = strnlen(vsocket->path, PATH_MAX);
214 vhost_set_ifname(vid, vsocket->path, size);
216 vhost_set_builtin_virtio_net(vid, vsocket->use_builtin_virtio_net);
218 if (vsocket->dequeue_zero_copy)
219 vhost_enable_dequeue_zero_copy(vid);
221 RTE_LOG(INFO, VHOST_CONFIG, "new device, handle is %d\n", vid);
/* The new_connection callback is optional. */
223 if (vsocket->notify_ops->new_connection) {
224 ret = vsocket->notify_ops->new_connection(vid);
226 RTE_LOG(ERR, VHOST_CONFIG,
227 "failed to add vhost user connection with fd %d\n",
234 conn->vsocket = vsocket;
/* Messages arriving on fd are dispatched to vhost_user_read_cb. */
236 ret = fdset_add(&vhost_user.fdset, fd, vhost_user_read_cb,
239 RTE_LOG(ERR, VHOST_CONFIG,
240 "failed to add fd %d into vhost server fdset\n",
/* Undo the new_connection notification when fd registration fails. */
243 if (vsocket->notify_ops->destroy_connection)
244 vsocket->notify_ops->destroy_connection(conn->vid);
249 pthread_mutex_lock(&vsocket->conn_mutex);
250 TAILQ_INSERT_TAIL(&vsocket->conn_list, conn, next);
251 pthread_mutex_unlock(&vsocket->conn_mutex);
253 fdset_pipe_notify(&vhost_user.fdset);
/*
 * fdset callback for a server-mode listening socket. dat is the owning
 * vhost_user_socket; the remove argument is unused.
 */
261 /* Callback invoked when a client opens a new vhost-user connection. */
263 vhost_user_server_new_connection(int fd, void *dat, int *remove __rte_unused)
265 struct vhost_user_socket *vsocket = dat;
/* The fd parameter is reused to hold the accepted connection descriptor. */
267 fd = accept(fd, NULL, NULL);
271 RTE_LOG(INFO, VHOST_CONFIG, "new vhost user connection is %d\n", fd);
272 vhost_user_add_connection(fd, vsocket);
/*
 * fdset callback for an established connection. dat is the
 * vhost_user_connection; incoming data is handed to
 * vhost_user_msg_handler().
 *
 * The teardown that follows destroys the device, invokes the optional
 * destroy_connection callback, unlinks the connection from the socket's list
 * under conn_mutex and, when the socket has reconnect enabled, creates a new
 * unix socket and starts the client again.
 *
 * NOTE(review): the condition guarding the teardown is not visible in this
 * listing; presumably it runs only when the handler reports failure.
 * NOTE(review): vhost_destroy_device(conn->vid) runs before the
 * destroy_connection callback receives the same vid. Confirm the callback
 * does not need the device to still exist.
 * NOTE(review): the return value of create_unix_socket() is not used before
 * vhost_user_start_client() is called.
 */
276 vhost_user_read_cb(int connfd, void *dat, int *remove)
278 struct vhost_user_connection *conn = dat;
279 struct vhost_user_socket *vsocket = conn->vsocket;
282 ret = vhost_user_msg_handler(conn->vid, connfd);
286 vhost_destroy_device(conn->vid);
288 if (vsocket->notify_ops->destroy_connection)
289 vsocket->notify_ops->destroy_connection(conn->vid);
291 pthread_mutex_lock(&vsocket->conn_mutex);
292 TAILQ_REMOVE(&vsocket->conn_list, conn, next);
293 pthread_mutex_unlock(&vsocket->conn_mutex);
/* Client sockets with reconnect enabled get a fresh socket and a new connect attempt. */
297 if (vsocket->reconnect) {
298 create_unix_socket(vsocket);
299 vhost_user_start_client(vsocket);
/*
 * Create the AF_UNIX stream socket for vsocket and prepare its address.
 *
 * Client-mode sockets (is_server false) are switched to non-blocking mode.
 * vsocket->un is zeroed and filled with the socket path, and the new
 * descriptor is stored in vsocket->socket_fd.
 *
 * NOTE(review): error checks and return statements are not visible in this
 * listing.
 */
305 create_unix_socket(struct vhost_user_socket *vsocket)
308 struct sockaddr_un *un = &vsocket->un;
310 fd = socket(AF_UNIX, SOCK_STREAM, 0);
313 RTE_LOG(INFO, VHOST_CONFIG, "vhost-user %s: socket created, fd: %d\n",
314 vsocket->is_server ? "server" : "client", fd);
/* Only client sockets are made non-blocking here. */
316 if (!vsocket->is_server && fcntl(fd, F_SETFL, O_NONBLOCK)) {
317 RTE_LOG(ERR, VHOST_CONFIG,
318 "vhost-user: can't set nonblocking mode for socket, fd: "
319 "%d (%s)\n", fd, strerror(errno));
324 memset(un, 0, sizeof(*un));
325 un->sun_family = AF_UNIX;
/* A path longer than sun_path is truncated; the terminator is forced on the last byte. */
326 strncpy(un->sun_path, vsocket->path, sizeof(un->sun_path));
327 un->sun_path[sizeof(un->sun_path) - 1] = '\0';
329 vsocket->socket_fd = fd;
/*
 * Start a server-mode socket: bind vsocket->socket_fd to the prepared
 * address, listen with a backlog of MAX_VIRTIO_BACKLOG, and register the
 * descriptor with the fdset so vhost_user_server_new_connection handles
 * incoming connections.
 *
 * NOTE(review): error checks and return statements are not visible in this
 * listing.
 */
334 vhost_user_start_server(struct vhost_user_socket *vsocket)
337 int fd = vsocket->socket_fd;
338 const char *path = vsocket->path;
341 * bind () may fail if the socket file with the same name already
342 * exists. But the library obviously should not delete the file
343 * provided by the user, since we can not be sure that it is not
344 * being used by other applications. Moreover, many applications form
345 * socket names based on user input, which is prone to errors.
347 * The user must ensure that the socket does not exist before
348 * registering the vhost driver in server mode.
350 ret = bind(fd, (struct sockaddr *)&vsocket->un, sizeof(vsocket->un));
352 RTE_LOG(ERR, VHOST_CONFIG,
353 "failed to bind to %s: %s; remove it and try again\n",
354 path, strerror(errno));
357 RTE_LOG(INFO, VHOST_CONFIG, "bind to %s\n", path);
359 ret = listen(fd, MAX_VIRTIO_BACKLOG);
363 ret = fdset_add(&vhost_user.fdset, fd, vhost_user_server_new_connection,
366 RTE_LOG(ERR, VHOST_CONFIG,
367 "failed to add listen fd %d to vhost server fdset\n",
/*
 * One pending client reconnection, queued on reconn_list.
 *
 * NOTE(review): code in this file also uses reconn->fd; that member is not
 * visible in this listing.
 */
379 struct vhost_user_reconnect {
/* Copy of the socket address to connect to (assigned from vsocket->un when queued). */
380 struct sockaddr_un un;
/* Socket that the connection is added to once connect succeeds. */
382 struct vhost_user_socket *vsocket;
384 TAILQ_ENTRY(vhost_user_reconnect) next;
/* Queue of pending reconnections; the mutex is taken around every traversal and update of head in this file. */
387 TAILQ_HEAD(vhost_user_reconnect_tailq_list, vhost_user_reconnect);
388 struct vhost_user_reconnect_list {
389 struct vhost_user_reconnect_tailq_list head;
390 pthread_mutex_t mutex;
/* File-wide reconnect queue and the thread created by vhost_user_reconnect_init(). */
393 static struct vhost_user_reconnect_list reconn_list;
394 static pthread_t reconn_tid;
/*
 * Attempt connect() on fd and, once past the connect check, clear
 * O_NONBLOCK if it is set.
 *
 * A connect() failure with errno EISCONN is not treated as a failure by the
 * visible check.
 *
 * NOTE(review): the return statements are not visible in this listing.
 * Callers compare the result against -2, so distinct negative codes are
 * presumably returned for connect failures and fcntl failures.
 */
397 vhost_user_connect_nonblock(int fd, struct sockaddr *un, size_t sz)
401 ret = connect(fd, un, sz);
402 if (ret < 0 && errno != EISCONN)
405 flags = fcntl(fd, F_GETFL, 0);
407 RTE_LOG(ERR, VHOST_CONFIG,
408 "can't get flags for connfd %d\n", fd);
/* Switch the descriptor back to blocking mode only when it is currently non-blocking. */
411 if ((flags & O_NONBLOCK) && fcntl(fd, F_SETFL, flags & ~O_NONBLOCK)) {
412 RTE_LOG(ERR, VHOST_CONFIG,
413 "can't disable nonblocking on fd %d\n", fd);
/*
 * Thread routine started by vhost_user_reconnect_init(); arg is unused.
 *
 * With reconn_list.mutex held, it walks the pending reconnections and tries
 * vhost_user_connect_nonblock() on each. The visible success path logs the
 * connection, hands the descriptor to vhost_user_add_connection() and
 * removes the entry from the list.
 *
 * NOTE(review): the outer loop, the result checks and the handling of failed
 * entries are not visible in this listing.
 */
420 vhost_user_client_reconnect(void *arg __rte_unused)
423 struct vhost_user_reconnect *reconn, *next;
426 pthread_mutex_lock(&reconn_list.mutex);
429 * An equal implementation of TAILQ_FOREACH_SAFE,
430 * which does not exist on all platforms.
432 for (reconn = TAILQ_FIRST(&reconn_list.head);
433 reconn != NULL; reconn = next) {
/* Fetch the successor first so the current entry can be removed safely. */
434 next = TAILQ_NEXT(reconn, next);
436 ret = vhost_user_connect_nonblock(reconn->fd,
437 (struct sockaddr *)&reconn->un,
441 RTE_LOG(ERR, VHOST_CONFIG,
442 "reconnection for fd %d failed\n",
449 RTE_LOG(INFO, VHOST_CONFIG,
450 "%s: connected\n", reconn->vsocket->path);
451 vhost_user_add_connection(reconn->fd, reconn->vsocket);
453 TAILQ_REMOVE(&reconn_list.head, reconn, next);
457 pthread_mutex_unlock(&reconn_list.mutex);
/*
 * Initialize the reconnect machinery: the reconn_list mutex and queue, and
 * the thread running vhost_user_client_reconnect(). If thread creation
 * fails, the mutex is destroyed again. Failure to name the thread is only
 * logged at DEBUG level.
 *
 * The log messages here end with "\n", like the other RTE_LOG calls in this
 * file.
 *
 * NOTE(review): result checks and return statements are not visible in this
 * listing.
 */
465 vhost_user_reconnect_init(void)
468 char thread_name[RTE_MAX_THREAD_NAME_LEN];
470 ret = pthread_mutex_init(&reconn_list.mutex, NULL);
472 RTE_LOG(ERR, VHOST_CONFIG, "failed to initialize mutex\n");
475 TAILQ_INIT(&reconn_list.head);
477 ret = pthread_create(&reconn_tid, NULL,
478 vhost_user_client_reconnect, NULL);
480 RTE_LOG(ERR, VHOST_CONFIG, "failed to create reconnect thread\n");
/* Roll back the mutex initialization when the thread could not be created. */
481 if (pthread_mutex_destroy(&reconn_list.mutex)) {
482 RTE_LOG(ERR, VHOST_CONFIG,
483 "failed to destroy reconnect mutex\n");
486 snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN,
489 if (rte_thread_setname(reconn_tid, thread_name)) {
490 RTE_LOG(DEBUG, VHOST_CONFIG,
491 "failed to set reconnect thread name\n");
/*
 * Start a client-mode socket: try to connect vsocket->socket_fd to the
 * prepared address and, on the visible success path, add the connection.
 *
 * On failure a warning is logged. When the result is -2 or the socket has
 * reconnect disabled, the reconnect path is skipped (the statements in that
 * branch are not visible). Otherwise a vhost_user_reconnect entry is
 * allocated, filled in and appended to reconn_list under its mutex for the
 * reconnect thread to retry.
 *
 * NOTE(review): result checks and return statements are not visible in this
 * listing.
 */
499 vhost_user_start_client(struct vhost_user_socket *vsocket)
502 int fd = vsocket->socket_fd;
503 const char *path = vsocket->path;
504 struct vhost_user_reconnect *reconn;
506 ret = vhost_user_connect_nonblock(fd, (struct sockaddr *)&vsocket->un,
507 sizeof(vsocket->un));
509 vhost_user_add_connection(fd, vsocket);
513 RTE_LOG(WARNING, VHOST_CONFIG,
514 "failed to connect to %s: %s\n",
515 path, strerror(errno));
517 if (ret == -2 || !vsocket->reconnect) {
522 RTE_LOG(INFO, VHOST_CONFIG, "%s: reconnecting...\n", path);
523 reconn = malloc(sizeof(*reconn));
524 if (reconn == NULL) {
525 RTE_LOG(ERR, VHOST_CONFIG,
526 "failed to allocate memory for reconnect\n");
/* The entry keeps its own copy of the address. */
530 reconn->un = vsocket->un;
532 reconn->vsocket = vsocket;
533 pthread_mutex_lock(&reconn_list.mutex);
534 TAILQ_INSERT_TAIL(&reconn_list.head, reconn, next);
535 pthread_mutex_unlock(&reconn_list.mutex);
/*
 * Look up a registered socket by exact path match (strcmp) among the first
 * vsocket_cnt entries of vhost_user.vsockets.
 *
 * The function itself takes no lock; every caller in this file holds
 * vhost_user.mutex around the call.
 *
 * NOTE(review): the return statements are not visible in this listing.
 * Callers test the result against NULL.
 */
540 static struct vhost_user_socket *
541 find_vhost_user_socket(const char *path)
545 for (i = 0; i < vhost_user.vsocket_cnt; i++) {
546 struct vhost_user_socket *vsocket = vhost_user.vsockets[i];
548 if (!strcmp(vsocket->path, path))
/*
 * Associate vDPA device id did with the socket registered under path.
 *
 * did is first checked with rte_vdpa_get_device(). The socket lookup and the
 * assignment happen under vhost_user.mutex.
 * Returns 0 when the socket was found, -1 otherwise.
 *
 * NOTE(review): the early return for an unknown did and the NULL guard
 * before the assignment are not visible in this listing.
 */
556 rte_vhost_driver_attach_vdpa_device(const char *path, int did)
558 struct vhost_user_socket *vsocket;
560 if (rte_vdpa_get_device(did) == NULL)
563 pthread_mutex_lock(&vhost_user.mutex);
564 vsocket = find_vhost_user_socket(path);
566 vsocket->vdpa_dev_id = did;
567 pthread_mutex_unlock(&vhost_user.mutex);
569 return vsocket ? 0 : -1;
/*
 * Reset the vDPA device id of the socket registered under path to -1.
 * The lookup and the assignment happen under vhost_user.mutex.
 * Returns 0 when the socket was found, -1 otherwise.
 *
 * NOTE(review): the NULL guard before the assignment is not visible in this
 * listing.
 */
573 rte_vhost_driver_detach_vdpa_device(const char *path)
575 struct vhost_user_socket *vsocket;
577 pthread_mutex_lock(&vhost_user.mutex);
578 vsocket = find_vhost_user_socket(path);
580 vsocket->vdpa_dev_id = -1;
581 pthread_mutex_unlock(&vhost_user.mutex);
583 return vsocket ? 0 : -1;
/*
 * Read the vDPA device id of the socket registered under path, under
 * vhost_user.mutex.
 *
 * NOTE(review): the declaration of did, the NULL guard and the return
 * statement are not visible in this listing, so the value returned for an
 * unknown path cannot be stated from here.
 */
587 rte_vhost_driver_get_vdpa_device_id(const char *path)
589 struct vhost_user_socket *vsocket;
592 pthread_mutex_lock(&vhost_user.mutex);
593 vsocket = find_vhost_user_socket(path);
595 did = vsocket->vdpa_dev_id;
596 pthread_mutex_unlock(&vhost_user.mutex);
/*
 * Clear the given feature bits from the "features" mask of the socket
 * registered under path. supported_features is not modified by the visible
 * code. The lookup and the update happen under vhost_user.mutex.
 * Returns 0 when the socket was found, -1 otherwise.
 *
 * NOTE(review): the NULL guard before the update is not visible in this
 * listing.
 */
602 rte_vhost_driver_disable_features(const char *path, uint64_t features)
604 struct vhost_user_socket *vsocket;
606 pthread_mutex_lock(&vhost_user.mutex);
607 vsocket = find_vhost_user_socket(path);
609 /* Note that use_builtin_virtio_net is not affected by this function
610 * since callers may want to selectively disable features of the
611 * built-in vhost net device backend.
615 vsocket->features &= ~features;
616 pthread_mutex_unlock(&vhost_user.mutex);
618 return vsocket ? 0 : -1;
/*
 * Set the given feature bits in the "features" mask of the socket registered
 * under path.
 *
 * Every requested bit must already be present in supported_features;
 * otherwise the mutex is released and the request is refused (the return
 * statement of that branch is not visible in this listing).
 * The final return is 0 when the socket was found, -1 otherwise.
 *
 * NOTE(review): the NULL guard around the update is not visible in this
 * listing.
 */
622 rte_vhost_driver_enable_features(const char *path, uint64_t features)
624 struct vhost_user_socket *vsocket;
626 pthread_mutex_lock(&vhost_user.mutex);
627 vsocket = find_vhost_user_socket(path);
/* True when at least one requested bit is missing from supported_features. */
629 if ((vsocket->supported_features & features) != features) {
631 * trying to enable features the driver doesn't
634 pthread_mutex_unlock(&vhost_user.mutex);
637 vsocket->features |= features;
639 pthread_mutex_unlock(&vhost_user.mutex);
641 return vsocket ? 0 : -1;
/*
 * Replace both supported_features and features of the socket registered
 * under path with the given mask, and clear use_builtin_virtio_net.
 * The lookup and the update happen under vhost_user.mutex.
 * Returns 0 when the socket was found, -1 otherwise.
 *
 * NOTE(review): the NULL guard around the update is not visible in this
 * listing.
 */
645 rte_vhost_driver_set_features(const char *path, uint64_t features)
647 struct vhost_user_socket *vsocket;
649 pthread_mutex_lock(&vhost_user.mutex);
650 vsocket = find_vhost_user_socket(path);
652 vsocket->supported_features = features;
653 vsocket->features = features;
655 /* Anyone setting feature bits is implementing their own vhost
658 vsocket->use_builtin_virtio_net = false;
660 pthread_mutex_unlock(&vhost_user.mutex);
662 return vsocket ? 0 : -1;
/*
 * Copy the current "features" mask of the socket registered under path into
 * *features, under vhost_user.mutex. An error is logged when the path is not
 * registered.
 *
 * NOTE(review): the NULL guard and the return statements are not visible in
 * this listing.
 */
666 rte_vhost_driver_get_features(const char *path, uint64_t *features)
668 struct vhost_user_socket *vsocket;
670 pthread_mutex_lock(&vhost_user.mutex);
671 vsocket = find_vhost_user_socket(path);
673 *features = vsocket->features;
674 pthread_mutex_unlock(&vhost_user.mutex);
677 RTE_LOG(ERR, VHOST_CONFIG,
678 "socket file %s is not registered yet.\n", path);
/*
 * Visible flow, all under vhost_user.mutex:
 *  - refuse when vsocket_cnt has reached MAX_VHOST_SOCKET;
 *  - allocate and zero a vhost_user_socket, duplicate path into it;
 *  - initialize the connection list and its mutex;
 *  - derive dequeue_zero_copy from flags and set the builtin virtio-net
 *    feature masks, dropping VIRTIO_F_IOMMU_PLATFORM unless
 *    RTE_VHOST_USER_IOMMU_SUPPORT is given;
 *  - client mode (RTE_VHOST_USER_CLIENT): reconnect is enabled unless
 *    RTE_VHOST_USER_NO_RECONNECT is set, and the reconnect thread is
 *    initialized on first use; otherwise the socket is a server;
 *  - create the unix socket and store the entry in the vsockets table.
 * The tail of the function destroys the connection mutex and unlocks
 * vhost_user.mutex.
 *
 * NOTE(review): result checks, cleanup labels, frees and return statements
 * are not visible in this listing.
 */
686 * Register a new vhost-user socket; here we could act as server
687 * (the default case), or client (when RTE_VHOST_USER_CLIENT) flag
691 rte_vhost_driver_register(const char *path, uint64_t flags)
694 struct vhost_user_socket *vsocket;
699 pthread_mutex_lock(&vhost_user.mutex);
701 if (vhost_user.vsocket_cnt == MAX_VHOST_SOCKET) {
702 RTE_LOG(ERR, VHOST_CONFIG,
703 "error: the number of vhost sockets reaches maximum\n");
707 vsocket = malloc(sizeof(struct vhost_user_socket));
/* Zeroing leaves notify_ops NULL until rte_vhost_driver_callback_register() is called. */
710 memset(vsocket, 0, sizeof(struct vhost_user_socket));
711 vsocket->path = strdup(path);
712 if (vsocket->path == NULL) {
713 RTE_LOG(ERR, VHOST_CONFIG,
714 "error: failed to copy socket path string\n");
718 TAILQ_INIT(&vsocket->conn_list);
719 ret = pthread_mutex_init(&vsocket->conn_mutex, NULL);
721 RTE_LOG(ERR, VHOST_CONFIG,
722 "error: failed to init connection mutex\n");
725 vsocket->dequeue_zero_copy = flags & RTE_VHOST_USER_DEQUEUE_ZERO_COPY;
728 * Set the supported features correctly for the builtin vhost-user
731 * Applications know nothing about features the builtin virtio net
732 * driver (virtio_net.c) supports, thus it's not possible for them
733 * to invoke rte_vhost_driver_set_features(). To workaround it, here
734 * we set it unconditionally. If the application want to implement
735 * another vhost-user driver (say SCSI), it should call the
736 * rte_vhost_driver_set_features(), which will overwrite following
739 vsocket->use_builtin_virtio_net = true;
740 vsocket->supported_features = VIRTIO_NET_SUPPORTED_FEATURES;
741 vsocket->features = VIRTIO_NET_SUPPORTED_FEATURES;
/* IOMMU support is opt-in: without the flag the feature bit is removed from both masks. */
743 if (!(flags & RTE_VHOST_USER_IOMMU_SUPPORT)) {
744 vsocket->supported_features &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
745 vsocket->features &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
748 if ((flags & RTE_VHOST_USER_CLIENT) != 0) {
749 vsocket->reconnect = !(flags & RTE_VHOST_USER_NO_RECONNECT);
/* reconn_tid == 0 is used as the "reconnect thread not started yet" indicator. */
750 if (vsocket->reconnect && reconn_tid == 0) {
751 if (vhost_user_reconnect_init() != 0)
755 vsocket->is_server = true;
757 ret = create_unix_socket(vsocket);
762 vhost_user.vsockets[vhost_user.vsocket_cnt++] = vsocket;
764 pthread_mutex_unlock(&vhost_user.mutex);
768 if (pthread_mutex_destroy(&vsocket->conn_mutex)) {
769 RTE_LOG(ERR, VHOST_CONFIG,
770 "error: failed to destroy connection mutex\n");
776 pthread_mutex_unlock(&vhost_user.mutex);
/*
 * Remove the pending reconnection entry that belongs to vsocket from
 * reconn_list, under reconn_list.mutex.
 *
 * NOTE(review): whatever follows the TAILQ_REMOVE inside the match branch
 * (for example releasing the entry or leaving the loop) and the return
 * statement are not visible in this listing.
 */
782 vhost_user_remove_reconnect(struct vhost_user_socket *vsocket)
785 struct vhost_user_reconnect *reconn, *next;
787 pthread_mutex_lock(&reconn_list.mutex);
789 for (reconn = TAILQ_FIRST(&reconn_list.head);
790 reconn != NULL; reconn = next) {
/* Fetch the successor first so the current entry can be removed safely. */
791 next = TAILQ_NEXT(reconn, next);
793 if (reconn->vsocket == vsocket) {
794 TAILQ_REMOVE(&reconn_list.head, reconn, next);
801 pthread_mutex_unlock(&reconn_list.mutex);
/*
 * Visible flow, under vhost_user.mutex: find the table entry whose path
 * matches exactly. For a server socket, remove the listening descriptor from
 * the fdset and close it; for a client socket with reconnect enabled, drop
 * its pending reconnection. Then, under the socket's conn_mutex, tear down
 * every connection (remove its descriptor from the fdset, destroy the
 * device, unlink it from the list), destroy conn_mutex, and compact the
 * table by moving the last entry into the freed slot.
 *
 * NOTE(review): frees, close() calls on connection descriptors and return
 * statements are not visible in this listing.
 */
806 * Unregister the specified vhost socket
809 rte_vhost_driver_unregister(const char *path)
813 struct vhost_user_connection *conn, *next;
815 pthread_mutex_lock(&vhost_user.mutex);
817 for (i = 0; i < vhost_user.vsocket_cnt; i++) {
818 struct vhost_user_socket *vsocket = vhost_user.vsockets[i];
820 if (!strcmp(vsocket->path, path)) {
821 if (vsocket->is_server) {
822 fdset_del(&vhost_user.fdset, vsocket->socket_fd);
823 close(vsocket->socket_fd);
825 } else if (vsocket->reconnect) {
826 vhost_user_remove_reconnect(vsocket);
829 pthread_mutex_lock(&vsocket->conn_mutex);
830 for (conn = TAILQ_FIRST(&vsocket->conn_list);
/* Fetch the successor first so the current connection can be removed safely. */
833 next = TAILQ_NEXT(conn, next);
835 fdset_del(&vhost_user.fdset, conn->connfd);
836 RTE_LOG(INFO, VHOST_CONFIG,
837 "free connfd = %d for device '%s'\n",
840 vhost_destroy_device(conn->vid);
841 TAILQ_REMOVE(&vsocket->conn_list, conn, next);
844 pthread_mutex_unlock(&vsocket->conn_mutex);
846 pthread_mutex_destroy(&vsocket->conn_mutex);
/* Keep the table dense: the last entry takes the place of the removed one. */
850 count = --vhost_user.vsocket_cnt;
851 vhost_user.vsockets[i] = vhost_user.vsockets[count];
852 vhost_user.vsockets[count] = NULL;
853 pthread_mutex_unlock(&vhost_user.mutex);
858 pthread_mutex_unlock(&vhost_user.mutex);
/*
 * Store ops as the notify_ops callback table of the socket registered under
 * path. The pointer itself is stored, not a copy of the table.
 * The lookup and the assignment happen under vhost_user.mutex.
 * Returns 0 when the socket was found, -1 otherwise.
 *
 * NOTE(review): the NULL guard before the assignment is not visible in this
 * listing.
 */
864 * Register ops so that we can add/remove device to data core.
867 rte_vhost_driver_callback_register(const char *path,
868 struct vhost_device_ops const * const ops)
870 struct vhost_user_socket *vsocket;
872 pthread_mutex_lock(&vhost_user.mutex);
873 vsocket = find_vhost_user_socket(path);
875 vsocket->notify_ops = ops;
876 pthread_mutex_unlock(&vhost_user.mutex);
878 return vsocket ? 0 : -1;
/*
 * Return the callback table registered for the socket under path, or NULL
 * when no such socket is registered.
 *
 * NOTE(review): vsocket->notify_ops is read after vhost_user.mutex has been
 * released, so the socket could be unregistered between the lookup and the
 * read. Confirm callers cannot race with rte_vhost_driver_unregister().
 */
881 struct vhost_device_ops const *
882 vhost_driver_callback_get(const char *path)
884 struct vhost_user_socket *vsocket;
886 pthread_mutex_lock(&vhost_user.mutex);
887 vsocket = find_vhost_user_socket(path);
888 pthread_mutex_unlock(&vhost_user.mutex);
890 return vsocket ? vsocket->notify_ops : NULL;
/*
 * Start the socket registered under path.
 *
 * The socket is looked up under vhost_user.mutex. On the first call
 * (fdset_tid == 0) the fdset notification pipe is created and the
 * fdset_event_dispatch thread is started; if thread creation fails the pipe
 * is torn down again. Failure to name the thread is only logged at DEBUG
 * level. Finally the socket is started as a server or as a client according
 * to is_server.
 *
 * The log messages here end with "\n", like the other RTE_LOG calls in this
 * file.
 *
 * NOTE(review): the check for an unknown path, result checks and early
 * return statements are not visible in this listing.
 */
894 rte_vhost_driver_start(const char *path)
896 struct vhost_user_socket *vsocket;
/* Static so the dispatch thread is created once for the whole process. */
897 static pthread_t fdset_tid;
898 char thread_name[RTE_MAX_THREAD_NAME_LEN];
900 pthread_mutex_lock(&vhost_user.mutex);
901 vsocket = find_vhost_user_socket(path);
902 pthread_mutex_unlock(&vhost_user.mutex);
907 if (fdset_tid == 0) {
909 * create a pipe which will be waited by poll and notified to
910 * rebuild the wait list of poll.
912 if (fdset_pipe_init(&vhost_user.fdset) < 0) {
913 RTE_LOG(ERR, VHOST_CONFIG,
914 "failed to create pipe for vhost fdset\n");
918 int ret = pthread_create(&fdset_tid, NULL, fdset_event_dispatch,
921 RTE_LOG(ERR, VHOST_CONFIG,
922 "failed to create fdset handling thread\n");
924 fdset_pipe_uninit(&vhost_user.fdset);
927 snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN,
930 if (rte_thread_setname(fdset_tid, thread_name)) {
931 RTE_LOG(DEBUG, VHOST_CONFIG,
932 "failed to set vhost-event thread name\n");
937 if (vsocket->is_server)
938 return vhost_user_start_server(vsocket);
940 return vhost_user_start_client(vsocket);