ipc: end multiprocess thread during cleanup
[dpdk.git] / lib / vhost / socket.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2016 Intel Corporation
3  */
4
5 #include <stdint.h>
6 #include <stdio.h>
7 #include <limits.h>
8 #include <stdlib.h>
9 #include <unistd.h>
10 #include <string.h>
11 #include <sys/types.h>
12 #include <sys/socket.h>
13 #include <sys/un.h>
14 #include <sys/queue.h>
15 #include <errno.h>
16 #include <fcntl.h>
17 #include <pthread.h>
18
19 #include <rte_log.h>
20
21 #include "fd_man.h"
22 #include "vhost.h"
23 #include "vhost_user.h"
24
25
TAILQ_HEAD(vhost_user_connection_list, vhost_user_connection);

/*
 * Every time rte_vhost_driver_register() is invoked, an associated
 * vhost_user_socket struct will be created.
 */
struct vhost_user_socket {
	struct vhost_user_connection_list conn_list;	/* live connections on this socket */
	pthread_mutex_t conn_mutex;	/* protects conn_list */
	char *path;			/* heap copy of the unix socket path */
	int socket_fd;			/* listening (server) or connecting (client) fd */
	struct sockaddr_un un;
	bool is_server;			/* true unless RTE_VHOST_USER_CLIENT was passed */
	bool reconnect;			/* client mode: retry connection in background */
	bool iommu_support;
	bool use_builtin_virtio_net;	/* cleared when app sets its own features */
	bool extbuf;
	bool linearbuf;
	bool async_copy;
	bool net_compliant_ol_flags;

	/*
	 * The "supported_features" indicates the feature bits the
	 * vhost driver supports. The "features" indicates the feature
	 * bits after the rte_vhost_driver_features_disable/enable().
	 * It is also the final feature bits used for vhost-user
	 * features negotiation.
	 */
	uint64_t supported_features;
	uint64_t features;

	uint64_t protocol_features;

	struct rte_vdpa_device *vdpa_dev;

	struct rte_vhost_device_ops const *notify_ops;
};
63
/* One established vhost-user connection (i.e. one vhost device). */
struct vhost_user_connection {
	struct vhost_user_socket *vsocket;	/* owning socket */
	int connfd;				/* connected unix socket fd */
	int vid;				/* vhost device id */

	TAILQ_ENTRY(vhost_user_connection) next;
};

/* Upper bound on sockets registerable via rte_vhost_driver_register(). */
#define MAX_VHOST_SOCKET 1024
/* Process-wide registry of vhost-user sockets plus the shared fd event set. */
struct vhost_user {
	struct vhost_user_socket *vsockets[MAX_VHOST_SOCKET];
	struct fdset fdset;
	int vsocket_cnt;
	pthread_mutex_t mutex;	/* protects vsockets[] and vsocket_cnt */
};
79
/* listen() backlog used for server-mode sockets */
#define MAX_VIRTIO_BACKLOG 128

static void vhost_user_server_new_connection(int fd, void *data, int *remove);
static void vhost_user_read_cb(int fd, void *dat, int *remove);
static int create_unix_socket(struct vhost_user_socket *vsocket);
static int vhost_user_start_client(struct vhost_user_socket *vsocket);

/* Single global instance; every registered socket shares one fdset. */
static struct vhost_user vhost_user = {
	.fdset = {
		.fd = { [0 ... MAX_FDS - 1] = {-1, NULL, NULL, NULL, 0} },
		.fd_mutex = PTHREAD_MUTEX_INITIALIZER,
		.fd_pooling_mutex = PTHREAD_MUTEX_INITIALIZER,
		.num = 0
	},
	.vsocket_cnt = 0,
	.mutex = PTHREAD_MUTEX_INITIALIZER,
};
97
98 /*
99  * return bytes# of read on success or negative val on failure. Update fdnum
100  * with number of fds read.
101  */
102 int
103 read_fd_message(char *ifname, int sockfd, char *buf, int buflen, int *fds, int max_fds,
104                 int *fd_num)
105 {
106         struct iovec iov;
107         struct msghdr msgh;
108         char control[CMSG_SPACE(max_fds * sizeof(int))];
109         struct cmsghdr *cmsg;
110         int got_fds = 0;
111         int ret;
112
113         *fd_num = 0;
114
115         memset(&msgh, 0, sizeof(msgh));
116         iov.iov_base = buf;
117         iov.iov_len  = buflen;
118
119         msgh.msg_iov = &iov;
120         msgh.msg_iovlen = 1;
121         msgh.msg_control = control;
122         msgh.msg_controllen = sizeof(control);
123
124         ret = recvmsg(sockfd, &msgh, 0);
125         if (ret <= 0) {
126                 if (ret)
127                         VHOST_LOG_CONFIG(ERR, "(%s) recvmsg failed on fd %d (%s)\n",
128                                         ifname, sockfd, strerror(errno));
129                 return ret;
130         }
131
132         if (msgh.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) {
133                 VHOST_LOG_CONFIG(ERR, "(%s) truncated msg (fd %d)\n", ifname, sockfd);
134                 return -1;
135         }
136
137         for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL;
138                 cmsg = CMSG_NXTHDR(&msgh, cmsg)) {
139                 if ((cmsg->cmsg_level == SOL_SOCKET) &&
140                         (cmsg->cmsg_type == SCM_RIGHTS)) {
141                         got_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
142                         *fd_num = got_fds;
143                         memcpy(fds, CMSG_DATA(cmsg), got_fds * sizeof(int));
144                         break;
145                 }
146         }
147
148         /* Clear out unused file descriptors */
149         while (got_fds < max_fds)
150                 fds[got_fds++] = -1;
151
152         return ret;
153 }
154
155 int
156 send_fd_message(char *ifname, int sockfd, char *buf, int buflen, int *fds, int fd_num)
157 {
158
159         struct iovec iov;
160         struct msghdr msgh;
161         size_t fdsize = fd_num * sizeof(int);
162         char control[CMSG_SPACE(fdsize)];
163         struct cmsghdr *cmsg;
164         int ret;
165
166         memset(&msgh, 0, sizeof(msgh));
167         iov.iov_base = buf;
168         iov.iov_len = buflen;
169
170         msgh.msg_iov = &iov;
171         msgh.msg_iovlen = 1;
172
173         if (fds && fd_num > 0) {
174                 msgh.msg_control = control;
175                 msgh.msg_controllen = sizeof(control);
176                 cmsg = CMSG_FIRSTHDR(&msgh);
177                 if (cmsg == NULL) {
178                         VHOST_LOG_CONFIG(ERR, "(%s) cmsg == NULL\n", ifname);
179                         errno = EINVAL;
180                         return -1;
181                 }
182                 cmsg->cmsg_len = CMSG_LEN(fdsize);
183                 cmsg->cmsg_level = SOL_SOCKET;
184                 cmsg->cmsg_type = SCM_RIGHTS;
185                 memcpy(CMSG_DATA(cmsg), fds, fdsize);
186         } else {
187                 msgh.msg_control = NULL;
188                 msgh.msg_controllen = 0;
189         }
190
191         do {
192                 ret = sendmsg(sockfd, &msgh, MSG_NOSIGNAL);
193         } while (ret < 0 && errno == EINTR);
194
195         if (ret < 0) {
196                 VHOST_LOG_CONFIG(ERR, "(%s) sendmsg error on fd %d (%s)\n",
197                                 ifname, sockfd, strerror(errno));
198                 return ret;
199         }
200
201         return ret;
202 }
203
/*
 * Create a vhost device for a freshly connected fd, configure it from the
 * socket's options, notify the application, and add the fd to the shared
 * fdset for message handling. On any failure the device, connection
 * structure and fd are all released.
 */
static void
vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket)
{
	int vid;
	size_t size;
	struct vhost_user_connection *conn;
	int ret;
	struct virtio_net *dev;

	if (vsocket == NULL)
		return;

	conn = malloc(sizeof(*conn));
	if (conn == NULL) {
		close(fd);
		return;
	}

	vid = vhost_new_device();
	if (vid == -1) {
		goto err;
	}

	size = strnlen(vsocket->path, PATH_MAX);
	vhost_set_ifname(vid, vsocket->path, size);

	vhost_setup_virtio_net(vid, vsocket->use_builtin_virtio_net,
		vsocket->net_compliant_ol_flags);

	vhost_attach_vdpa_device(vid, vsocket->vdpa_dev);

	if (vsocket->extbuf)
		vhost_enable_extbuf(vid);

	if (vsocket->linearbuf)
		vhost_enable_linearbuf(vid);

	if (vsocket->async_copy) {
		dev = get_device(vid);

		if (dev)
			dev->async_copy = 1;
	}

	VHOST_LOG_CONFIG(INFO, "(%s) new device, handle is %d\n", vsocket->path, vid);

	/* Let the application veto the connection before it goes live. */
	if (vsocket->notify_ops->new_connection) {
		ret = vsocket->notify_ops->new_connection(vid);
		if (ret < 0) {
			VHOST_LOG_CONFIG(ERR,
				"(%s) failed to add vhost user connection with fd %d\n",
				vsocket->path, fd);
			goto err_cleanup;
		}
	}

	conn->connfd = fd;
	conn->vsocket = vsocket;
	conn->vid = vid;
	ret = fdset_add(&vhost_user.fdset, fd, vhost_user_read_cb,
			NULL, conn);
	if (ret < 0) {
		VHOST_LOG_CONFIG(ERR, "(%s) failed to add fd %d into vhost server fdset\n",
			vsocket->path, fd);

		/* new_connection succeeded, so balance it with destroy. */
		if (vsocket->notify_ops->destroy_connection)
			vsocket->notify_ops->destroy_connection(conn->vid);

		goto err_cleanup;
	}

	pthread_mutex_lock(&vsocket->conn_mutex);
	TAILQ_INSERT_TAIL(&vsocket->conn_list, conn, next);
	pthread_mutex_unlock(&vsocket->conn_mutex);

	/* Wake the fdset polling thread so it picks up the new fd. */
	fdset_pipe_notify(&vhost_user.fdset);
	return;

err_cleanup:
	vhost_destroy_device(vid);
err:
	free(conn);
	close(fd);
}
288
289 /* call back when there is new vhost-user connection from client  */
290 static void
291 vhost_user_server_new_connection(int fd, void *dat, int *remove __rte_unused)
292 {
293         struct vhost_user_socket *vsocket = dat;
294
295         fd = accept(fd, NULL, NULL);
296         if (fd < 0)
297                 return;
298
299         VHOST_LOG_CONFIG(INFO, "(%s) new vhost user connection is %d\n",
300                         vsocket->path, fd);
301         vhost_user_add_connection(fd, vsocket);
302 }
303
/*
 * fdset callback invoked when a connected fd becomes readable: dispatch the
 * vhost-user message. On a fatal handler error, tear down the connection
 * (and optionally schedule a client-mode reconnect).
 */
static void
vhost_user_read_cb(int connfd, void *dat, int *remove)
{
	struct vhost_user_connection *conn = dat;
	struct vhost_user_socket *vsocket = conn->vsocket;
	int ret;

	ret = vhost_user_msg_handler(conn->vid, connfd);
	if (ret < 0) {
		struct virtio_net *dev = get_device(conn->vid);

		close(connfd);
		/* Tell the fdset loop to drop this fd. */
		*remove = 1;

		if (dev)
			vhost_destroy_device_notify(dev);

		if (vsocket->notify_ops->destroy_connection)
			vsocket->notify_ops->destroy_connection(conn->vid);

		vhost_destroy_device(conn->vid);

		/* Client mode: create a fresh socket and try to reconnect. */
		if (vsocket->reconnect) {
			create_unix_socket(vsocket);
			vhost_user_start_client(vsocket);
		}

		pthread_mutex_lock(&vsocket->conn_mutex);
		TAILQ_REMOVE(&vsocket->conn_list, conn, next);
		pthread_mutex_unlock(&vsocket->conn_mutex);

		free(conn);
	}
}
338
339 static int
340 create_unix_socket(struct vhost_user_socket *vsocket)
341 {
342         int fd;
343         struct sockaddr_un *un = &vsocket->un;
344
345         fd = socket(AF_UNIX, SOCK_STREAM, 0);
346         if (fd < 0)
347                 return -1;
348         VHOST_LOG_CONFIG(INFO, "(%s) vhost-user %s: socket created, fd: %d\n",
349                 vsocket->path, vsocket->is_server ? "server" : "client", fd);
350
351         if (!vsocket->is_server && fcntl(fd, F_SETFL, O_NONBLOCK)) {
352                 VHOST_LOG_CONFIG(ERR,
353                         "(%s) vhost-user: can't set nonblocking mode for socket, fd: %d (%s)\n",
354                         vsocket->path, fd, strerror(errno));
355                 close(fd);
356                 return -1;
357         }
358
359         memset(un, 0, sizeof(*un));
360         un->sun_family = AF_UNIX;
361         strncpy(un->sun_path, vsocket->path, sizeof(un->sun_path));
362         un->sun_path[sizeof(un->sun_path) - 1] = '\0';
363
364         vsocket->socket_fd = fd;
365         return 0;
366 }
367
/*
 * Bind and listen on a server-mode socket, then register the listening fd
 * with the shared fdset so new client connections are accepted.
 * Returns 0 on success; on failure the fd is closed and -1 returned.
 */
static int
vhost_user_start_server(struct vhost_user_socket *vsocket)
{
	int ret;
	int fd = vsocket->socket_fd;
	const char *path = vsocket->path;

	/*
	 * bind () may fail if the socket file with the same name already
	 * exists. But the library obviously should not delete the file
	 * provided by the user, since we can not be sure that it is not
	 * being used by other applications. Moreover, many applications form
	 * socket names based on user input, which is prone to errors.
	 *
	 * The user must ensure that the socket does not exist before
	 * registering the vhost driver in server mode.
	 */
	ret = bind(fd, (struct sockaddr *)&vsocket->un, sizeof(vsocket->un));
	if (ret < 0) {
		VHOST_LOG_CONFIG(ERR, "(%s) failed to bind: %s; remove it and try again\n",
			path, strerror(errno));
		goto err;
	}
	VHOST_LOG_CONFIG(INFO, "(%s) binding succeeded\n", path);

	ret = listen(fd, MAX_VIRTIO_BACKLOG);
	if (ret < 0)
		goto err;

	ret = fdset_add(&vhost_user.fdset, fd, vhost_user_server_new_connection,
		  NULL, vsocket);
	if (ret < 0) {
		VHOST_LOG_CONFIG(ERR,
			"(%s) failed to add listen fd %d to vhost server fdset\n",
			path, fd);
		goto err;
	}

	return 0;

err:
	close(fd);
	return -1;
}
412
/* A pending client-mode connection waiting to be retried. */
struct vhost_user_reconnect {
	struct sockaddr_un un;			/* target address */
	int fd;					/* socket fd to connect */
	struct vhost_user_socket *vsocket;	/* socket the fd belongs to */

	TAILQ_ENTRY(vhost_user_reconnect) next;
};

TAILQ_HEAD(vhost_user_reconnect_tailq_list, vhost_user_reconnect);
/* List of pending reconnections, shared with the reconnect thread. */
struct vhost_user_reconnect_list {
	struct vhost_user_reconnect_tailq_list head;
	pthread_mutex_t mutex;	/* protects head */
};

static struct vhost_user_reconnect_list reconn_list;
static pthread_t reconn_tid;	/* reconnect thread, created lazily on first use */
429
430 static int
431 vhost_user_connect_nonblock(char *path, int fd, struct sockaddr *un, size_t sz)
432 {
433         int ret, flags;
434
435         ret = connect(fd, un, sz);
436         if (ret < 0 && errno != EISCONN)
437                 return -1;
438
439         flags = fcntl(fd, F_GETFL, 0);
440         if (flags < 0) {
441                 VHOST_LOG_CONFIG(ERR, "(%s) can't get flags for connfd %d (%s)\n",
442                                 path, fd, strerror(errno));
443                 return -2;
444         }
445         if ((flags & O_NONBLOCK) && fcntl(fd, F_SETFL, flags & ~O_NONBLOCK)) {
446                 VHOST_LOG_CONFIG(ERR, "(%s) can't disable nonblocking on fd %d\n", path, fd);
447                 return -2;
448         }
449         return 0;
450 }
451
/*
 * Body of the reconnect thread: once per second, walk the pending list and
 * retry each connection. Entries are removed on success or on unrecoverable
 * failure; entries that merely have not connected yet stay queued.
 * Never returns (loops forever).
 */
static void *
vhost_user_client_reconnect(void *arg __rte_unused)
{
	int ret;
	struct vhost_user_reconnect *reconn, *next;

	while (1) {
		pthread_mutex_lock(&reconn_list.mutex);

		/*
		 * An equal implementation of TAILQ_FOREACH_SAFE,
		 * which does not exist on all platforms.
		 */
		for (reconn = TAILQ_FIRST(&reconn_list.head);
		     reconn != NULL; reconn = next) {
			next = TAILQ_NEXT(reconn, next);

			ret = vhost_user_connect_nonblock(reconn->vsocket->path, reconn->fd,
						(struct sockaddr *)&reconn->un,
						sizeof(reconn->un));
			if (ret == -2) {
				/* Unrecoverable: drop this entry entirely. */
				close(reconn->fd);
				VHOST_LOG_CONFIG(ERR, "(%s) reconnection for fd %d failed\n",
					reconn->vsocket->path, reconn->fd);
				goto remove_fd;
			}
			if (ret == -1)
				continue;	/* not connected yet, keep trying */

			VHOST_LOG_CONFIG(INFO, "(%s) connected\n", reconn->vsocket->path);
			vhost_user_add_connection(reconn->fd, reconn->vsocket);
remove_fd:
			TAILQ_REMOVE(&reconn_list.head, reconn, next);
			free(reconn);
		}

		pthread_mutex_unlock(&reconn_list.mutex);
		sleep(1);
	}

	return NULL;
}
494
495 static int
496 vhost_user_reconnect_init(void)
497 {
498         int ret;
499
500         ret = pthread_mutex_init(&reconn_list.mutex, NULL);
501         if (ret < 0) {
502                 VHOST_LOG_CONFIG(ERR, "%s: failed to initialize mutex", __func__);
503                 return ret;
504         }
505         TAILQ_INIT(&reconn_list.head);
506
507         ret = rte_ctrl_thread_create(&reconn_tid, "vhost_reconn", NULL,
508                              vhost_user_client_reconnect, NULL);
509         if (ret != 0) {
510                 VHOST_LOG_CONFIG(ERR, "failed to create reconnect thread");
511                 if (pthread_mutex_destroy(&reconn_list.mutex))
512                         VHOST_LOG_CONFIG(ERR, "%s: failed to destroy reconnect mutex", __func__);
513         }
514
515         return ret;
516 }
517
/*
 * Attempt to connect a client-mode socket. If the connection succeeds
 * immediately, register it; if it cannot be established yet and reconnection
 * is enabled, queue it for the reconnect thread. Returns 0 on success or
 * when queued, -1 on unrecoverable failure (fd closed).
 */
static int
vhost_user_start_client(struct vhost_user_socket *vsocket)
{
	int ret;
	int fd = vsocket->socket_fd;
	const char *path = vsocket->path;
	struct vhost_user_reconnect *reconn;

	ret = vhost_user_connect_nonblock(vsocket->path, fd, (struct sockaddr *)&vsocket->un,
					  sizeof(vsocket->un));
	if (ret == 0) {
		vhost_user_add_connection(fd, vsocket);
		return 0;
	}

	VHOST_LOG_CONFIG(WARNING, "(%s) failed to connect: %s\n", path, strerror(errno));

	/* -2 is unrecoverable; also bail out if reconnection is disabled. */
	if (ret == -2 || !vsocket->reconnect) {
		close(fd);
		return -1;
	}

	VHOST_LOG_CONFIG(INFO, "(%s) reconnecting...\n", path);
	reconn = malloc(sizeof(*reconn));
	if (reconn == NULL) {
		VHOST_LOG_CONFIG(ERR, "(%s) failed to allocate memory for reconnect\n", path);
		close(fd);
		return -1;
	}
	reconn->un = vsocket->un;
	reconn->fd = fd;
	reconn->vsocket = vsocket;
	pthread_mutex_lock(&reconn_list.mutex);
	TAILQ_INSERT_TAIL(&reconn_list.head, reconn, next);
	pthread_mutex_unlock(&reconn_list.mutex);

	return 0;
}
556
557 static struct vhost_user_socket *
558 find_vhost_user_socket(const char *path)
559 {
560         int i;
561
562         if (path == NULL)
563                 return NULL;
564
565         for (i = 0; i < vhost_user.vsocket_cnt; i++) {
566                 struct vhost_user_socket *vsocket = vhost_user.vsockets[i];
567
568                 if (!strcmp(vsocket->path, path))
569                         return vsocket;
570         }
571
572         return NULL;
573 }
574
575 int
576 rte_vhost_driver_attach_vdpa_device(const char *path,
577                 struct rte_vdpa_device *dev)
578 {
579         struct vhost_user_socket *vsocket;
580
581         if (dev == NULL || path == NULL)
582                 return -1;
583
584         pthread_mutex_lock(&vhost_user.mutex);
585         vsocket = find_vhost_user_socket(path);
586         if (vsocket)
587                 vsocket->vdpa_dev = dev;
588         pthread_mutex_unlock(&vhost_user.mutex);
589
590         return vsocket ? 0 : -1;
591 }
592
593 int
594 rte_vhost_driver_detach_vdpa_device(const char *path)
595 {
596         struct vhost_user_socket *vsocket;
597
598         pthread_mutex_lock(&vhost_user.mutex);
599         vsocket = find_vhost_user_socket(path);
600         if (vsocket)
601                 vsocket->vdpa_dev = NULL;
602         pthread_mutex_unlock(&vhost_user.mutex);
603
604         return vsocket ? 0 : -1;
605 }
606
607 struct rte_vdpa_device *
608 rte_vhost_driver_get_vdpa_device(const char *path)
609 {
610         struct vhost_user_socket *vsocket;
611         struct rte_vdpa_device *dev = NULL;
612
613         pthread_mutex_lock(&vhost_user.mutex);
614         vsocket = find_vhost_user_socket(path);
615         if (vsocket)
616                 dev = vsocket->vdpa_dev;
617         pthread_mutex_unlock(&vhost_user.mutex);
618
619         return dev;
620 }
621
622 int
623 rte_vhost_driver_disable_features(const char *path, uint64_t features)
624 {
625         struct vhost_user_socket *vsocket;
626
627         pthread_mutex_lock(&vhost_user.mutex);
628         vsocket = find_vhost_user_socket(path);
629
630         /* Note that use_builtin_virtio_net is not affected by this function
631          * since callers may want to selectively disable features of the
632          * built-in vhost net device backend.
633          */
634
635         if (vsocket)
636                 vsocket->features &= ~features;
637         pthread_mutex_unlock(&vhost_user.mutex);
638
639         return vsocket ? 0 : -1;
640 }
641
642 int
643 rte_vhost_driver_enable_features(const char *path, uint64_t features)
644 {
645         struct vhost_user_socket *vsocket;
646
647         pthread_mutex_lock(&vhost_user.mutex);
648         vsocket = find_vhost_user_socket(path);
649         if (vsocket) {
650                 if ((vsocket->supported_features & features) != features) {
651                         /*
652                          * trying to enable features the driver doesn't
653                          * support.
654                          */
655                         pthread_mutex_unlock(&vhost_user.mutex);
656                         return -1;
657                 }
658                 vsocket->features |= features;
659         }
660         pthread_mutex_unlock(&vhost_user.mutex);
661
662         return vsocket ? 0 : -1;
663 }
664
665 int
666 rte_vhost_driver_set_features(const char *path, uint64_t features)
667 {
668         struct vhost_user_socket *vsocket;
669
670         pthread_mutex_lock(&vhost_user.mutex);
671         vsocket = find_vhost_user_socket(path);
672         if (vsocket) {
673                 vsocket->supported_features = features;
674                 vsocket->features = features;
675
676                 /* Anyone setting feature bits is implementing their own vhost
677                  * device backend.
678                  */
679                 vsocket->use_builtin_virtio_net = false;
680         }
681         pthread_mutex_unlock(&vhost_user.mutex);
682
683         return vsocket ? 0 : -1;
684 }
685
/*
 * Report the feature bits for a socket. If a vDPA device is attached, the
 * result is the intersection of the socket's features and the device's.
 * Returns 0 on success, -1 on unknown path or vDPA query failure.
 */
int
rte_vhost_driver_get_features(const char *path, uint64_t *features)
{
	struct vhost_user_socket *vsocket;
	uint64_t vdpa_features;
	struct rte_vdpa_device *vdpa_dev;
	int ret = 0;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	if (!vsocket) {
		VHOST_LOG_CONFIG(ERR, "(%s) socket file is not registered yet.\n", path);
		ret = -1;
		goto unlock_exit;
	}

	vdpa_dev = vsocket->vdpa_dev;
	if (!vdpa_dev) {
		/* No vDPA device: the socket's own features are the answer. */
		*features = vsocket->features;
		goto unlock_exit;
	}

	if (vdpa_dev->ops->get_features(vdpa_dev, &vdpa_features) < 0) {
		VHOST_LOG_CONFIG(ERR, "(%s) failed to get vdpa features for socket file.\n", path);
		ret = -1;
		goto unlock_exit;
	}

	*features = vsocket->features & vdpa_features;

unlock_exit:
	pthread_mutex_unlock(&vhost_user.mutex);
	return ret;
}
720
721 int
722 rte_vhost_driver_set_protocol_features(const char *path,
723                 uint64_t protocol_features)
724 {
725         struct vhost_user_socket *vsocket;
726
727         pthread_mutex_lock(&vhost_user.mutex);
728         vsocket = find_vhost_user_socket(path);
729         if (vsocket)
730                 vsocket->protocol_features = protocol_features;
731         pthread_mutex_unlock(&vhost_user.mutex);
732         return vsocket ? 0 : -1;
733 }
734
/*
 * Report the protocol feature bits for a socket. If a vDPA device is
 * attached, the result is the intersection of the socket's protocol
 * features and the device's. Returns 0 on success, -1 on unknown path
 * or vDPA query failure.
 */
int
rte_vhost_driver_get_protocol_features(const char *path,
		uint64_t *protocol_features)
{
	struct vhost_user_socket *vsocket;
	uint64_t vdpa_protocol_features;
	struct rte_vdpa_device *vdpa_dev;
	int ret = 0;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	if (!vsocket) {
		VHOST_LOG_CONFIG(ERR, "(%s) socket file is not registered yet.\n", path);
		ret = -1;
		goto unlock_exit;
	}

	vdpa_dev = vsocket->vdpa_dev;
	if (!vdpa_dev) {
		/* No vDPA device: the socket's own protocol features apply. */
		*protocol_features = vsocket->protocol_features;
		goto unlock_exit;
	}

	if (vdpa_dev->ops->get_protocol_features(vdpa_dev,
				&vdpa_protocol_features) < 0) {
		VHOST_LOG_CONFIG(ERR, "(%s) failed to get vdpa protocol features.\n",
				path);
		ret = -1;
		goto unlock_exit;
	}

	*protocol_features = vsocket->protocol_features
		& vdpa_protocol_features;

unlock_exit:
	pthread_mutex_unlock(&vhost_user.mutex);
	return ret;
}
773
/*
 * Report the maximum number of queue pairs for a socket: VHOST_MAX_QUEUE_PAIRS
 * by default, capped by the attached vDPA device's limit if there is one.
 * Returns 0 on success, -1 on unknown path or vDPA query failure.
 */
int
rte_vhost_driver_get_queue_num(const char *path, uint32_t *queue_num)
{
	struct vhost_user_socket *vsocket;
	uint32_t vdpa_queue_num;
	struct rte_vdpa_device *vdpa_dev;
	int ret = 0;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	if (!vsocket) {
		VHOST_LOG_CONFIG(ERR, "(%s) socket file is not registered yet.\n", path);
		ret = -1;
		goto unlock_exit;
	}

	vdpa_dev = vsocket->vdpa_dev;
	if (!vdpa_dev) {
		*queue_num = VHOST_MAX_QUEUE_PAIRS;
		goto unlock_exit;
	}

	if (vdpa_dev->ops->get_queue_num(vdpa_dev, &vdpa_queue_num) < 0) {
		VHOST_LOG_CONFIG(ERR, "(%s) failed to get vdpa queue number.\n",
				path);
		ret = -1;
		goto unlock_exit;
	}

	/* Never exceed the library's own limit. */
	*queue_num = RTE_MIN((uint32_t)VHOST_MAX_QUEUE_PAIRS, vdpa_queue_num);

unlock_exit:
	pthread_mutex_unlock(&vhost_user.mutex);
	return ret;
}
809
810 static void
811 vhost_user_socket_mem_free(struct vhost_user_socket *vsocket)
812 {
813         if (vsocket && vsocket->path) {
814                 free(vsocket->path);
815                 vsocket->path = NULL;
816         }
817
818         if (vsocket) {
819                 free(vsocket);
820                 vsocket = NULL;
821         }
822 }
823
/*
 * Register a new vhost-user socket; here we could act as server
 * (the default case), or client (when RTE_VHOST_USER_CLIENT) flag
 * is set.
 */
int
rte_vhost_driver_register(const char *path, uint64_t flags)
{
	int ret = -1;
	struct vhost_user_socket *vsocket;

	if (!path)
		return -1;

	pthread_mutex_lock(&vhost_user.mutex);

	if (vhost_user.vsocket_cnt == MAX_VHOST_SOCKET) {
		VHOST_LOG_CONFIG(ERR, "(%s) the number of vhost sockets reaches maximum\n",
				path);
		goto out;
	}

	vsocket = malloc(sizeof(struct vhost_user_socket));
	if (!vsocket)
		goto out;
	memset(vsocket, 0, sizeof(struct vhost_user_socket));
	vsocket->path = strdup(path);
	if (vsocket->path == NULL) {
		VHOST_LOG_CONFIG(ERR, "(%s) failed to copy socket path string\n", path);
		vhost_user_socket_mem_free(vsocket);
		goto out;
	}
	TAILQ_INIT(&vsocket->conn_list);
	ret = pthread_mutex_init(&vsocket->conn_mutex, NULL);
	if (ret) {
		VHOST_LOG_CONFIG(ERR, "(%s) failed to init connection mutex\n", path);
		goto out_free;
	}
	vsocket->vdpa_dev = NULL;
	vsocket->extbuf = flags & RTE_VHOST_USER_EXTBUF_SUPPORT;
	vsocket->linearbuf = flags & RTE_VHOST_USER_LINEARBUF_SUPPORT;
	vsocket->async_copy = flags & RTE_VHOST_USER_ASYNC_COPY;
	vsocket->net_compliant_ol_flags = flags & RTE_VHOST_USER_NET_COMPLIANT_OL_FLAGS;

	/* Async copy is incompatible with IOMMU and post-copy migration. */
	if (vsocket->async_copy &&
		(flags & (RTE_VHOST_USER_IOMMU_SUPPORT |
		RTE_VHOST_USER_POSTCOPY_SUPPORT))) {
		VHOST_LOG_CONFIG(ERR, "(%s) async copy with IOMMU or post-copy not supported\n",
				path);
		goto out_mutex;
	}

	/*
	 * Set the supported features correctly for the builtin vhost-user
	 * net driver.
	 *
	 * Applications know nothing about features the builtin virtio net
	 * driver (virtio_net.c) supports, thus it's not possible for them
	 * to invoke rte_vhost_driver_set_features(). To workaround it, here
	 * we set it unconditionally. If the application want to implement
	 * another vhost-user driver (say SCSI), it should call the
	 * rte_vhost_driver_set_features(), which will overwrite following
	 * two values.
	 */
	vsocket->use_builtin_virtio_net = true;
	vsocket->supported_features = VIRTIO_NET_SUPPORTED_FEATURES;
	vsocket->features           = VIRTIO_NET_SUPPORTED_FEATURES;
	vsocket->protocol_features  = VHOST_USER_PROTOCOL_FEATURES;

	if (vsocket->async_copy) {
		vsocket->supported_features &= ~(1ULL << VHOST_F_LOG_ALL);
		vsocket->features &= ~(1ULL << VHOST_F_LOG_ALL);
		VHOST_LOG_CONFIG(INFO, "(%s) logging feature is disabled in async copy mode\n",
				path);
	}

	/*
	 * We'll not be able to receive a buffer from guest in linear mode
	 * without external buffer if it will not fit in a single mbuf, which is
	 * likely if segmentation offloading enabled.
	 */
	if (vsocket->linearbuf && !vsocket->extbuf) {
		uint64_t seg_offload_features =
				(1ULL << VIRTIO_NET_F_HOST_TSO4) |
				(1ULL << VIRTIO_NET_F_HOST_TSO6) |
				(1ULL << VIRTIO_NET_F_HOST_UFO);

		VHOST_LOG_CONFIG(INFO, "(%s) Linear buffers requested without external buffers,\n",
				path);
		VHOST_LOG_CONFIG(INFO, "(%s) disabling host segmentation offloading support\n",
				path);
		vsocket->supported_features &= ~seg_offload_features;
		vsocket->features &= ~seg_offload_features;
	}

	if (!(flags & RTE_VHOST_USER_IOMMU_SUPPORT)) {
		vsocket->supported_features &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
		vsocket->features &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
	}

	if (!(flags & RTE_VHOST_USER_POSTCOPY_SUPPORT)) {
		vsocket->protocol_features &=
			~(1ULL << VHOST_USER_PROTOCOL_F_PAGEFAULT);
	} else {
#ifndef RTE_LIBRTE_VHOST_POSTCOPY
		VHOST_LOG_CONFIG(ERR, "(%s) Postcopy requested but not compiled\n", path);
		ret = -1;
		goto out_mutex;
#endif
	}

	if ((flags & RTE_VHOST_USER_CLIENT) != 0) {
		vsocket->reconnect = !(flags & RTE_VHOST_USER_NO_RECONNECT);
		/*
		 * NOTE(review): reconn_tid is compared against 0 to mean
		 * "thread not yet created"; pthread_t is not guaranteed to be
		 * an integer type on all platforms — confirm portability.
		 */
		if (vsocket->reconnect && reconn_tid == 0) {
			if (vhost_user_reconnect_init() != 0)
				goto out_mutex;
		}
	} else {
		vsocket->is_server = true;
	}
	ret = create_unix_socket(vsocket);
	if (ret < 0) {
		goto out_mutex;
	}

	vhost_user.vsockets[vhost_user.vsocket_cnt++] = vsocket;

	pthread_mutex_unlock(&vhost_user.mutex);
	return ret;

out_mutex:
	if (pthread_mutex_destroy(&vsocket->conn_mutex)) {
		VHOST_LOG_CONFIG(ERR, "(%s) failed to destroy connection mutex\n", path);
	}
out_free:
	vhost_user_socket_mem_free(vsocket);
out:
	pthread_mutex_unlock(&vhost_user.mutex);

	return ret;
}
965
966 static bool
967 vhost_user_remove_reconnect(struct vhost_user_socket *vsocket)
968 {
969         int found = false;
970         struct vhost_user_reconnect *reconn, *next;
971
972         pthread_mutex_lock(&reconn_list.mutex);
973
974         for (reconn = TAILQ_FIRST(&reconn_list.head);
975              reconn != NULL; reconn = next) {
976                 next = TAILQ_NEXT(reconn, next);
977
978                 if (reconn->vsocket == vsocket) {
979                         TAILQ_REMOVE(&reconn_list.head, reconn, next);
980                         close(reconn->fd);
981                         free(reconn);
982                         found = true;
983                         break;
984                 }
985         }
986         pthread_mutex_unlock(&reconn_list.mutex);
987         return found;
988 }
989
990 /**
991  * Unregister the specified vhost socket
992  */
int
rte_vhost_driver_unregister(const char *path)
{
	int i;
	int count;
	struct vhost_user_connection *conn, *next;

	if (path == NULL)
		return -1;

again:
	pthread_mutex_lock(&vhost_user.mutex);

	for (i = 0; i < vhost_user.vsocket_cnt; i++) {
		struct vhost_user_socket *vsocket = vhost_user.vsockets[i];
		/* Only the socket registered under @path is torn down. */
		if (strcmp(vsocket->path, path))
			continue;

		if (vsocket->is_server) {
			/*
			 * If r/wcb is executing, release vhost_user's
			 * mutex lock, and try again since the r/wcb
			 * may use the mutex lock.
			 */
			if (fdset_try_del(&vhost_user.fdset, vsocket->socket_fd) == -1) {
				pthread_mutex_unlock(&vhost_user.mutex);
				goto again;
			}
		} else if (vsocket->reconnect) {
			/* Client mode: cancel any pending reconnection. */
			vhost_user_remove_reconnect(vsocket);
		}

		/* Tear down every live connection on this socket. */
		pthread_mutex_lock(&vsocket->conn_mutex);
		for (conn = TAILQ_FIRST(&vsocket->conn_list);
			 conn != NULL;
			 conn = next) {
			next = TAILQ_NEXT(conn, next);

			/*
			 * If r/wcb is executing, release vsocket's
			 * conn_mutex and vhost_user's mutex locks, and
			 * try again since the r/wcb may use the
			 * conn_mutex and mutex locks.
			 */
			if (fdset_try_del(&vhost_user.fdset,
					  conn->connfd) == -1) {
				pthread_mutex_unlock(&vsocket->conn_mutex);
				pthread_mutex_unlock(&vhost_user.mutex);
				goto again;
			}

			VHOST_LOG_CONFIG(INFO, "(%s) free connfd %d\n", path, conn->connfd);
			close(conn->connfd);
			vhost_destroy_device(conn->vid);
			TAILQ_REMOVE(&vsocket->conn_list, conn, next);
			free(conn);
		}
		pthread_mutex_unlock(&vsocket->conn_mutex);

		if (vsocket->is_server) {
			/* Close the listening fd and remove the socket file. */
			close(vsocket->socket_fd);
			unlink(path);
		}

		pthread_mutex_destroy(&vsocket->conn_mutex);
		vhost_user_socket_mem_free(vsocket);

		/*
		 * Remove the vsocket from the registry by moving the last
		 * entry into the freed slot (array order is not significant).
		 */
		count = --vhost_user.vsocket_cnt;
		vhost_user.vsockets[i] = vhost_user.vsockets[count];
		vhost_user.vsockets[count] = NULL;
		pthread_mutex_unlock(&vhost_user.mutex);
		return 0;
	}
	pthread_mutex_unlock(&vhost_user.mutex);

	/* No socket registered under @path. */
	return -1;
}
1070
1071 /*
1072  * Register ops so that we can add/remove device to data core.
1073  */
1074 int
1075 rte_vhost_driver_callback_register(const char *path,
1076         struct rte_vhost_device_ops const * const ops)
1077 {
1078         struct vhost_user_socket *vsocket;
1079
1080         pthread_mutex_lock(&vhost_user.mutex);
1081         vsocket = find_vhost_user_socket(path);
1082         if (vsocket)
1083                 vsocket->notify_ops = ops;
1084         pthread_mutex_unlock(&vhost_user.mutex);
1085
1086         return vsocket ? 0 : -1;
1087 }
1088
1089 struct rte_vhost_device_ops const *
1090 vhost_driver_callback_get(const char *path)
1091 {
1092         struct vhost_user_socket *vsocket;
1093
1094         pthread_mutex_lock(&vhost_user.mutex);
1095         vsocket = find_vhost_user_socket(path);
1096         pthread_mutex_unlock(&vhost_user.mutex);
1097
1098         return vsocket ? vsocket->notify_ops : NULL;
1099 }
1100
1101 int
1102 rte_vhost_driver_start(const char *path)
1103 {
1104         struct vhost_user_socket *vsocket;
1105         static pthread_t fdset_tid;
1106
1107         pthread_mutex_lock(&vhost_user.mutex);
1108         vsocket = find_vhost_user_socket(path);
1109         pthread_mutex_unlock(&vhost_user.mutex);
1110
1111         if (!vsocket)
1112                 return -1;
1113
1114         if (fdset_tid == 0) {
1115                 /**
1116                  * create a pipe which will be waited by poll and notified to
1117                  * rebuild the wait list of poll.
1118                  */
1119                 if (fdset_pipe_init(&vhost_user.fdset) < 0) {
1120                         VHOST_LOG_CONFIG(ERR, "(%s) failed to create pipe for vhost fdset\n", path);
1121                         return -1;
1122                 }
1123
1124                 int ret = rte_ctrl_thread_create(&fdset_tid,
1125                         "vhost-events", NULL, fdset_event_dispatch,
1126                         &vhost_user.fdset);
1127                 if (ret != 0) {
1128                         VHOST_LOG_CONFIG(ERR, "(%s) failed to create fdset handling thread", path);
1129
1130                         fdset_pipe_uninit(&vhost_user.fdset);
1131                         return -1;
1132                 }
1133         }
1134
1135         if (vsocket->is_server)
1136                 return vhost_user_start_server(vsocket);
1137         else
1138                 return vhost_user_start_client(vsocket);
1139 }