net/cnxk: remove restriction on VF for PFC config
[dpdk.git] / lib / vhost / socket.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2016 Intel Corporation
3  */
4
5 #include <stdint.h>
6 #include <stdio.h>
7 #include <limits.h>
8 #include <stdlib.h>
9 #include <unistd.h>
10 #include <string.h>
11 #include <sys/socket.h>
12 #include <sys/un.h>
13 #include <sys/queue.h>
14 #include <errno.h>
15 #include <fcntl.h>
16 #include <pthread.h>
17
18 #include <rte_log.h>
19
20 #include "fd_man.h"
21 #include "vhost.h"
22 #include "vhost_user.h"
23
24
25 TAILQ_HEAD(vhost_user_connection_list, vhost_user_connection);
26
/*
 * Every time rte_vhost_driver_register() is invoked, an associated
 * vhost_user_socket struct will be created.
 */
struct vhost_user_socket {
        /* Connections established on this socket; guarded by conn_mutex. */
        struct vhost_user_connection_list conn_list;
        pthread_mutex_t conn_mutex;
        /* Heap-allocated copy of the socket path given at registration. */
        char *path;
        /* Descriptor created by create_unix_socket(). */
        int socket_fd;
        /* AF_UNIX address built from path by create_unix_socket(). */
        struct sockaddr_un un;
        /* Server sockets stay blocking; client sockets are set O_NONBLOCK. */
        bool is_server;
        /* Client mode: retry the connection when it fails or is lost. */
        bool reconnect;
        bool iommu_support;
        /* Cleared by rte_vhost_driver_set_features(). */
        bool use_builtin_virtio_net;
        /* The following flags are copied from the registration flags. */
        bool extbuf;
        bool linearbuf;
        bool async_copy;
        bool net_compliant_ol_flags;
        bool stats_enabled;

        /*
         * The "supported_features" indicates the feature bits the
         * vhost driver supports. The "features" indicates the feature
         * bits after the rte_vhost_driver_features_disable/enable().
         * It is also the final feature bits used for vhost-user
         * features negotiation.
         */
        uint64_t supported_features;
        uint64_t features;

        uint64_t protocol_features;

        /* Optional vDPA device attached to this socket, NULL when none. */
        struct rte_vdpa_device *vdpa_dev;

        /* Application callbacks (new_connection, destroy_connection, ...). */
        struct rte_vhost_device_ops const *notify_ops;
};
63
/* One established vhost-user connection, linked into its socket's conn_list. */
struct vhost_user_connection {
        /* Socket this connection belongs to. */
        struct vhost_user_socket *vsocket;
        /* Connected descriptor registered in the fdset. */
        int connfd;
        /* Device id returned by vhost_new_device(). */
        int vid;

        TAILQ_ENTRY(vhost_user_connection) next;
};
71
/* Upper bound on the number of simultaneously registered vhost-user sockets. */
#define MAX_VHOST_SOCKET 1024
/* File-wide registry of vhost-user sockets and the fdset that polls them. */
struct vhost_user {
        /* Registered sockets; the first vsocket_cnt entries are valid. */
        struct vhost_user_socket *vsockets[MAX_VHOST_SOCKET];
        struct fdset fdset;
        int vsocket_cnt;
        /* Taken by the rte_vhost_driver_* accessors around socket lookups. */
        pthread_mutex_t mutex;
};
79
/* Backlog passed to listen() for server-mode sockets. */
#define MAX_VIRTIO_BACKLOG 128

/* Forward declarations: these functions reference each other below. */
static void vhost_user_server_new_connection(int fd, void *data, int *remove);
static void vhost_user_read_cb(int fd, void *dat, int *remove);
static int create_unix_socket(struct vhost_user_socket *vsocket);
static int vhost_user_start_client(struct vhost_user_socket *vsocket);
86
/* The single registry instance, starting with no sockets and no polled fds. */
static struct vhost_user vhost_user = {
        .fdset = {
                /*
                 * Range designator (GNU extension): every slot gets the same
                 * initializer; the leading -1 is presumably the "unused fd"
                 * marker -- confirm against fd_man.h.
                 */
                .fd = { [0 ... MAX_FDS - 1] = {-1, NULL, NULL, NULL, 0} },
                .fd_mutex = PTHREAD_MUTEX_INITIALIZER,
                .fd_pooling_mutex = PTHREAD_MUTEX_INITIALIZER,
                .num = 0
        },
        .vsocket_cnt = 0,
        .mutex = PTHREAD_MUTEX_INITIALIZER,
};
97
98 /*
99  * return bytes# of read on success or negative val on failure. Update fdnum
100  * with number of fds read.
101  */
102 int
103 read_fd_message(char *ifname, int sockfd, char *buf, int buflen, int *fds, int max_fds,
104                 int *fd_num)
105 {
106         struct iovec iov;
107         struct msghdr msgh;
108         char control[CMSG_SPACE(max_fds * sizeof(int))];
109         struct cmsghdr *cmsg;
110         int got_fds = 0;
111         int ret;
112
113         *fd_num = 0;
114
115         memset(&msgh, 0, sizeof(msgh));
116         iov.iov_base = buf;
117         iov.iov_len  = buflen;
118
119         msgh.msg_iov = &iov;
120         msgh.msg_iovlen = 1;
121         msgh.msg_control = control;
122         msgh.msg_controllen = sizeof(control);
123
124         ret = recvmsg(sockfd, &msgh, 0);
125         if (ret <= 0) {
126                 if (ret)
127                         VHOST_LOG_CONFIG(ERR, "(%s) recvmsg failed on fd %d (%s)\n",
128                                         ifname, sockfd, strerror(errno));
129                 return ret;
130         }
131
132         if (msgh.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) {
133                 VHOST_LOG_CONFIG(ERR, "(%s) truncated msg (fd %d)\n", ifname, sockfd);
134                 return -1;
135         }
136
137         for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL;
138                 cmsg = CMSG_NXTHDR(&msgh, cmsg)) {
139                 if ((cmsg->cmsg_level == SOL_SOCKET) &&
140                         (cmsg->cmsg_type == SCM_RIGHTS)) {
141                         got_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
142                         *fd_num = got_fds;
143                         memcpy(fds, CMSG_DATA(cmsg), got_fds * sizeof(int));
144                         break;
145                 }
146         }
147
148         /* Clear out unused file descriptors */
149         while (got_fds < max_fds)
150                 fds[got_fds++] = -1;
151
152         return ret;
153 }
154
155 int
156 send_fd_message(char *ifname, int sockfd, char *buf, int buflen, int *fds, int fd_num)
157 {
158
159         struct iovec iov;
160         struct msghdr msgh;
161         size_t fdsize = fd_num * sizeof(int);
162         char control[CMSG_SPACE(fdsize)];
163         struct cmsghdr *cmsg;
164         int ret;
165
166         memset(&msgh, 0, sizeof(msgh));
167         iov.iov_base = buf;
168         iov.iov_len = buflen;
169
170         msgh.msg_iov = &iov;
171         msgh.msg_iovlen = 1;
172
173         if (fds && fd_num > 0) {
174                 msgh.msg_control = control;
175                 msgh.msg_controllen = sizeof(control);
176                 cmsg = CMSG_FIRSTHDR(&msgh);
177                 if (cmsg == NULL) {
178                         VHOST_LOG_CONFIG(ERR, "(%s) cmsg == NULL\n", ifname);
179                         errno = EINVAL;
180                         return -1;
181                 }
182                 cmsg->cmsg_len = CMSG_LEN(fdsize);
183                 cmsg->cmsg_level = SOL_SOCKET;
184                 cmsg->cmsg_type = SCM_RIGHTS;
185                 memcpy(CMSG_DATA(cmsg), fds, fdsize);
186         } else {
187                 msgh.msg_control = NULL;
188                 msgh.msg_controllen = 0;
189         }
190
191         do {
192                 ret = sendmsg(sockfd, &msgh, MSG_NOSIGNAL);
193         } while (ret < 0 && errno == EINTR);
194
195         if (ret < 0) {
196                 VHOST_LOG_CONFIG(ERR, "(%s) sendmsg error on fd %d (%s)\n",
197                                 ifname, sockfd, strerror(errno));
198                 return ret;
199         }
200
201         return ret;
202 }
203
204 static void
205 vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket)
206 {
207         int vid;
208         size_t size;
209         struct vhost_user_connection *conn;
210         int ret;
211         struct virtio_net *dev;
212
213         if (vsocket == NULL)
214                 return;
215
216         conn = malloc(sizeof(*conn));
217         if (conn == NULL) {
218                 close(fd);
219                 return;
220         }
221
222         vid = vhost_new_device();
223         if (vid == -1) {
224                 goto err;
225         }
226
227         size = strnlen(vsocket->path, PATH_MAX);
228         vhost_set_ifname(vid, vsocket->path, size);
229
230         vhost_setup_virtio_net(vid, vsocket->use_builtin_virtio_net,
231                 vsocket->net_compliant_ol_flags, vsocket->stats_enabled);
232
233         vhost_attach_vdpa_device(vid, vsocket->vdpa_dev);
234
235         if (vsocket->extbuf)
236                 vhost_enable_extbuf(vid);
237
238         if (vsocket->linearbuf)
239                 vhost_enable_linearbuf(vid);
240
241         if (vsocket->async_copy) {
242                 dev = get_device(vid);
243
244                 if (dev)
245                         dev->async_copy = 1;
246         }
247
248         VHOST_LOG_CONFIG(INFO, "(%s) new device, handle is %d\n", vsocket->path, vid);
249
250         if (vsocket->notify_ops->new_connection) {
251                 ret = vsocket->notify_ops->new_connection(vid);
252                 if (ret < 0) {
253                         VHOST_LOG_CONFIG(ERR,
254                                 "(%s) failed to add vhost user connection with fd %d\n",
255                                 vsocket->path, fd);
256                         goto err_cleanup;
257                 }
258         }
259
260         conn->connfd = fd;
261         conn->vsocket = vsocket;
262         conn->vid = vid;
263         ret = fdset_add(&vhost_user.fdset, fd, vhost_user_read_cb,
264                         NULL, conn);
265         if (ret < 0) {
266                 VHOST_LOG_CONFIG(ERR, "(%s) failed to add fd %d into vhost server fdset\n",
267                         vsocket->path, fd);
268
269                 if (vsocket->notify_ops->destroy_connection)
270                         vsocket->notify_ops->destroy_connection(conn->vid);
271
272                 goto err_cleanup;
273         }
274
275         pthread_mutex_lock(&vsocket->conn_mutex);
276         TAILQ_INSERT_TAIL(&vsocket->conn_list, conn, next);
277         pthread_mutex_unlock(&vsocket->conn_mutex);
278
279         fdset_pipe_notify(&vhost_user.fdset);
280         return;
281
282 err_cleanup:
283         vhost_destroy_device(vid);
284 err:
285         free(conn);
286         close(fd);
287 }
288
289 /* call back when there is new vhost-user connection from client  */
290 static void
291 vhost_user_server_new_connection(int fd, void *dat, int *remove __rte_unused)
292 {
293         struct vhost_user_socket *vsocket = dat;
294
295         fd = accept(fd, NULL, NULL);
296         if (fd < 0)
297                 return;
298
299         VHOST_LOG_CONFIG(INFO, "(%s) new vhost user connection is %d\n",
300                         vsocket->path, fd);
301         vhost_user_add_connection(fd, vsocket);
302 }
303
304 static void
305 vhost_user_read_cb(int connfd, void *dat, int *remove)
306 {
307         struct vhost_user_connection *conn = dat;
308         struct vhost_user_socket *vsocket = conn->vsocket;
309         int ret;
310
311         ret = vhost_user_msg_handler(conn->vid, connfd);
312         if (ret < 0) {
313                 struct virtio_net *dev = get_device(conn->vid);
314
315                 close(connfd);
316                 *remove = 1;
317
318                 if (dev)
319                         vhost_destroy_device_notify(dev);
320
321                 if (vsocket->notify_ops->destroy_connection)
322                         vsocket->notify_ops->destroy_connection(conn->vid);
323
324                 vhost_destroy_device(conn->vid);
325
326                 if (vsocket->reconnect) {
327                         create_unix_socket(vsocket);
328                         vhost_user_start_client(vsocket);
329                 }
330
331                 pthread_mutex_lock(&vsocket->conn_mutex);
332                 TAILQ_REMOVE(&vsocket->conn_list, conn, next);
333                 pthread_mutex_unlock(&vsocket->conn_mutex);
334
335                 free(conn);
336         }
337 }
338
339 static int
340 create_unix_socket(struct vhost_user_socket *vsocket)
341 {
342         int fd;
343         struct sockaddr_un *un = &vsocket->un;
344
345         fd = socket(AF_UNIX, SOCK_STREAM, 0);
346         if (fd < 0)
347                 return -1;
348         VHOST_LOG_CONFIG(INFO, "(%s) vhost-user %s: socket created, fd: %d\n",
349                 vsocket->path, vsocket->is_server ? "server" : "client", fd);
350
351         if (!vsocket->is_server && fcntl(fd, F_SETFL, O_NONBLOCK)) {
352                 VHOST_LOG_CONFIG(ERR,
353                         "(%s) vhost-user: can't set nonblocking mode for socket, fd: %d (%s)\n",
354                         vsocket->path, fd, strerror(errno));
355                 close(fd);
356                 return -1;
357         }
358
359         memset(un, 0, sizeof(*un));
360         un->sun_family = AF_UNIX;
361         strncpy(un->sun_path, vsocket->path, sizeof(un->sun_path));
362         un->sun_path[sizeof(un->sun_path) - 1] = '\0';
363
364         vsocket->socket_fd = fd;
365         return 0;
366 }
367
368 static int
369 vhost_user_start_server(struct vhost_user_socket *vsocket)
370 {
371         int ret;
372         int fd = vsocket->socket_fd;
373         const char *path = vsocket->path;
374
375         /*
376          * bind () may fail if the socket file with the same name already
377          * exists. But the library obviously should not delete the file
378          * provided by the user, since we can not be sure that it is not
379          * being used by other applications. Moreover, many applications form
380          * socket names based on user input, which is prone to errors.
381          *
382          * The user must ensure that the socket does not exist before
383          * registering the vhost driver in server mode.
384          */
385         ret = bind(fd, (struct sockaddr *)&vsocket->un, sizeof(vsocket->un));
386         if (ret < 0) {
387                 VHOST_LOG_CONFIG(ERR, "(%s) failed to bind: %s; remove it and try again\n",
388                         path, strerror(errno));
389                 goto err;
390         }
391         VHOST_LOG_CONFIG(INFO, "(%s) binding succeeded\n", path);
392
393         ret = listen(fd, MAX_VIRTIO_BACKLOG);
394         if (ret < 0)
395                 goto err;
396
397         ret = fdset_add(&vhost_user.fdset, fd, vhost_user_server_new_connection,
398                   NULL, vsocket);
399         if (ret < 0) {
400                 VHOST_LOG_CONFIG(ERR,
401                         "(%s) failed to add listen fd %d to vhost server fdset\n",
402                         path, fd);
403                 goto err;
404         }
405
406         return 0;
407
408 err:
409         close(fd);
410         return -1;
411 }
412
/* A client connection attempt waiting to be retried by the reconnect thread. */
struct vhost_user_reconnect {
        /* Copy of the socket address to connect to. */
        struct sockaddr_un un;
        /* Descriptor the connect() is retried on. */
        int fd;
        struct vhost_user_socket *vsocket;

        TAILQ_ENTRY(vhost_user_reconnect) next;
};

TAILQ_HEAD(vhost_user_reconnect_tailq_list, vhost_user_reconnect);
/* Pending reconnections; head is accessed with mutex held. */
struct vhost_user_reconnect_list {
        struct vhost_user_reconnect_tailq_list head;
        pthread_mutex_t mutex;
};

/* Initialized by vhost_user_reconnect_init(). */
static struct vhost_user_reconnect_list reconn_list;
/* Thread running vhost_user_client_reconnect(). */
static pthread_t reconn_tid;
429
430 static int
431 vhost_user_connect_nonblock(char *path, int fd, struct sockaddr *un, size_t sz)
432 {
433         int ret, flags;
434
435         ret = connect(fd, un, sz);
436         if (ret < 0 && errno != EISCONN)
437                 return -1;
438
439         flags = fcntl(fd, F_GETFL, 0);
440         if (flags < 0) {
441                 VHOST_LOG_CONFIG(ERR, "(%s) can't get flags for connfd %d (%s)\n",
442                                 path, fd, strerror(errno));
443                 return -2;
444         }
445         if ((flags & O_NONBLOCK) && fcntl(fd, F_SETFL, flags & ~O_NONBLOCK)) {
446                 VHOST_LOG_CONFIG(ERR, "(%s) can't disable nonblocking on fd %d\n", path, fd);
447                 return -2;
448         }
449         return 0;
450 }
451
452 static void *
453 vhost_user_client_reconnect(void *arg __rte_unused)
454 {
455         int ret;
456         struct vhost_user_reconnect *reconn, *next;
457
458         while (1) {
459                 pthread_mutex_lock(&reconn_list.mutex);
460
461                 /*
462                  * An equal implementation of TAILQ_FOREACH_SAFE,
463                  * which does not exist on all platforms.
464                  */
465                 for (reconn = TAILQ_FIRST(&reconn_list.head);
466                      reconn != NULL; reconn = next) {
467                         next = TAILQ_NEXT(reconn, next);
468
469                         ret = vhost_user_connect_nonblock(reconn->vsocket->path, reconn->fd,
470                                                 (struct sockaddr *)&reconn->un,
471                                                 sizeof(reconn->un));
472                         if (ret == -2) {
473                                 close(reconn->fd);
474                                 VHOST_LOG_CONFIG(ERR, "(%s) reconnection for fd %d failed\n",
475                                         reconn->vsocket->path, reconn->fd);
476                                 goto remove_fd;
477                         }
478                         if (ret == -1)
479                                 continue;
480
481                         VHOST_LOG_CONFIG(INFO, "(%s) connected\n", reconn->vsocket->path);
482                         vhost_user_add_connection(reconn->fd, reconn->vsocket);
483 remove_fd:
484                         TAILQ_REMOVE(&reconn_list.head, reconn, next);
485                         free(reconn);
486                 }
487
488                 pthread_mutex_unlock(&reconn_list.mutex);
489                 sleep(1);
490         }
491
492         return NULL;
493 }
494
495 static int
496 vhost_user_reconnect_init(void)
497 {
498         int ret;
499
500         ret = pthread_mutex_init(&reconn_list.mutex, NULL);
501         if (ret < 0) {
502                 VHOST_LOG_CONFIG(ERR, "%s: failed to initialize mutex", __func__);
503                 return ret;
504         }
505         TAILQ_INIT(&reconn_list.head);
506
507         ret = rte_ctrl_thread_create(&reconn_tid, "vhost_reconn", NULL,
508                              vhost_user_client_reconnect, NULL);
509         if (ret != 0) {
510                 VHOST_LOG_CONFIG(ERR, "failed to create reconnect thread");
511                 if (pthread_mutex_destroy(&reconn_list.mutex))
512                         VHOST_LOG_CONFIG(ERR, "%s: failed to destroy reconnect mutex", __func__);
513         }
514
515         return ret;
516 }
517
518 static int
519 vhost_user_start_client(struct vhost_user_socket *vsocket)
520 {
521         int ret;
522         int fd = vsocket->socket_fd;
523         const char *path = vsocket->path;
524         struct vhost_user_reconnect *reconn;
525
526         ret = vhost_user_connect_nonblock(vsocket->path, fd, (struct sockaddr *)&vsocket->un,
527                                           sizeof(vsocket->un));
528         if (ret == 0) {
529                 vhost_user_add_connection(fd, vsocket);
530                 return 0;
531         }
532
533         VHOST_LOG_CONFIG(WARNING, "(%s) failed to connect: %s\n", path, strerror(errno));
534
535         if (ret == -2 || !vsocket->reconnect) {
536                 close(fd);
537                 return -1;
538         }
539
540         VHOST_LOG_CONFIG(INFO, "(%s) reconnecting...\n", path);
541         reconn = malloc(sizeof(*reconn));
542         if (reconn == NULL) {
543                 VHOST_LOG_CONFIG(ERR, "(%s) failed to allocate memory for reconnect\n", path);
544                 close(fd);
545                 return -1;
546         }
547         reconn->un = vsocket->un;
548         reconn->fd = fd;
549         reconn->vsocket = vsocket;
550         pthread_mutex_lock(&reconn_list.mutex);
551         TAILQ_INSERT_TAIL(&reconn_list.head, reconn, next);
552         pthread_mutex_unlock(&reconn_list.mutex);
553
554         return 0;
555 }
556
557 static struct vhost_user_socket *
558 find_vhost_user_socket(const char *path)
559 {
560         int i;
561
562         if (path == NULL)
563                 return NULL;
564
565         for (i = 0; i < vhost_user.vsocket_cnt; i++) {
566                 struct vhost_user_socket *vsocket = vhost_user.vsockets[i];
567
568                 if (!strcmp(vsocket->path, path))
569                         return vsocket;
570         }
571
572         return NULL;
573 }
574
575 int
576 rte_vhost_driver_attach_vdpa_device(const char *path,
577                 struct rte_vdpa_device *dev)
578 {
579         struct vhost_user_socket *vsocket;
580
581         if (dev == NULL || path == NULL)
582                 return -1;
583
584         pthread_mutex_lock(&vhost_user.mutex);
585         vsocket = find_vhost_user_socket(path);
586         if (vsocket)
587                 vsocket->vdpa_dev = dev;
588         pthread_mutex_unlock(&vhost_user.mutex);
589
590         return vsocket ? 0 : -1;
591 }
592
593 int
594 rte_vhost_driver_detach_vdpa_device(const char *path)
595 {
596         struct vhost_user_socket *vsocket;
597
598         pthread_mutex_lock(&vhost_user.mutex);
599         vsocket = find_vhost_user_socket(path);
600         if (vsocket)
601                 vsocket->vdpa_dev = NULL;
602         pthread_mutex_unlock(&vhost_user.mutex);
603
604         return vsocket ? 0 : -1;
605 }
606
607 struct rte_vdpa_device *
608 rte_vhost_driver_get_vdpa_device(const char *path)
609 {
610         struct vhost_user_socket *vsocket;
611         struct rte_vdpa_device *dev = NULL;
612
613         pthread_mutex_lock(&vhost_user.mutex);
614         vsocket = find_vhost_user_socket(path);
615         if (vsocket)
616                 dev = vsocket->vdpa_dev;
617         pthread_mutex_unlock(&vhost_user.mutex);
618
619         return dev;
620 }
621
622 int
623 rte_vhost_driver_get_vdpa_dev_type(const char *path, uint32_t *type)
624 {
625         struct vhost_user_socket *vsocket;
626         struct rte_vdpa_device *vdpa_dev;
627         uint32_t vdpa_type = 0;
628         int ret = 0;
629
630         pthread_mutex_lock(&vhost_user.mutex);
631         vsocket = find_vhost_user_socket(path);
632         if (!vsocket) {
633                 VHOST_LOG_CONFIG(ERR,
634                                  "(%s) socket file is not registered yet.\n",
635                                  path);
636                 ret = -1;
637                 goto unlock_exit;
638         }
639
640         vdpa_dev = vsocket->vdpa_dev;
641         if (!vdpa_dev) {
642                 ret = -1;
643                 goto unlock_exit;
644         }
645
646         if (vdpa_dev->ops->get_dev_type) {
647                 ret = vdpa_dev->ops->get_dev_type(vdpa_dev, &vdpa_type);
648                 if (ret) {
649                         VHOST_LOG_CONFIG(ERR,
650                                          "(%s) failed to get vdpa dev type for socket file.\n",
651                                          path);
652                         ret = -1;
653                         goto unlock_exit;
654                 }
655         } else {
656                 vdpa_type = RTE_VHOST_VDPA_DEVICE_TYPE_NET;
657         }
658
659         *type = vdpa_type;
660
661 unlock_exit:
662         pthread_mutex_unlock(&vhost_user.mutex);
663         return ret;
664 }
665
666 int
667 rte_vhost_driver_disable_features(const char *path, uint64_t features)
668 {
669         struct vhost_user_socket *vsocket;
670
671         pthread_mutex_lock(&vhost_user.mutex);
672         vsocket = find_vhost_user_socket(path);
673
674         /* Note that use_builtin_virtio_net is not affected by this function
675          * since callers may want to selectively disable features of the
676          * built-in vhost net device backend.
677          */
678
679         if (vsocket)
680                 vsocket->features &= ~features;
681         pthread_mutex_unlock(&vhost_user.mutex);
682
683         return vsocket ? 0 : -1;
684 }
685
686 int
687 rte_vhost_driver_enable_features(const char *path, uint64_t features)
688 {
689         struct vhost_user_socket *vsocket;
690
691         pthread_mutex_lock(&vhost_user.mutex);
692         vsocket = find_vhost_user_socket(path);
693         if (vsocket) {
694                 if ((vsocket->supported_features & features) != features) {
695                         /*
696                          * trying to enable features the driver doesn't
697                          * support.
698                          */
699                         pthread_mutex_unlock(&vhost_user.mutex);
700                         return -1;
701                 }
702                 vsocket->features |= features;
703         }
704         pthread_mutex_unlock(&vhost_user.mutex);
705
706         return vsocket ? 0 : -1;
707 }
708
709 int
710 rte_vhost_driver_set_features(const char *path, uint64_t features)
711 {
712         struct vhost_user_socket *vsocket;
713
714         pthread_mutex_lock(&vhost_user.mutex);
715         vsocket = find_vhost_user_socket(path);
716         if (vsocket) {
717                 vsocket->supported_features = features;
718                 vsocket->features = features;
719
720                 /* Anyone setting feature bits is implementing their own vhost
721                  * device backend.
722                  */
723                 vsocket->use_builtin_virtio_net = false;
724         }
725         pthread_mutex_unlock(&vhost_user.mutex);
726
727         return vsocket ? 0 : -1;
728 }
729
730 int
731 rte_vhost_driver_get_features(const char *path, uint64_t *features)
732 {
733         struct vhost_user_socket *vsocket;
734         uint64_t vdpa_features;
735         struct rte_vdpa_device *vdpa_dev;
736         int ret = 0;
737
738         pthread_mutex_lock(&vhost_user.mutex);
739         vsocket = find_vhost_user_socket(path);
740         if (!vsocket) {
741                 VHOST_LOG_CONFIG(ERR, "(%s) socket file is not registered yet.\n", path);
742                 ret = -1;
743                 goto unlock_exit;
744         }
745
746         vdpa_dev = vsocket->vdpa_dev;
747         if (!vdpa_dev) {
748                 *features = vsocket->features;
749                 goto unlock_exit;
750         }
751
752         if (vdpa_dev->ops->get_features(vdpa_dev, &vdpa_features) < 0) {
753                 VHOST_LOG_CONFIG(ERR, "(%s) failed to get vdpa features for socket file.\n", path);
754                 ret = -1;
755                 goto unlock_exit;
756         }
757
758         *features = vsocket->features & vdpa_features;
759
760 unlock_exit:
761         pthread_mutex_unlock(&vhost_user.mutex);
762         return ret;
763 }
764
765 int
766 rte_vhost_driver_set_protocol_features(const char *path,
767                 uint64_t protocol_features)
768 {
769         struct vhost_user_socket *vsocket;
770
771         pthread_mutex_lock(&vhost_user.mutex);
772         vsocket = find_vhost_user_socket(path);
773         if (vsocket)
774                 vsocket->protocol_features = protocol_features;
775         pthread_mutex_unlock(&vhost_user.mutex);
776         return vsocket ? 0 : -1;
777 }
778
779 int
780 rte_vhost_driver_get_protocol_features(const char *path,
781                 uint64_t *protocol_features)
782 {
783         struct vhost_user_socket *vsocket;
784         uint64_t vdpa_protocol_features;
785         struct rte_vdpa_device *vdpa_dev;
786         int ret = 0;
787
788         pthread_mutex_lock(&vhost_user.mutex);
789         vsocket = find_vhost_user_socket(path);
790         if (!vsocket) {
791                 VHOST_LOG_CONFIG(ERR, "(%s) socket file is not registered yet.\n", path);
792                 ret = -1;
793                 goto unlock_exit;
794         }
795
796         vdpa_dev = vsocket->vdpa_dev;
797         if (!vdpa_dev) {
798                 *protocol_features = vsocket->protocol_features;
799                 goto unlock_exit;
800         }
801
802         if (vdpa_dev->ops->get_protocol_features(vdpa_dev,
803                                 &vdpa_protocol_features) < 0) {
804                 VHOST_LOG_CONFIG(ERR, "(%s) failed to get vdpa protocol features.\n",
805                                 path);
806                 ret = -1;
807                 goto unlock_exit;
808         }
809
810         *protocol_features = vsocket->protocol_features
811                 & vdpa_protocol_features;
812
813 unlock_exit:
814         pthread_mutex_unlock(&vhost_user.mutex);
815         return ret;
816 }
817
818 int
819 rte_vhost_driver_get_queue_num(const char *path, uint32_t *queue_num)
820 {
821         struct vhost_user_socket *vsocket;
822         uint32_t vdpa_queue_num;
823         struct rte_vdpa_device *vdpa_dev;
824         int ret = 0;
825
826         pthread_mutex_lock(&vhost_user.mutex);
827         vsocket = find_vhost_user_socket(path);
828         if (!vsocket) {
829                 VHOST_LOG_CONFIG(ERR, "(%s) socket file is not registered yet.\n", path);
830                 ret = -1;
831                 goto unlock_exit;
832         }
833
834         vdpa_dev = vsocket->vdpa_dev;
835         if (!vdpa_dev) {
836                 *queue_num = VHOST_MAX_QUEUE_PAIRS;
837                 goto unlock_exit;
838         }
839
840         if (vdpa_dev->ops->get_queue_num(vdpa_dev, &vdpa_queue_num) < 0) {
841                 VHOST_LOG_CONFIG(ERR, "(%s) failed to get vdpa queue number.\n",
842                                 path);
843                 ret = -1;
844                 goto unlock_exit;
845         }
846
847         *queue_num = RTE_MIN((uint32_t)VHOST_MAX_QUEUE_PAIRS, vdpa_queue_num);
848
849 unlock_exit:
850         pthread_mutex_unlock(&vhost_user.mutex);
851         return ret;
852 }
853
854 static void
855 vhost_user_socket_mem_free(struct vhost_user_socket *vsocket)
856 {
857         if (vsocket && vsocket->path) {
858                 free(vsocket->path);
859                 vsocket->path = NULL;
860         }
861
862         if (vsocket) {
863                 free(vsocket);
864                 vsocket = NULL;
865         }
866 }
867
868 /*
869  * Register a new vhost-user socket; here we could act as server
870  * (the default case), or client (when RTE_VHOST_USER_CLIENT) flag
871  * is set.
872  */
873 int
874 rte_vhost_driver_register(const char *path, uint64_t flags)
875 {
876         int ret = -1;
877         struct vhost_user_socket *vsocket;
878
879         if (!path)
880                 return -1;
881
882         pthread_mutex_lock(&vhost_user.mutex);
883
884         if (vhost_user.vsocket_cnt == MAX_VHOST_SOCKET) {
885                 VHOST_LOG_CONFIG(ERR, "(%s) the number of vhost sockets reaches maximum\n",
886                                 path);
887                 goto out;
888         }
889
890         vsocket = malloc(sizeof(struct vhost_user_socket));
891         if (!vsocket)
892                 goto out;
893         memset(vsocket, 0, sizeof(struct vhost_user_socket));
894         vsocket->path = strdup(path);
895         if (vsocket->path == NULL) {
896                 VHOST_LOG_CONFIG(ERR, "(%s) failed to copy socket path string\n", path);
897                 vhost_user_socket_mem_free(vsocket);
898                 goto out;
899         }
900         TAILQ_INIT(&vsocket->conn_list);
901         ret = pthread_mutex_init(&vsocket->conn_mutex, NULL);
902         if (ret) {
903                 VHOST_LOG_CONFIG(ERR, "(%s) failed to init connection mutex\n", path);
904                 goto out_free;
905         }
906         vsocket->vdpa_dev = NULL;
907         vsocket->extbuf = flags & RTE_VHOST_USER_EXTBUF_SUPPORT;
908         vsocket->linearbuf = flags & RTE_VHOST_USER_LINEARBUF_SUPPORT;
909         vsocket->async_copy = flags & RTE_VHOST_USER_ASYNC_COPY;
910         vsocket->net_compliant_ol_flags = flags & RTE_VHOST_USER_NET_COMPLIANT_OL_FLAGS;
911         vsocket->stats_enabled = flags & RTE_VHOST_USER_NET_STATS_ENABLE;
912
913         if (vsocket->async_copy &&
914                 (flags & (RTE_VHOST_USER_IOMMU_SUPPORT |
915                 RTE_VHOST_USER_POSTCOPY_SUPPORT))) {
916                 VHOST_LOG_CONFIG(ERR, "(%s) async copy with IOMMU or post-copy not supported\n",
917                                 path);
918                 goto out_mutex;
919         }
920
921         /*
922          * Set the supported features correctly for the builtin vhost-user
923          * net driver.
924          *
925          * Applications know nothing about features the builtin virtio net
926          * driver (virtio_net.c) supports, thus it's not possible for them
927          * to invoke rte_vhost_driver_set_features(). To workaround it, here
928          * we set it unconditionally. If the application want to implement
929          * another vhost-user driver (say SCSI), it should call the
930          * rte_vhost_driver_set_features(), which will overwrite following
931          * two values.
932          */
933         vsocket->use_builtin_virtio_net = true;
934         vsocket->supported_features = VIRTIO_NET_SUPPORTED_FEATURES;
935         vsocket->features           = VIRTIO_NET_SUPPORTED_FEATURES;
936         vsocket->protocol_features  = VHOST_USER_PROTOCOL_FEATURES;
937
938         if (vsocket->async_copy) {
939                 vsocket->supported_features &= ~(1ULL << VHOST_F_LOG_ALL);
940                 vsocket->features &= ~(1ULL << VHOST_F_LOG_ALL);
941                 VHOST_LOG_CONFIG(INFO, "(%s) logging feature is disabled in async copy mode\n",
942                                 path);
943         }
944
945         /*
946          * We'll not be able to receive a buffer from guest in linear mode
947          * without external buffer if it will not fit in a single mbuf, which is
948          * likely if segmentation offloading enabled.
949          */
950         if (vsocket->linearbuf && !vsocket->extbuf) {
951                 uint64_t seg_offload_features =
952                                 (1ULL << VIRTIO_NET_F_HOST_TSO4) |
953                                 (1ULL << VIRTIO_NET_F_HOST_TSO6) |
954                                 (1ULL << VIRTIO_NET_F_HOST_UFO);
955
956                 VHOST_LOG_CONFIG(INFO, "(%s) Linear buffers requested without external buffers,\n",
957                                 path);
958                 VHOST_LOG_CONFIG(INFO, "(%s) disabling host segmentation offloading support\n",
959                                 path);
960                 vsocket->supported_features &= ~seg_offload_features;
961                 vsocket->features &= ~seg_offload_features;
962         }
963
964         if (!(flags & RTE_VHOST_USER_IOMMU_SUPPORT)) {
965                 vsocket->supported_features &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
966                 vsocket->features &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
967         }
968
969         if (!(flags & RTE_VHOST_USER_POSTCOPY_SUPPORT)) {
970                 vsocket->protocol_features &=
971                         ~(1ULL << VHOST_USER_PROTOCOL_F_PAGEFAULT);
972         } else {
973 #ifndef RTE_LIBRTE_VHOST_POSTCOPY
974                 VHOST_LOG_CONFIG(ERR, "(%s) Postcopy requested but not compiled\n", path);
975                 ret = -1;
976                 goto out_mutex;
977 #endif
978         }
979
980         if ((flags & RTE_VHOST_USER_CLIENT) != 0) {
981                 vsocket->reconnect = !(flags & RTE_VHOST_USER_NO_RECONNECT);
982                 if (vsocket->reconnect && reconn_tid == 0) {
983                         if (vhost_user_reconnect_init() != 0)
984                                 goto out_mutex;
985                 }
986         } else {
987                 vsocket->is_server = true;
988         }
989         ret = create_unix_socket(vsocket);
990         if (ret < 0) {
991                 goto out_mutex;
992         }
993
994         vhost_user.vsockets[vhost_user.vsocket_cnt++] = vsocket;
995
996         pthread_mutex_unlock(&vhost_user.mutex);
997         return ret;
998
999 out_mutex:
1000         if (pthread_mutex_destroy(&vsocket->conn_mutex)) {
1001                 VHOST_LOG_CONFIG(ERR, "(%s) failed to destroy connection mutex\n", path);
1002         }
1003 out_free:
1004         vhost_user_socket_mem_free(vsocket);
1005 out:
1006         pthread_mutex_unlock(&vhost_user.mutex);
1007
1008         return ret;
1009 }
1010
1011 static bool
1012 vhost_user_remove_reconnect(struct vhost_user_socket *vsocket)
1013 {
1014         int found = false;
1015         struct vhost_user_reconnect *reconn, *next;
1016
1017         pthread_mutex_lock(&reconn_list.mutex);
1018
1019         for (reconn = TAILQ_FIRST(&reconn_list.head);
1020              reconn != NULL; reconn = next) {
1021                 next = TAILQ_NEXT(reconn, next);
1022
1023                 if (reconn->vsocket == vsocket) {
1024                         TAILQ_REMOVE(&reconn_list.head, reconn, next);
1025                         close(reconn->fd);
1026                         free(reconn);
1027                         found = true;
1028                         break;
1029                 }
1030         }
1031         pthread_mutex_unlock(&reconn_list.mutex);
1032         return found;
1033 }
1034
1035 /**
1036  * Unregister the specified vhost socket
1037  */
1038 int
1039 rte_vhost_driver_unregister(const char *path)
1040 {
1041         int i;
1042         int count;
1043         struct vhost_user_connection *conn, *next;
1044
1045         if (path == NULL)
1046                 return -1;
1047
1048 again:
1049         pthread_mutex_lock(&vhost_user.mutex);
1050
1051         for (i = 0; i < vhost_user.vsocket_cnt; i++) {
1052                 struct vhost_user_socket *vsocket = vhost_user.vsockets[i];
1053                 if (strcmp(vsocket->path, path))
1054                         continue;
1055
1056                 if (vsocket->is_server) {
1057                         /*
1058                          * If r/wcb is executing, release vhost_user's
1059                          * mutex lock, and try again since the r/wcb
1060                          * may use the mutex lock.
1061                          */
1062                         if (fdset_try_del(&vhost_user.fdset, vsocket->socket_fd) == -1) {
1063                                 pthread_mutex_unlock(&vhost_user.mutex);
1064                                 goto again;
1065                         }
1066                 } else if (vsocket->reconnect) {
1067                         vhost_user_remove_reconnect(vsocket);
1068                 }
1069
1070                 pthread_mutex_lock(&vsocket->conn_mutex);
1071                 for (conn = TAILQ_FIRST(&vsocket->conn_list);
1072                          conn != NULL;
1073                          conn = next) {
1074                         next = TAILQ_NEXT(conn, next);
1075
1076                         /*
1077                          * If r/wcb is executing, release vsocket's
1078                          * conn_mutex and vhost_user's mutex locks, and
1079                          * try again since the r/wcb may use the
1080                          * conn_mutex and mutex locks.
1081                          */
1082                         if (fdset_try_del(&vhost_user.fdset,
1083                                           conn->connfd) == -1) {
1084                                 pthread_mutex_unlock(&vsocket->conn_mutex);
1085                                 pthread_mutex_unlock(&vhost_user.mutex);
1086                                 goto again;
1087                         }
1088
1089                         VHOST_LOG_CONFIG(INFO, "(%s) free connfd %d\n", path, conn->connfd);
1090                         close(conn->connfd);
1091                         vhost_destroy_device(conn->vid);
1092                         TAILQ_REMOVE(&vsocket->conn_list, conn, next);
1093                         free(conn);
1094                 }
1095                 pthread_mutex_unlock(&vsocket->conn_mutex);
1096
1097                 if (vsocket->is_server) {
1098                         close(vsocket->socket_fd);
1099                         unlink(path);
1100                 }
1101
1102                 pthread_mutex_destroy(&vsocket->conn_mutex);
1103                 vhost_user_socket_mem_free(vsocket);
1104
1105                 count = --vhost_user.vsocket_cnt;
1106                 vhost_user.vsockets[i] = vhost_user.vsockets[count];
1107                 vhost_user.vsockets[count] = NULL;
1108                 pthread_mutex_unlock(&vhost_user.mutex);
1109                 return 0;
1110         }
1111         pthread_mutex_unlock(&vhost_user.mutex);
1112
1113         return -1;
1114 }
1115
1116 /*
1117  * Register ops so that we can add/remove device to data core.
1118  */
1119 int
1120 rte_vhost_driver_callback_register(const char *path,
1121         struct rte_vhost_device_ops const * const ops)
1122 {
1123         struct vhost_user_socket *vsocket;
1124
1125         pthread_mutex_lock(&vhost_user.mutex);
1126         vsocket = find_vhost_user_socket(path);
1127         if (vsocket)
1128                 vsocket->notify_ops = ops;
1129         pthread_mutex_unlock(&vhost_user.mutex);
1130
1131         return vsocket ? 0 : -1;
1132 }
1133
1134 struct rte_vhost_device_ops const *
1135 vhost_driver_callback_get(const char *path)
1136 {
1137         struct vhost_user_socket *vsocket;
1138
1139         pthread_mutex_lock(&vhost_user.mutex);
1140         vsocket = find_vhost_user_socket(path);
1141         pthread_mutex_unlock(&vhost_user.mutex);
1142
1143         return vsocket ? vsocket->notify_ops : NULL;
1144 }
1145
1146 int
1147 rte_vhost_driver_start(const char *path)
1148 {
1149         struct vhost_user_socket *vsocket;
1150         static pthread_t fdset_tid;
1151
1152         pthread_mutex_lock(&vhost_user.mutex);
1153         vsocket = find_vhost_user_socket(path);
1154         pthread_mutex_unlock(&vhost_user.mutex);
1155
1156         if (!vsocket)
1157                 return -1;
1158
1159         if (fdset_tid == 0) {
1160                 /**
1161                  * create a pipe which will be waited by poll and notified to
1162                  * rebuild the wait list of poll.
1163                  */
1164                 if (fdset_pipe_init(&vhost_user.fdset) < 0) {
1165                         VHOST_LOG_CONFIG(ERR, "(%s) failed to create pipe for vhost fdset\n", path);
1166                         return -1;
1167                 }
1168
1169                 int ret = rte_ctrl_thread_create(&fdset_tid,
1170                         "vhost-events", NULL, fdset_event_dispatch,
1171                         &vhost_user.fdset);
1172                 if (ret != 0) {
1173                         VHOST_LOG_CONFIG(ERR, "(%s) failed to create fdset handling thread", path);
1174
1175                         fdset_pipe_uninit(&vhost_user.fdset);
1176                         return -1;
1177                 }
1178         }
1179
1180         if (vsocket->is_server)
1181                 return vhost_user_start_server(vsocket);
1182         else
1183                 return vhost_user_start_client(vsocket);
1184 }