a480f9f67281361aeacf7cdba2039938a3d4937f
[dpdk.git] / lib / librte_vhost / vhost_user / vhost-net-user.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33
34 #include <stdint.h>
35 #include <stdio.h>
36 #include <stdbool.h>
37 #include <limits.h>
38 #include <stdlib.h>
39 #include <unistd.h>
40 #include <string.h>
41 #include <sys/types.h>
42 #include <sys/socket.h>
43 #include <sys/un.h>
44 #include <errno.h>
45 #include <pthread.h>
46
47 #include <rte_log.h>
48 #include <rte_virtio_net.h>
49
50 #include "fd_man.h"
51 #include "vhost-net-user.h"
52 #include "vhost-net.h"
53 #include "virtio-net-user.h"
54
/*
 * Every time rte_vhost_driver_register() is invoked, an associated
 * vhost_user_socket struct will be created.
 */
struct vhost_user_socket {
        /* Heap copy (strdup) of the socket path given at registration. */
        char *path;
        /* Listening socket fd; assigned by vhost_user_create_server(). */
        int listenfd;
        /* true when registered in server mode, false for client mode. */
        bool is_server;
};
64
/*
 * Per-connection context: allocated in vhost_user_add_connection(),
 * passed to vhost_user_msg_handler() as its callback data and freed
 * there when the connection is torn down.
 */
struct vhost_user_connection {
        /* Registered socket this connection belongs to. */
        struct vhost_user_socket *vsocket;
        /* Device id returned by vhost_new_device(). */
        int vid;
};
69
/* Capacity of the vsockets[] table, i.e. max number of registered paths. */
#define MAX_VHOST_SOCKET 1024
/* File-wide bookkeeping for every registered vhost-user socket. */
struct vhost_user {
        /* Registered sockets; the first vsocket_cnt entries are in use. */
        struct vhost_user_socket *vsockets[MAX_VHOST_SOCKET];
        /* fds handed to fdset_event_dispatch() (listen and connection fds). */
        struct fdset fdset;
        /* Number of valid entries in vsockets[]. */
        int vsocket_cnt;
        /* Held by register/unregister while vsockets[]/vsocket_cnt change. */
        pthread_mutex_t mutex;
};
77
/* Backlog value passed to listen() for server-mode sockets. */
#define MAX_VIRTIO_BACKLOG 128

/* fdset callbacks; both are defined further down in this file. */
static void vhost_user_server_new_connection(int fd, void *data, int *remove);
static void vhost_user_msg_handler(int fd, void *dat, int *remove);
82
/*
 * The single file-scope vhost_user instance: no sockets registered, both
 * mutexes statically initialized, and every fdset slot filled with the
 * same positional initializer whose first member is -1.
 * NOTE(review): the per-slot initializer is positional and so depends on
 * the member order of the fdset entry type declared in fd_man.h.
 */
static struct vhost_user vhost_user = {
        .fdset = {
                .fd = { [0 ... MAX_FDS - 1] = {-1, NULL, NULL, NULL, 0} },
                .fd_mutex = PTHREAD_MUTEX_INITIALIZER,
                .num = 0
        },
        .vsocket_cnt = 0,
        .mutex = PTHREAD_MUTEX_INITIALIZER,
};
92
/*
 * Printable name of each vhost-user request, indexed by request code.
 * vhost_user_msg_handler() uses it to log the request it just read, after
 * checking that the code is below VHOST_USER_MAX.
 */
static const char *vhost_message_str[VHOST_USER_MAX] = {
        [VHOST_USER_NONE] = "VHOST_USER_NONE",
        [VHOST_USER_GET_FEATURES] = "VHOST_USER_GET_FEATURES",
        [VHOST_USER_SET_FEATURES] = "VHOST_USER_SET_FEATURES",
        [VHOST_USER_SET_OWNER] = "VHOST_USER_SET_OWNER",
        [VHOST_USER_RESET_OWNER] = "VHOST_USER_RESET_OWNER",
        [VHOST_USER_SET_MEM_TABLE] = "VHOST_USER_SET_MEM_TABLE",
        [VHOST_USER_SET_LOG_BASE] = "VHOST_USER_SET_LOG_BASE",
        [VHOST_USER_SET_LOG_FD] = "VHOST_USER_SET_LOG_FD",
        [VHOST_USER_SET_VRING_NUM] = "VHOST_USER_SET_VRING_NUM",
        [VHOST_USER_SET_VRING_ADDR] = "VHOST_USER_SET_VRING_ADDR",
        [VHOST_USER_SET_VRING_BASE] = "VHOST_USER_SET_VRING_BASE",
        [VHOST_USER_GET_VRING_BASE] = "VHOST_USER_GET_VRING_BASE",
        [VHOST_USER_SET_VRING_KICK] = "VHOST_USER_SET_VRING_KICK",
        [VHOST_USER_SET_VRING_CALL] = "VHOST_USER_SET_VRING_CALL",
        [VHOST_USER_SET_VRING_ERR]  = "VHOST_USER_SET_VRING_ERR",
        [VHOST_USER_GET_PROTOCOL_FEATURES]  = "VHOST_USER_GET_PROTOCOL_FEATURES",
        [VHOST_USER_SET_PROTOCOL_FEATURES]  = "VHOST_USER_SET_PROTOCOL_FEATURES",
        [VHOST_USER_GET_QUEUE_NUM]  = "VHOST_USER_GET_QUEUE_NUM",
        [VHOST_USER_SET_VRING_ENABLE]  = "VHOST_USER_SET_VRING_ENABLE",
        [VHOST_USER_SEND_RARP]  = "VHOST_USER_SEND_RARP",
};
115
116 /* return bytes# of read on success or negative val on failure. */
117 static int
118 read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
119 {
120         struct iovec iov;
121         struct msghdr msgh;
122         size_t fdsize = fd_num * sizeof(int);
123         char control[CMSG_SPACE(fdsize)];
124         struct cmsghdr *cmsg;
125         int ret;
126
127         memset(&msgh, 0, sizeof(msgh));
128         iov.iov_base = buf;
129         iov.iov_len  = buflen;
130
131         msgh.msg_iov = &iov;
132         msgh.msg_iovlen = 1;
133         msgh.msg_control = control;
134         msgh.msg_controllen = sizeof(control);
135
136         ret = recvmsg(sockfd, &msgh, 0);
137         if (ret <= 0) {
138                 RTE_LOG(ERR, VHOST_CONFIG, "recvmsg failed\n");
139                 return ret;
140         }
141
142         if (msgh.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) {
143                 RTE_LOG(ERR, VHOST_CONFIG, "truncted msg\n");
144                 return -1;
145         }
146
147         for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL;
148                 cmsg = CMSG_NXTHDR(&msgh, cmsg)) {
149                 if ((cmsg->cmsg_level == SOL_SOCKET) &&
150                         (cmsg->cmsg_type == SCM_RIGHTS)) {
151                         memcpy(fds, CMSG_DATA(cmsg), fdsize);
152                         break;
153                 }
154         }
155
156         return ret;
157 }
158
159 /* return bytes# of read on success or negative val on failure. */
160 static int
161 read_vhost_message(int sockfd, struct VhostUserMsg *msg)
162 {
163         int ret;
164
165         ret = read_fd_message(sockfd, (char *)msg, VHOST_USER_HDR_SIZE,
166                 msg->fds, VHOST_MEMORY_MAX_NREGIONS);
167         if (ret <= 0)
168                 return ret;
169
170         if (msg && msg->size) {
171                 if (msg->size > sizeof(msg->payload)) {
172                         RTE_LOG(ERR, VHOST_CONFIG,
173                                 "invalid msg size: %d\n", msg->size);
174                         return -1;
175                 }
176                 ret = read(sockfd, &msg->payload, msg->size);
177                 if (ret <= 0)
178                         return ret;
179                 if (ret != (int)msg->size) {
180                         RTE_LOG(ERR, VHOST_CONFIG,
181                                 "read control message failed\n");
182                         return -1;
183                 }
184         }
185
186         return ret;
187 }
188
189 static int
190 send_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
191 {
192
193         struct iovec iov;
194         struct msghdr msgh;
195         size_t fdsize = fd_num * sizeof(int);
196         char control[CMSG_SPACE(fdsize)];
197         struct cmsghdr *cmsg;
198         int ret;
199
200         memset(&msgh, 0, sizeof(msgh));
201         iov.iov_base = buf;
202         iov.iov_len = buflen;
203
204         msgh.msg_iov = &iov;
205         msgh.msg_iovlen = 1;
206
207         if (fds && fd_num > 0) {
208                 msgh.msg_control = control;
209                 msgh.msg_controllen = sizeof(control);
210                 cmsg = CMSG_FIRSTHDR(&msgh);
211                 cmsg->cmsg_len = CMSG_LEN(fdsize);
212                 cmsg->cmsg_level = SOL_SOCKET;
213                 cmsg->cmsg_type = SCM_RIGHTS;
214                 memcpy(CMSG_DATA(cmsg), fds, fdsize);
215         } else {
216                 msgh.msg_control = NULL;
217                 msgh.msg_controllen = 0;
218         }
219
220         do {
221                 ret = sendmsg(sockfd, &msgh, 0);
222         } while (ret < 0 && errno == EINTR);
223
224         if (ret < 0) {
225                 RTE_LOG(ERR, VHOST_CONFIG,  "sendmsg error\n");
226                 return ret;
227         }
228
229         return ret;
230 }
231
232 static int
233 send_vhost_message(int sockfd, struct VhostUserMsg *msg)
234 {
235         int ret;
236
237         if (!msg)
238                 return 0;
239
240         msg->flags &= ~VHOST_USER_VERSION_MASK;
241         msg->flags |= VHOST_USER_VERSION;
242         msg->flags |= VHOST_USER_REPLY_MASK;
243
244         ret = send_fd_message(sockfd, (char *)msg,
245                 VHOST_USER_HDR_SIZE + msg->size, NULL, 0);
246
247         return ret;
248 }
249
250
251 static void
252 vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket)
253 {
254         int vid;
255         size_t size;
256         struct vhost_user_connection *conn;
257
258         conn = malloc(sizeof(*conn));
259         if (conn == NULL) {
260                 close(fd);
261                 return;
262         }
263
264         vid = vhost_new_device();
265         if (vid == -1) {
266                 close(fd);
267                 free(conn);
268                 return;
269         }
270
271         size = strnlen(vsocket->path, PATH_MAX);
272         vhost_set_ifname(vid, vsocket->path, size);
273
274         RTE_LOG(INFO, VHOST_CONFIG, "new device, handle is %d\n", vid);
275
276         conn->vsocket = vsocket;
277         conn->vid = vid;
278         fdset_add(&vhost_user.fdset, fd, vhost_user_msg_handler, NULL, conn);
279 }
280
281 /* call back when there is new vhost-user connection from client  */
282 static void
283 vhost_user_server_new_connection(int fd, void *dat, int *remove __rte_unused)
284 {
285         struct vhost_user_socket *vsocket = dat;
286
287         fd = accept(fd, NULL, NULL);
288         if (fd < 0)
289                 return;
290
291         RTE_LOG(INFO, VHOST_CONFIG, "new vhost user connection is %d\n", fd);
292         vhost_user_add_connection(fd, vsocket);
293 }
294
295 /* callback when there is message on the connfd */
296 static void
297 vhost_user_msg_handler(int connfd, void *dat, int *remove)
298 {
299         int vid;
300         struct vhost_user_connection *conn = dat;
301         struct VhostUserMsg msg;
302         uint64_t features;
303         int ret;
304
305         vid = conn->vid;
306         ret = read_vhost_message(connfd, &msg);
307         if (ret <= 0 || msg.request >= VHOST_USER_MAX) {
308                 if (ret < 0)
309                         RTE_LOG(ERR, VHOST_CONFIG,
310                                 "vhost read message failed\n");
311                 else if (ret == 0)
312                         RTE_LOG(INFO, VHOST_CONFIG,
313                                 "vhost peer closed\n");
314                 else
315                         RTE_LOG(ERR, VHOST_CONFIG,
316                                 "vhost read incorrect message\n");
317
318                 close(connfd);
319                 *remove = 1;
320                 free(conn);
321                 vhost_destroy_device(vid);
322
323                 return;
324         }
325
326         RTE_LOG(INFO, VHOST_CONFIG, "read message %s\n",
327                 vhost_message_str[msg.request]);
328         switch (msg.request) {
329         case VHOST_USER_GET_FEATURES:
330                 ret = vhost_get_features(vid, &features);
331                 msg.payload.u64 = features;
332                 msg.size = sizeof(msg.payload.u64);
333                 send_vhost_message(connfd, &msg);
334                 break;
335         case VHOST_USER_SET_FEATURES:
336                 features = msg.payload.u64;
337                 vhost_set_features(vid, &features);
338                 break;
339
340         case VHOST_USER_GET_PROTOCOL_FEATURES:
341                 msg.payload.u64 = VHOST_USER_PROTOCOL_FEATURES;
342                 msg.size = sizeof(msg.payload.u64);
343                 send_vhost_message(connfd, &msg);
344                 break;
345         case VHOST_USER_SET_PROTOCOL_FEATURES:
346                 user_set_protocol_features(vid, msg.payload.u64);
347                 break;
348
349         case VHOST_USER_SET_OWNER:
350                 vhost_set_owner(vid);
351                 break;
352         case VHOST_USER_RESET_OWNER:
353                 vhost_reset_owner(vid);
354                 break;
355
356         case VHOST_USER_SET_MEM_TABLE:
357                 user_set_mem_table(vid, &msg);
358                 break;
359
360         case VHOST_USER_SET_LOG_BASE:
361                 user_set_log_base(vid, &msg);
362
363                 /* it needs a reply */
364                 msg.size = sizeof(msg.payload.u64);
365                 send_vhost_message(connfd, &msg);
366                 break;
367         case VHOST_USER_SET_LOG_FD:
368                 close(msg.fds[0]);
369                 RTE_LOG(INFO, VHOST_CONFIG, "not implemented.\n");
370                 break;
371
372         case VHOST_USER_SET_VRING_NUM:
373                 vhost_set_vring_num(vid, &msg.payload.state);
374                 break;
375         case VHOST_USER_SET_VRING_ADDR:
376                 vhost_set_vring_addr(vid, &msg.payload.addr);
377                 break;
378         case VHOST_USER_SET_VRING_BASE:
379                 vhost_set_vring_base(vid, &msg.payload.state);
380                 break;
381
382         case VHOST_USER_GET_VRING_BASE:
383                 ret = user_get_vring_base(vid, &msg.payload.state);
384                 msg.size = sizeof(msg.payload.state);
385                 send_vhost_message(connfd, &msg);
386                 break;
387
388         case VHOST_USER_SET_VRING_KICK:
389                 user_set_vring_kick(vid, &msg);
390                 break;
391         case VHOST_USER_SET_VRING_CALL:
392                 user_set_vring_call(vid, &msg);
393                 break;
394
395         case VHOST_USER_SET_VRING_ERR:
396                 if (!(msg.payload.u64 & VHOST_USER_VRING_NOFD_MASK))
397                         close(msg.fds[0]);
398                 RTE_LOG(INFO, VHOST_CONFIG, "not implemented\n");
399                 break;
400
401         case VHOST_USER_GET_QUEUE_NUM:
402                 msg.payload.u64 = VHOST_MAX_QUEUE_PAIRS;
403                 msg.size = sizeof(msg.payload.u64);
404                 send_vhost_message(connfd, &msg);
405                 break;
406
407         case VHOST_USER_SET_VRING_ENABLE:
408                 user_set_vring_enable(vid, &msg.payload.state);
409                 break;
410         case VHOST_USER_SEND_RARP:
411                 user_send_rarp(vid, &msg);
412                 break;
413
414         default:
415                 break;
416
417         }
418 }
419
420 static int
421 create_unix_socket(const char *path, struct sockaddr_un *un, bool is_server)
422 {
423         int fd;
424
425         fd = socket(AF_UNIX, SOCK_STREAM, 0);
426         if (fd < 0)
427                 return -1;
428         RTE_LOG(INFO, VHOST_CONFIG, "vhost-user %s: socket created, fd: %d\n",
429                 is_server ? "server" : "client", fd);
430
431         memset(un, 0, sizeof(*un));
432         un->sun_family = AF_UNIX;
433         strncpy(un->sun_path, path, sizeof(un->sun_path));
434
435         return fd;
436 }
437
438 static int
439 vhost_user_create_server(struct vhost_user_socket *vsocket)
440 {
441         int fd;
442         int ret;
443         struct sockaddr_un un;
444         const char *path = vsocket->path;
445
446         fd = create_unix_socket(path, &un, vsocket->is_server);
447         if (fd < 0)
448                 return -1;
449
450         ret = bind(fd, (struct sockaddr *)&un, sizeof(un));
451         if (ret < 0) {
452                 RTE_LOG(ERR, VHOST_CONFIG,
453                         "failed to bind to %s: %s; remove it and try again\n",
454                         path, strerror(errno));
455                 goto err;
456         }
457         RTE_LOG(INFO, VHOST_CONFIG, "bind to %s\n", path);
458
459         ret = listen(fd, MAX_VIRTIO_BACKLOG);
460         if (ret < 0)
461                 goto err;
462
463         vsocket->listenfd = fd;
464         fdset_add(&vhost_user.fdset, fd, vhost_user_server_new_connection,
465                   NULL, vsocket);
466
467         return 0;
468
469 err:
470         close(fd);
471         return -1;
472 }
473
474 static int
475 vhost_user_create_client(struct vhost_user_socket *vsocket)
476 {
477         int fd;
478         int ret;
479         struct sockaddr_un un;
480         const char *path = vsocket->path;
481
482         fd = create_unix_socket(path, &un, vsocket->is_server);
483         if (fd < 0)
484                 return -1;
485
486         ret = connect(fd, (struct sockaddr *)&un, sizeof(un));
487         if (ret < 0) {
488                 RTE_LOG(ERR, VHOST_CONFIG, "failed to connect to %s: %s\n",
489                         path, strerror(errno));
490                 close(fd);
491                 return -1;
492         }
493
494         vhost_user_add_connection(fd, vsocket);
495
496         return 0;
497 }
498
499 /*
500  * Register a new vhost-user socket; here we could act as server
501  * (the default case), or client (when RTE_VHOST_USER_CLIENT) flag
502  * is set.
503  */
504 int
505 rte_vhost_driver_register(const char *path, uint64_t flags)
506 {
507         int ret = -1;
508         struct vhost_user_socket *vsocket;
509
510         if (!path)
511                 return -1;
512
513         pthread_mutex_lock(&vhost_user.mutex);
514
515         if (vhost_user.vsocket_cnt == MAX_VHOST_SOCKET) {
516                 RTE_LOG(ERR, VHOST_CONFIG,
517                         "error: the number of vhost sockets reaches maximum\n");
518                 goto out;
519         }
520
521         vsocket = malloc(sizeof(struct vhost_user_socket));
522         if (!vsocket)
523                 goto out;
524         memset(vsocket, 0, sizeof(struct vhost_user_socket));
525         vsocket->path = strdup(path);
526
527         if ((flags & RTE_VHOST_USER_CLIENT) != 0) {
528                 ret = vhost_user_create_client(vsocket);
529         } else {
530                 vsocket->is_server = true;
531                 ret = vhost_user_create_server(vsocket);
532         }
533         if (ret < 0) {
534                 free(vsocket->path);
535                 free(vsocket);
536                 goto out;
537         }
538
539         vhost_user.vsockets[vhost_user.vsocket_cnt++] = vsocket;
540
541 out:
542         pthread_mutex_unlock(&vhost_user.mutex);
543
544         return ret;
545 }
546
547 /**
548  * Unregister the specified vhost socket
549  */
550 int
551 rte_vhost_driver_unregister(const char *path)
552 {
553         int i;
554         int count;
555
556         pthread_mutex_lock(&vhost_user.mutex);
557
558         for (i = 0; i < vhost_user.vsocket_cnt; i++) {
559                 if (!strcmp(vhost_user.vsockets[i]->path, path)) {
560                         if (vhost_user.vsockets[i]->is_server) {
561                                 fdset_del(&vhost_user.fdset,
562                                         vhost_user.vsockets[i]->listenfd);
563                                 close(vhost_user.vsockets[i]->listenfd);
564                                 unlink(path);
565                         }
566
567                         free(vhost_user.vsockets[i]->path);
568                         free(vhost_user.vsockets[i]);
569
570                         count = --vhost_user.vsocket_cnt;
571                         vhost_user.vsockets[i] = vhost_user.vsockets[count];
572                         vhost_user.vsockets[count] = NULL;
573                         pthread_mutex_unlock(&vhost_user.mutex);
574
575                         return 0;
576                 }
577         }
578         pthread_mutex_unlock(&vhost_user.mutex);
579
580         return -1;
581 }
582
583 int
584 rte_vhost_driver_session_start(void)
585 {
586         fdset_event_dispatch(&vhost_user.fdset);
587         return 0;
588 }