4 * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 #include <sys/socket.h>
39 /* sys/un.h with __USE_MISC uses strlen, which is unsafe */
41 #define REMOVED_USE_MISC
45 /* make sure we redefine __USE_MISC only if it was previously undefined */
46 #ifdef REMOVED_USE_MISC
48 #undef REMOVED_USE_MISC
53 #include <rte_eal_memconfig.h>
54 #include <rte_malloc.h>
57 #include "eal_filesystem.h"
58 #include "eal_pci_init.h"
60 #include "eal_thread.h"
64 * VFIO socket for communication between primary and secondary processes.
66 * This file is only compiled if CONFIG_RTE_EAL_VFIO is set to "y".
/*
 * SOCKET_PATH_FMT: printf-style format for the socket path. It is filled in
 * by get_socket_path() with a directory and internal_config.hugefile_prefix.
 */
71 #define SOCKET_PATH_FMT "%s/.%s_mp_socket"
/* CMSGLEN: length of a control message whose payload is a single int. */
72 #define CMSGLEN (CMSG_LEN(sizeof(int)))
/*
 * FD_TO_CMSGHDR(fd, chdr): fills the cmsghdr `chdr` as an SCM_RIGHTS control
 * message at SOL_SOCKET level and copies `fd` into its data area.
 * CMSGHDR_TO_FD(chdr, fd): copies the data area of `chdr` back into `fd`.
 *
 * Both macros copy sizeof(fd) bytes, so the byte count follows the type of
 * the `fd` argument, while CMSGLEN is sized for an int.
 *
 * NOTE(review): `__cmsg_data` is the glibc-internal name of the cmsghdr data
 * member; CMSG_DATA() is the documented accessor. Consider switching.
 */
73 #define FD_TO_CMSGHDR(fd, chdr) \
75 (chdr).cmsg_len = CMSGLEN;\
76 (chdr).cmsg_level = SOL_SOCKET;\
77 (chdr).cmsg_type = SCM_RIGHTS;\
78 memcpy((chdr).__cmsg_data, &(fd), sizeof(fd));\
80 #define CMSGHDR_TO_FD(chdr, fd) \
81 memcpy(&(fd), (chdr).__cmsg_data, sizeof(fd))
/* Thread handle written by pthread_create() in vfio_mp_sync_setup(). */
83 static pthread_t socket_thread;
/*
 * Listening socket: stored by vfio_mp_sync_socket_setup() and passed to
 * accept() in vfio_mp_sync_thread().
 */
84 static int mp_socket_fd;
87 /* get socket path (/var/run if root, $HOME otherwise) */
/*
 * Formats the socket path into `buffer`, writing at most `bufsz` bytes.
 *
 * buffer: destination for the path string.
 * bufsz:  size of `buffer` in bytes.
 *
 * The directory defaults to "/var/run". $HOME is only considered when the
 * process is not running as root (getuid() != 0) and HOME is set.
 * The file name is derived from internal_config.hugefile_prefix via
 * SOCKET_PATH_FMT.
 *
 * NOTE(review): the snprintf() result is not checked, so a path that does
 * not fit in `bufsz` is silently truncated.
 */
89 get_socket_path(char *buffer, int bufsz)
91 const char *dir = "/var/run";
92 const char *home_dir = getenv("HOME");
94 if (getuid() != 0 && home_dir != NULL)
97 /* use current prefix as file path */
98 snprintf(buffer, bufsz, SOCKET_PATH_FMT, dir,
99 internal_config.hugefile_prefix);
105 * data flow for socket comm protocol:
106 * 1. client sends SOCKET_REQ_CONTAINER, SOCKET_REQ_GROUP or SOCKET_CLR_GROUP
107 * 1a. in case of SOCKET_REQ_GROUP, client also then sends group number;
 *     in case of SOCKET_CLR_GROUP, client also then sends the group fd
108 * 2. server receives message
109 * 2a. in case of invalid group, SOCKET_ERR is sent back to client
110 * 2b. in case of unbound group, SOCKET_NO_FD is sent back to client
111 * 2c. in case of valid group, SOCKET_OK is sent and immediately followed by fd
113 * in case of any error, socket is closed.
116 /* send a request, return -1 on error */
/*
 * socket: connected socket to send on.
 * req:    request/status code to transmit.
 *
 * The message header is zeroed, a single iovec is pointed at a local buffer,
 * and the message is sent with sendmsg() (flags 0).
 */
118 vfio_mp_sync_send_request(int socket, int req)
125 memset(&hdr, 0, sizeof(hdr));
/* the payload is exactly sizeof(buf) bytes */
131 iov.iov_base = (char *) &buf;
132 iov.iov_len = sizeof(buf);
134 ret = sendmsg(socket, &hdr, 0);
140 /* receive a request and return it */
/*
 * socket: connected socket to read from.
 *
 * Counterpart of vfio_mp_sync_send_request(): the message header is zeroed,
 * a single iovec is pointed at a local buffer, and one message is read with
 * recvmsg() (flags 0).
 */
142 vfio_mp_sync_receive_request(int socket)
149 memset(&hdr, 0, sizeof(hdr));
/* the receive buffer is exactly sizeof(buf) bytes */
155 iov.iov_base = (char *) &buf;
156 iov.iov_len = sizeof(buf);
158 ret = recvmsg(socket, &hdr, 0);
167 /* send OK in message, fd in control message */
/*
 * socket: connected socket to send on.
 * fd:     file descriptor to pass to the peer as SCM_RIGHTS ancillary data.
 *
 * A single sendmsg() call carries both the regular payload (one iovec over
 * a local buffer) and a control message of CMSGLEN bytes holding `fd`.
 */
169 vfio_mp_sync_send_fd(int socket, int fd)
173 struct cmsghdr *chdr;
/*
 * NOTE(review): a plain char array is accessed as struct cmsghdr below, which
 * relies on the array being suitably aligned. cmsg(3) recommends a union with
 * struct cmsghdr for the control buffer.
 */
174 char chdr_buf[CMSGLEN];
178 chdr = (struct cmsghdr *) chdr_buf;
/* clear both the control buffer and the message header before filling them */
179 memset(chdr, 0, sizeof(chdr_buf));
180 memset(&hdr, 0, sizeof(hdr));
184 iov.iov_base = (char *) &buf;
185 iov.iov_len = sizeof(buf);
/* attach the control buffer to the message */
186 hdr.msg_control = chdr;
187 hdr.msg_controllen = CMSGLEN;
/* write the SCM_RIGHTS header and copy fd into the control message */
190 FD_TO_CMSGHDR(fd, *chdr);
192 ret = sendmsg(socket, &hdr, 0);
198 /* receive OK in message, fd in control message */
/*
 * socket: connected socket to read from.
 *
 * Counterpart of vfio_mp_sync_send_fd(): one recvmsg() call reads the regular
 * payload and a control buffer of CMSGLEN bytes. The payload is compared
 * against SOCKET_OK before the descriptor is copied out of the control
 * message.
 */
200 vfio_mp_sync_receive_fd(int socket)
204 struct cmsghdr *chdr;
/*
 * NOTE(review): a plain char array is accessed as struct cmsghdr below, which
 * relies on the array being suitably aligned. cmsg(3) recommends a union with
 * struct cmsghdr for the control buffer.
 */
205 char chdr_buf[CMSGLEN];
211 chdr = (struct cmsghdr *) chdr_buf;
/* clear both the control buffer and the message header before use */
212 memset(chdr, 0, sizeof(chdr_buf));
213 memset(&hdr, 0, sizeof(hdr));
217 iov.iov_base = (char *) &buf;
218 iov.iov_len = sizeof(buf);
/* provide room for exactly one int-sized control message */
219 hdr.msg_control = chdr;
220 hdr.msg_controllen = CMSGLEN;
222 ret = recvmsg(socket, &hdr, 0);
/* anything other than SOCKET_OK means no descriptor accompanies the message */
228 if (req != SOCKET_OK)
/*
 * NOTE(review): confirm that cmsg_level/cmsg_type/cmsg_len of the received
 * control message are validated before this copy.
 */
231 CMSGHDR_TO_FD(*chdr, fd);
236 /* connect socket_fd in secondary process to the primary process's socket */
/*
 * Creates an AF_UNIX/SOCK_SEQPACKET socket and connects it to the path
 * produced by get_socket_path().
 *
 * NOTE(review): presumably returns the connected descriptor on success and a
 * negative value on failure - confirm against callers.
 */
238 vfio_mp_sync_connect_to_primary(void)
240 struct sockaddr_un addr;
241 socklen_t sockaddr_len;
244 /* set up a socket */
245 socket_fd = socket(AF_UNIX, SOCK_SEQPACKET, 0);
247 RTE_LOG(ERR, EAL, "Failed to create socket!\n");
/* the path comes from get_socket_path(), which vfio_mp_sync_socket_setup() also uses */
251 get_socket_path(addr.sun_path, sizeof(addr.sun_path));
252 addr.sun_family = AF_UNIX;
254 sockaddr_len = sizeof(struct sockaddr_un);
/* connect() returns 0 on success */
256 if (connect(socket_fd, (struct sockaddr *) &addr, sockaddr_len) == 0)
259 /* if connect failed */
267 * socket listening thread for primary process
/*
 * Thread entry point passed to pthread_create() by vfio_mp_sync_setup().
 * Declared noreturn. It accepts connections on mp_socket_fd, reads a request
 * code from each accepted connection, and dispatches on it:
 *   SOCKET_REQ_CONTAINER - reply with the container fd or SOCKET_ERR
 *   SOCKET_REQ_GROUP     - read a group number, reply with SOCKET_ERR,
 *                          SOCKET_NO_FD, or SOCKET_OK followed by the group fd
 *   SOCKET_CLR_GROUP     - read a group fd, call clear_group(), reply with
 *                          SOCKET_NO_FD or SOCKET_OK
 *
 * arg: unused.
 */
269 static __attribute__((noreturn)) void *
270 vfio_mp_sync_thread(void __rte_unused * arg)
272 int ret, fd, vfio_data;
274 /* wait for requests on the socket */
277 struct sockaddr_un addr;
278 socklen_t sockaddr_len = sizeof(addr);
280 /* this is a blocking call */
281 conn_sock = accept(mp_socket_fd, (struct sockaddr *) &addr,
284 /* just restart on error */
288 /* set socket to linger after close */
/*
 * NOTE(review): the option is applied to the accepted connection (conn_sock),
 * although the warning text below says "listen socket". A failure here is
 * only logged.
 */
293 if (setsockopt(conn_sock, SOL_SOCKET, SO_LINGER, &l, sizeof(l)) < 0)
294 RTE_LOG(WARNING, EAL, "Cannot set SO_LINGER option "
295 "on listen socket (%s)\n", strerror(errno));
/* first message on the connection is the request code */
297 ret = vfio_mp_sync_receive_request(conn_sock);
300 case SOCKET_REQ_CONTAINER:
301 fd = vfio_get_container_fd();
/* error reply, or the container fd passed via vfio_mp_sync_send_fd() */
303 vfio_mp_sync_send_request(conn_sock, SOCKET_ERR);
305 vfio_mp_sync_send_fd(conn_sock, fd);
309 case SOCKET_REQ_GROUP:
310 /* wait for group number */
311 vfio_data = vfio_mp_sync_receive_request(conn_sock);
317 fd = vfio_get_group_fd(vfio_data);
320 vfio_mp_sync_send_request(conn_sock, SOCKET_ERR);
321 /* if VFIO group exists but isn't bound to VFIO driver */
323 vfio_mp_sync_send_request(conn_sock, SOCKET_NO_FD);
324 /* if group exists and is bound to VFIO driver */
/* SOCKET_OK is sent as a separate message first, then the fd */
326 vfio_mp_sync_send_request(conn_sock, SOCKET_OK);
327 vfio_mp_sync_send_fd(conn_sock, fd);
330 case SOCKET_CLR_GROUP:
331 /* wait for group fd */
332 vfio_data = vfio_mp_sync_receive_request(conn_sock);
/* the clear_group() result selects the reply sent below */
338 ret = clear_group(vfio_data);
341 vfio_mp_sync_send_request(conn_sock, SOCKET_NO_FD);
343 vfio_mp_sync_send_request(conn_sock, SOCKET_OK);
/* NOTE(review): presumably the fallback reply for unrecognized requests - confirm */
346 vfio_mp_sync_send_request(conn_sock, SOCKET_ERR);
/*
 * Creates the AF_UNIX/SOCK_SEQPACKET listening socket: binds it to the path
 * from get_socket_path(), starts listening, and stores the descriptor in
 * mp_socket_fd. Failures of socket(), bind() and listen() are logged.
 * vfio_mp_sync_setup() treats a negative return value as failure.
 */
354 vfio_mp_sync_socket_setup(void)
357 struct sockaddr_un addr;
358 socklen_t sockaddr_len;
360 /* set up a socket */
361 socket_fd = socket(AF_UNIX, SOCK_SEQPACKET, 0);
363 RTE_LOG(ERR, EAL, "Failed to create socket!\n");
367 get_socket_path(addr.sun_path, sizeof(addr.sun_path));
368 addr.sun_family = AF_UNIX;
370 sockaddr_len = sizeof(struct sockaddr_un);
/*
 * presumably removes a stale socket file left at the same path so that bind()
 * can succeed; the unlink() result is not checked
 */
372 unlink(addr.sun_path);
374 ret = bind(socket_fd, (struct sockaddr *) &addr, sockaddr_len);
376 RTE_LOG(ERR, EAL, "Failed to bind socket: %s!\n", strerror(errno));
/* backlog of up to 50 pending connections */
381 ret = listen(socket_fd, 50);
383 RTE_LOG(ERR, EAL, "Failed to listen: %s!\n", strerror(errno));
388 /* save the socket in local configuration */
389 mp_socket_fd = socket_fd;
395 * set up a local socket and tell it to listen for incoming connections
/*
 * Entry point for the synchronization service. It creates the listening
 * socket via vfio_mp_sync_socket_setup(), starts vfio_mp_sync_thread() on a
 * new pthread stored in socket_thread, and names that thread "vfio-sync".
 * Each failing step is logged.
 */
398 vfio_mp_sync_setup(void)
401 char thread_name[RTE_MAX_THREAD_NAME_LEN];
/* a negative result from the socket setup is treated as failure */
403 if (vfio_mp_sync_socket_setup() < 0) {
404 RTE_LOG(ERR, EAL, "Failed to set up local socket!\n");
/* default thread attributes, no argument passed to the thread */
408 ret = pthread_create(&socket_thread, NULL,
409 vfio_mp_sync_thread, NULL);
412 "Failed to create thread for communication with secondary processes!\n");
417 /* Set thread_name for aid in debugging. */
418 snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, "vfio-sync");
419 ret = rte_thread_setname(socket_thread, thread_name);
422 "Failed to set thread name for secondary processes!\n");