/*
 *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
35 #include <fuse/cuse_lowlevel.h>
36 #include <linux/limits.h>
37 #include <linux/vhost.h>
42 #include <rte_ethdev.h>
44 #include <rte_string_fns.h>
45 #include <rte_virtio_net.h>
47 #include "virtio-net-cdev.h"
48 #include "vhost-net.h"
49 #include "eventfd_copy.h"
/*
 * Argument strings used to build the argv handed to cuse_lowlevel_setup()
 * in rte_vhost_driver_register(): an empty placeholder for argv[0], the
 * FUSE "-f" option and the FUSE "-s" option.
 */
#define FUSE_OPT_DUMMY "\0\0"
#define FUSE_OPT_FORE "-f\0\0"
#define FUSE_OPT_NOMULTI "-s\0\0"

/* Device numbers stored in cuse_info.dev_major / cuse_info.dev_minor. */
static const uint32_t default_major = 231;
static const uint32_t default_minor = 1;
/* CUSE control node; its accessibility is checked before registering. */
static const char cuse_device_name[] = "/dev/cuse";
/* NOTE(review): not referenced by any line visible in this file view. */
static const char default_cdev[] = "vhost-net";

/*
 * Session returned by cuse_lowlevel_setup() and later run by
 * fuse_session_loop() in rte_vhost_driver_session_start().
 */
static struct fuse_session *session;
/*
 * Returns a vhost_device_ctx built from the given fuse_req_t. The index is
 * populated later, when the device is added to the device linked list.
 */
/*
 * fuse_req_to_vhost_ctx - begin a vhost_device_ctx for a FUSE request.
 *
 * The lines shown declare a local context, fetch the request's FUSE context
 * with fuse_req_ctx() and copy its pid into ctx.pid.
 *
 * NOTE(review): this block comes from a line-numbered listing. Each line
 * still carries its listing number, and the gaps in that numbering (68, 71,
 * 73 onward) mean the braces, any use of `fi` and the return statement are
 * not present here. It cannot compile as shown; restore it from the upstream
 * file rather than from this fragment.
 */
66 static struct vhost_device_ctx
67 fuse_req_to_vhost_ctx(fuse_req_t req, struct fuse_file_info *fi)
69 struct vhost_device_ctx ctx;
70 struct fuse_ctx const *const req_ctx = fuse_req_ctx(req);
/* pid is the only field filled in by the lines visible in this fragment. */
72 ctx.pid = req_ctx->pid;
/*
 * When the device is created in QEMU it gets initialised here and
 * added to the device linked list.
 */
/*
 * vhost_net_open - handler installed as .open in vhost_net_ops.
 *
 * Builds a device context for the request, passes it to vhost_new_device(),
 * and on the success path logs fi->fh and answers with fuse_reply_open().
 *
 * NOTE(review): fragment of a line-numbered listing. The return type, the
 * declaration of `err`, the condition guarding the EPERM reply and whatever
 * sits on listing lines 91-95 (between the EPERM reply and the log call)
 * are not visible; do not infer them from this view.
 */
83 vhost_net_open(fuse_req_t req, struct fuse_file_info *fi)
85 struct vhost_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi);
/* The result of vhost_new_device() is kept in err; its test is not shown. */
88 err = vhost_new_device(ctx);
/* Failure reply: the request is rejected with EPERM. */
90 fuse_reply_err(req, EPERM);
96 RTE_LOG(INFO, VHOST_CONFIG,
97 "(%"PRIu64") Device configuration started\n", fi->fh);
98 fuse_reply_open(req, fi);
/*
 * When QEMU is shut down or killed the device gets released.
 */
/*
 * vhost_net_release - handler installed as .release in vhost_net_ops.
 *
 * Rebuilds the device context for the request, calls vhost_destroy_device()
 * on it, logs ctx.fh and completes the request with fuse_reply_err(req, err).
 *
 * NOTE(review): fragment of a line-numbered listing. The return type and the
 * declaration and value of `err` are not visible in this view.
 */
105 vhost_net_release(fuse_req_t req, struct fuse_file_info *fi)
108 struct vhost_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi);
110 vhost_destroy_device(ctx);
111 RTE_LOG(INFO, VHOST_CONFIG, "(%"PRIu64") Device released\n", ctx.fh);
/* `err` is assigned on a line that is missing from this listing. */
112 fuse_reply_err(req, err);
/*
 * Boilerplate code for a CUSE IOCTL whose handler takes only the context.
 * Implicit arguments: ctx, req, result.
 *
 * Calls func(ctx), stores the return value in result and completes the
 * request with fuse_reply_ioctl() using that value and no output buffer.
 */
#define VHOST_IOCTL(func) do {				\
	result = (func)(ctx);				\
	fuse_reply_ioctl(req, result, NULL, 0);		\
} while (0)
/*
 * Boilerplate IOCTL RETRY
 * Implicit arguments: req, arg.
 *
 * Describes a read region and a write region, both starting at arg, and
 * asks FUSE to retry the ioctl through fuse_reply_ioctl_retry(). A region
 * is only counted (iovec count 1) when its size is non-zero.
 */
#define VHOST_IOCTL_RETRY(size_r, size_w) do {			\
	struct iovec iov_r = { arg, (size_r) };			\
	struct iovec iov_w = { arg, (size_w) };			\
	fuse_reply_ioctl_retry(req, &iov_r,			\
		(size_r) ? 1 : 0, &iov_w, (size_w) ? 1 : 0);	\
} while (0)
/*
 * Boilerplate code for CUSE Read IOCTL
 * Implicit arguments: ctx, req, result, in_bufsz, in_buf.
 *
 * While no input bytes are available (in_bufsz == 0) a retry is requested
 * for sizeof(type) bytes. Otherwise the value is copied out of in_buf into
 * var, func(ctx, &var) is called and its return value is sent back with
 * fuse_reply_ioctl() and no output buffer.
 *
 * NOTE(review): the if/else skeleton was restored from a truncated listing;
 * the guard is taken from the documented implicit argument in_bufsz.
 */
#define VHOST_IOCTL_R(type, var, func) do {			\
	if (!in_bufsz) {					\
		VHOST_IOCTL_RETRY(sizeof(type), 0);		\
	} else {						\
		(var) = *(const type *)in_buf;			\
		result = (func)(ctx, &(var));			\
		fuse_reply_ioctl(req, result, NULL, 0);		\
	}							\
} while (0)
/*
 * Boilerplate code for CUSE Write IOCTL
 * Implicit arguments: ctx, req, result, out_bufsz.
 *
 * While no output space is available (out_bufsz == 0) a retry is requested
 * for sizeof(type) bytes. Otherwise func(ctx, &var) fills var and the reply
 * carries the return value together with var as the output buffer.
 *
 * NOTE(review): the if/else skeleton was restored from a truncated listing;
 * the guard is taken from the documented implicit argument out_bufsz.
 */
#define VHOST_IOCTL_W(type, var, func) do {				\
	if (!out_bufsz) {						\
		VHOST_IOCTL_RETRY(0, sizeof(type));			\
	} else {							\
		result = (func)(ctx, &(var));				\
		fuse_reply_ioctl(req, result, &(var), sizeof(type));	\
	}								\
} while (0)
/*
 * Boilerplate code for CUSE Read/Write IOCTL
 * Implicit arguments: ctx, req, result, in_bufsz, in_buf.
 *
 * While no input bytes are available (in_bufsz == 0) a retry is requested
 * that reads sizeof(type1) bytes and allows sizeof(type2) bytes to be
 * written. Otherwise var1 is loaded from in_buf, func(ctx, var1, &var2) is
 * called and the reply carries the return value with var2 as output buffer.
 *
 * NOTE(review): the if/else skeleton was restored from a truncated listing;
 * the guard is taken from the documented implicit argument in_bufsz.
 */
#define VHOST_IOCTL_RW(type1, var1, type2, var2, func) do {		\
	if (!in_bufsz) {						\
		VHOST_IOCTL_RETRY(sizeof(type1), sizeof(type2));	\
	} else {							\
		(var1) = *(const type1 *)(in_buf);			\
		result = (func)(ctx, (var1), &(var2));			\
		fuse_reply_ioctl(req, result, &(var2), sizeof(type2));	\
	}								\
} while (0)
/*
 * The IOCTLs are handled using CUSE/FUSE in userspace. Depending on the type
 * of IOCTL, a buffer is requested to read or to write. This request is handled
 * by FUSE and the buffer is then given to CUSE.
 */
/*
 * vhost_net_ioctl - handler installed as .ioctl in vhost_net_ops.
 *
 * Builds the device context for the request and dispatches on the ioctl
 * command. Commands either go through the VHOST_IOCTL* helper macros or
 * carry out the same request-buffer-then-handle sequence by hand.
 *
 * Parameters used by the visible lines:
 *   req       - FUSE request that every path answers
 *   cmd       - ioctl command number
 *   arg       - user address used by VHOST_IOCTL_RETRY for the iovecs
 *   fi        - file info forwarded to fuse_req_to_vhost_ctx()
 *   in_buf    - input bytes supplied by FUSE after a retry
 *   in_bufsz / out_bufsz - consumed by the VHOST_IOCTL_R/_W/_RW macros
 *
 * NOTE(review): fragment of a line-numbered listing. The return type, the
 * declarations of `features`, `index`, `result` and `fd`, the switch header,
 * every `break`, and the guards in front of several statements are missing
 * (see the gaps in the embedded numbering). Comments below describe only
 * what the remaining lines show.
 */
182 vhost_net_ioctl(fuse_req_t req, int cmd, void *arg,
183 struct fuse_file_info *fi, __rte_unused unsigned flags,
184 const void *in_buf, size_t in_bufsz, size_t out_bufsz)
186 struct vhost_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi);
187 struct vhost_vring_file file;
188 struct vhost_vring_state state;
189 struct vhost_vring_addr addr;
/*
 * SET_BACKEND: either asks FUSE for a vhost_vring_file-sized input buffer,
 * or copies that structure from in_buf and hands it to cuse_set_backend().
 * The test choosing between the two is on a missing line.
 */
195 case VHOST_NET_SET_BACKEND:
196 LOG_DEBUG(VHOST_CONFIG,
197 "(%"PRIu64") IOCTL: VHOST_NET_SET_BACKEND\n", ctx.fh);
199 VHOST_IOCTL_RETRY(sizeof(file), 0);
202 file = *(const struct vhost_vring_file *)in_buf;
203 result = cuse_set_backend(ctx, &file);
204 fuse_reply_ioctl(req, result, NULL, 0);
/* GET_FEATURES: write-style ioctl, vhost_get_features() fills `features`. */
207 case VHOST_GET_FEATURES:
208 LOG_DEBUG(VHOST_CONFIG,
209 "(%"PRIu64") IOCTL: VHOST_GET_FEATURES\n", ctx.fh);
210 VHOST_IOCTL_W(uint64_t, features, vhost_get_features);
/* SET_FEATURES: read-style ioctl, `features` is taken from in_buf. */
213 case VHOST_SET_FEATURES:
214 LOG_DEBUG(VHOST_CONFIG,
215 "(%"PRIu64") IOCTL: VHOST_SET_FEATURES\n", ctx.fh);
216 VHOST_IOCTL_R(uint64_t, features, vhost_set_features);
/* RESET_OWNER / SET_OWNER: no payload, the handler only receives ctx. */
219 case VHOST_RESET_OWNER:
220 LOG_DEBUG(VHOST_CONFIG,
221 "(%"PRIu64") IOCTL: VHOST_RESET_OWNER\n", ctx.fh);
222 VHOST_IOCTL(vhost_reset_owner);
225 case VHOST_SET_OWNER:
226 LOG_DEBUG(VHOST_CONFIG,
227 "(%"PRIu64") IOCTL: VHOST_SET_OWNER\n", ctx.fh);
228 VHOST_IOCTL(vhost_set_owner);
/*
 * SET_MEM_TABLE is handled in stages keyed on a buffer size (the inner
 * switch header is missing; presumably in_bufsz, confirm upstream):
 *  - first a retry requests just the struct vhost_memory header;
 *  - when exactly the header has arrived it is saved in mem_temp and, if
 *    nregions > 0, a second retry requests header plus nregions regions;
 *  - otherwise the buffer goes to cuse_set_mem_table().
 * mem_temp is function-static, so it is shared by all invocations; that is
 * the race the TODO below refers to.
 */
231 case VHOST_SET_MEM_TABLE:
232 /*TODO fix race condition.*/
233 LOG_DEBUG(VHOST_CONFIG,
234 "(%"PRIu64") IOCTL: VHOST_SET_MEM_TABLE\n", ctx.fh);
235 static struct vhost_memory mem_temp;
239 VHOST_IOCTL_RETRY(sizeof(struct vhost_memory), 0);
242 case sizeof(struct vhost_memory):
243 mem_temp = *(const struct vhost_memory *) in_buf;
245 if (mem_temp.nregions > 0) {
246 VHOST_IOCTL_RETRY(sizeof(struct vhost_memory) +
247 (sizeof(struct vhost_memory_region) *
248 mem_temp.nregions), 0);
/* Reply for the nregions == 0 side; the value put in result is not shown. */
251 fuse_reply_ioctl(req, result, NULL, 0);
/* The rest of this call's argument list is on a missing line. */
256 result = cuse_set_mem_table(ctx, in_buf,
/* One of the two replies below is sent; the condition is not visible. */
259 fuse_reply_err(req, EINVAL);
261 fuse_reply_ioctl(req, result, NULL, 0);
/* Vring number/base/address setters: read-style ioctls via VHOST_IOCTL_R. */
265 case VHOST_SET_VRING_NUM:
266 LOG_DEBUG(VHOST_CONFIG,
267 "(%"PRIu64") IOCTL: VHOST_SET_VRING_NUM\n", ctx.fh);
268 VHOST_IOCTL_R(struct vhost_vring_state, state,
269 vhost_set_vring_num);
272 case VHOST_SET_VRING_BASE:
273 LOG_DEBUG(VHOST_CONFIG,
274 "(%"PRIu64") IOCTL: VHOST_SET_VRING_BASE\n", ctx.fh);
275 VHOST_IOCTL_R(struct vhost_vring_state, state,
276 vhost_set_vring_base);
/* GET_VRING_BASE: reads a uint32_t index, writes back a vhost_vring_state. */
279 case VHOST_GET_VRING_BASE:
280 LOG_DEBUG(VHOST_CONFIG,
281 "(%"PRIu64") IOCTL: VHOST_GET_VRING_BASE\n", ctx.fh);
282 VHOST_IOCTL_RW(uint32_t, index,
283 struct vhost_vring_state, state, vhost_get_vring_base);
286 case VHOST_SET_VRING_ADDR:
287 LOG_DEBUG(VHOST_CONFIG,
288 "(%"PRIu64") IOCTL: VHOST_SET_VRING_ADDR\n", ctx.fh);
289 VHOST_IOCTL_R(struct vhost_vring_addr, addr,
290 vhost_set_vring_addr);
/*
 * KICK and CALL share one path: request a vhost_vring_file, copy it from
 * in_buf, pass file.fd and the caller's pid to eventfd_copy(), then call
 * the kick or call setter depending on cmd. The failure test on fd and any
 * update of file.fd are on missing lines.
 */
293 case VHOST_SET_VRING_KICK:
294 case VHOST_SET_VRING_CALL:
295 if (cmd == VHOST_SET_VRING_KICK)
296 LOG_DEBUG(VHOST_CONFIG,
297 "(%"PRIu64") IOCTL: VHOST_SET_VRING_KICK\n",
300 LOG_DEBUG(VHOST_CONFIG,
301 "(%"PRIu64") IOCTL: VHOST_SET_VRING_CALL\n",
304 VHOST_IOCTL_RETRY(sizeof(struct vhost_vring_file), 0);
307 file = *(const struct vhost_vring_file *)in_buf;
308 LOG_DEBUG(VHOST_CONFIG,
309 "idx:%d fd:%d\n", file.index, file.fd);
310 fd = eventfd_copy(file.fd, ctx.pid);
/* Error reply with -1; the condition that selects it is not visible. */
312 fuse_reply_ioctl(req, -1, NULL, 0);
317 if (cmd == VHOST_SET_VRING_KICK) {
318 result = vhost_set_vring_kick(ctx, &file);
319 fuse_reply_ioctl(req, result, NULL, 0);
321 result = vhost_set_vring_call(ctx, &file);
322 fuse_reply_ioctl(req, result, NULL, 0);
/* Unknown command: logged as an error and answered with `result`. */
328 RTE_LOG(ERR, VHOST_CONFIG,
329 "(%"PRIu64") IOCTL: DOESN NOT EXIST\n", ctx.fh);
331 fuse_reply_ioctl(req, result, NULL, 0);
/* Final debug trace; the test selecting FAIL or SUCCESS is not visible. */
335 LOG_DEBUG(VHOST_CONFIG,
336 "(%"PRIu64") IOCTL: FAIL\n", ctx.fh);
338 LOG_DEBUG(VHOST_CONFIG,
339 "(%"PRIu64") IOCTL: SUCCESS\n", ctx.fh);
343 * Structure handling open, release and ioctl function pointers is populated.
345 static const struct cuse_lowlevel_ops vhost_net_ops = {
346 .open = vhost_net_open,
347 .release = vhost_net_release,
348 .ioctl = vhost_net_ioctl,
/*
 * cuse_info is populated and used to register the CUSE device.
 * vhost_net_ops is also passed to cuse_lowlevel_setup() when the device is
 * registered.
 */
/*
 * rte_vhost_driver_register - create the CUSE character device dev_name.
 *
 * Steps shown by the visible lines:
 *   1. check that cuse_device_name is readable and writable;
 *   2. call eventfd_init();
 *   3. format "DEVNAME=<dev_name>" and "/dev/<dev_name>";
 *   4. report an error if "/dev/<dev_name>" already exists;
 *   5. fill cuse_info and store the result of cuse_lowlevel_setup() in the
 *      file-scope `session`.
 *
 * dev_name: device name without the "/dev/" prefix.
 *
 * NOTE(review): fragment of a line-numbered listing. The return type, every
 * return statement, the last argument of the first RTE_LOG call and any
 * check of `session` are missing, so the error and return contract cannot
 * be stated from this view.
 */
356 rte_vhost_driver_register(const char *dev_name)
358 struct cuse_info cuse_info;
359 char device_name[PATH_MAX] = "";
360 char char_device_name[PATH_MAX] = "";
/* Single-entry device info vector: the "DEVNAME=..." string built below. */
361 const char *device_argv[] = { device_name };
/* Writable copies of the option strings, since fuse_argv holds char *. */
363 char fuse_opt_dummy[] = FUSE_OPT_DUMMY;
364 char fuse_opt_fore[] = FUSE_OPT_FORE;
365 char fuse_opt_nomulti[] = FUSE_OPT_NOMULTI;
366 char *fuse_argv[] = {fuse_opt_dummy, fuse_opt_fore, fuse_opt_nomulti};
/* The CUSE control node must be both readable and writable. */
368 if (access(cuse_device_name, R_OK | W_OK) < 0) {
369 RTE_LOG(ERR, VHOST_CONFIG,
370 "char device %s can't be accessed, maybe not exist\n",
/* What happens when eventfd_init() fails is on a missing line. */
375 if (eventfd_init() < 0)
379 * The device name is created. This is passed to QEMU so that it can
380 * register the device with our application.
/* Both strings are bounded by PATH_MAX; truncation is not checked here. */
382 snprintf(device_name, PATH_MAX, "DEVNAME=%s", dev_name);
383 snprintf(char_device_name, PATH_MAX, "/dev/%s", dev_name);
385 /* Check if device already exists. */
386 if (access(char_device_name, F_OK) != -1) {
387 RTE_LOG(ERR, VHOST_CONFIG,
388 "char device %s already exists\n", char_device_name);
/* Start from a zeroed cuse_info, then set only the fields needed. */
392 memset(&cuse_info, 0, sizeof(cuse_info));
393 cuse_info.dev_major = default_major;
394 cuse_info.dev_minor = default_minor;
395 cuse_info.dev_info_argc = 1;
396 cuse_info.dev_info_argv = device_argv;
397 cuse_info.flags = CUSE_UNRESTRICTED_IOCTL;
/* argc is 3, matching the three entries of fuse_argv. */
399 session = cuse_lowlevel_setup(3, fuse_argv,
400 &cuse_info, &vhost_net_ops, 0, NULL);
/*
 * An empty function for unregister.
 */
/*
 * rte_vhost_driver_unregister - unregister hook; dev_name is marked
 * __rte_unused, so the name is not consulted.
 *
 * NOTE(review): fragment of a line-numbered listing. Only the declarator
 * line survived; the return type and the body are not visible here.
 */
411 rte_vhost_driver_unregister(const char *dev_name __rte_unused)
/*
 * The CUSE session is launched, allowing the application to receive open,
 * release and ioctl calls.
 */
/*
 * rte_vhost_driver_session_start - run the FUSE session loop on the
 * file-scope `session` set up by rte_vhost_driver_register().
 *
 * NOTE(review): fragment of a line-numbered listing. The return type, the
 * braces and any return statement are missing. `session` is used without a
 * visible NULL check; confirm against the upstream file whether
 * registration must have succeeded first.
 */
421 rte_vhost_driver_session_start(void)
423 fuse_session_loop(session);