1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2010-2016 Intel Corporation
5 #include <sys/socket.h>
14 #include <rte_string_fns.h>
15 #include <rte_fbarray.h>
18 #include "virtio_user_dev.h"
/* Protocol constants used by the vhost-user message definitions in this file. */
20 /* The version of the protocol we support */
21 #define VHOST_USER_VERSION 0x1
/*
 * Capacity of the regions[] and fds[] arrays declared in this file, and the
 * limit checked when a new memory region is added to the memory table.
 */
23 #define VHOST_MEMORY_MAX_NREGIONS 8
27 struct vhost_memory_region regions[VHOST_MEMORY_MAX_NREGIONS];
/*
 * Request codes carried in the request field of struct vhost_user_msg.
 * Values are assigned explicitly; note the gap between 18 and 39.
 * NOTE(review): presumably these mirror the vhost-user protocol numbering;
 * confirm against the protocol specification before adding entries.
 */
30 enum vhost_user_request {
32 VHOST_USER_GET_FEATURES = 1,
33 VHOST_USER_SET_FEATURES = 2,
34 VHOST_USER_SET_OWNER = 3,
35 VHOST_USER_RESET_OWNER = 4,
36 VHOST_USER_SET_MEM_TABLE = 5,
37 VHOST_USER_SET_LOG_BASE = 6,
38 VHOST_USER_SET_LOG_FD = 7,
39 VHOST_USER_SET_VRING_NUM = 8,
40 VHOST_USER_SET_VRING_ADDR = 9,
41 VHOST_USER_SET_VRING_BASE = 10,
42 VHOST_USER_GET_VRING_BASE = 11,
43 VHOST_USER_SET_VRING_KICK = 12,
44 VHOST_USER_SET_VRING_CALL = 13,
45 VHOST_USER_SET_VRING_ERR = 14,
46 VHOST_USER_GET_PROTOCOL_FEATURES = 15,
47 VHOST_USER_SET_PROTOCOL_FEATURES = 16,
48 VHOST_USER_GET_QUEUE_NUM = 17,
49 VHOST_USER_SET_VRING_ENABLE = 18,
50 VHOST_USER_SET_STATUS = 39,
51 VHOST_USER_GET_STATUS = 40,
/*
 * One vhost-user message: a header (request, flags, size) followed by a
 * payload whose interpretation depends on the request.
 * NOTE(review): the flags member, the payload union opener and its u64
 * member are on lines not visible in this view.
 */
55 struct vhost_user_msg {
56 enum vhost_user_request request;
/*
 * Bit layout of the flags word: the low two bits hold the protocol version,
 * bit 2 marks a reply, bit 3 asks the peer to acknowledge the request.
 */
58 #define VHOST_USER_VERSION_MASK 0x3
59 #define VHOST_USER_REPLY_MASK (0x1 << 2)
60 #define VHOST_USER_NEED_REPLY_MASK (0x1 << 3)
62 uint32_t size; /* the following payload size */
/*
 * Layout of the u64 payload used by the vring file requests: the low 8 bits
 * hold the ring index, bit 8 is set when no descriptor accompanies the
 * message.
 */
64 #define VHOST_USER_VRING_IDX_MASK 0xff
65 #define VHOST_USER_VRING_NOFD_MASK (0x1 << 8)
/* Payload variants used by the vring state, vring address and memory table requests. */
67 struct vhost_vring_state state;
68 struct vhost_vring_addr addr;
69 struct vhost_memory memory;
/* Descriptor slots, one per possible memory region. */
71 int fds[VHOST_MEMORY_MAX_NREGIONS];
/* Number of bytes that precede the payload inside struct vhost_user_msg. */
74 #define VHOST_USER_HDR_SIZE offsetof(struct vhost_user_msg, payload.u64)
/*
 * Everything in struct vhost_user_msg after the header.
 * NOTE(review): this is sizeof(struct) minus the header, so any member
 * declared after the payload is counted too; confirm against the full
 * struct layout.
 */
75 #define VHOST_USER_PAYLOAD_SIZE \
76 (sizeof(struct vhost_user_msg) - VHOST_USER_HDR_SIZE)
/*
 * Send one vhost-user message on socket fd, attaching fd_num descriptors
 * taken from fds as SCM_RIGHTS ancillary data.
 *
 * The data sent is the fixed header plus msg->size bytes of payload.
 * sendmsg() is retried while it fails with EINTR; any other failure is
 * logged with strerror(errno).
 *
 * NOTE(review): the local declarations, the iovec hookup into msgh, the
 * loop opener and the return statement are on lines not visible in this
 * view, so the return value is not documented here.
 */
79 vhost_user_write(int fd, struct vhost_user_msg *msg, int *fds, int fd_num)
/* The control buffer is a variable-length array sized for fd_num descriptors. */
84 size_t fd_size = fd_num * sizeof(int);
85 char control[CMSG_SPACE(fd_size)];
88 memset(&msgh, 0, sizeof(msgh));
89 memset(control, 0, sizeof(control));
91 iov.iov_base = (uint8_t *)msg;
92 iov.iov_len = VHOST_USER_HDR_SIZE + msg->size;
96 msgh.msg_control = control;
97 msgh.msg_controllen = sizeof(control);
99 cmsg = CMSG_FIRSTHDR(&msgh);
100 cmsg->cmsg_len = CMSG_LEN(fd_size);
101 cmsg->cmsg_level = SOL_SOCKET;
102 cmsg->cmsg_type = SCM_RIGHTS;
/*
 * NOTE(review): most callers in this file pass fds == NULL with
 * fd_num == 0, which makes this a zero-length copy from a NULL source;
 * confirm that is acceptable for the toolchains in use.
 */
103 memcpy(CMSG_DATA(cmsg), fds, fd_size);
106 r = sendmsg(fd, &msgh, 0);
107 } while (r < 0 && errno == EINTR);
110 PMD_DRV_LOG(ERR, "Failed to send msg: %s", strerror(errno));
/*
 * Receive one vhost-user reply from fd into msg: first the fixed-size
 * header, then msg->size bytes of payload placed right after the header.
 *
 * The header flags must equal exactly the reply bit combined with the
 * protocol version, and the announced payload size is compared with the
 * payload storage before the second recv().
 *
 * NOTE(review): the failure tests after each recv(), the branch bodies and
 * the return statements are on lines not visible in this view.
 */
116 vhost_user_read(int fd, struct vhost_user_msg *msg)
/* The only flags value accepted from the backend. */
118 uint32_t valid_flags = VHOST_USER_REPLY_MASK | VHOST_USER_VERSION;
119 int ret, sz_hdr = VHOST_USER_HDR_SIZE, sz_payload;
121 ret = recv(fd, (void *)msg, sz_hdr, 0);
123 PMD_DRV_LOG(ERR, "Failed to recv msg hdr: %d instead of %d.",
128 /* validate msg flags */
129 if (msg->flags != (valid_flags)) {
130 PMD_DRV_LOG(ERR, "Failed to recv msg: flags %x instead of %x.",
131 msg->flags, valid_flags);
135 sz_payload = msg->size;
/* Bound the backend-supplied size by the payload storage before reading into it. */
137 if ((size_t)sz_payload > sizeof(msg->payload))
141 ret = recv(fd, (void *)((char *)msg + sz_hdr), sz_payload, 0);
/* Fewer payload bytes than announced is treated as a failure. */
142 if (ret < sz_payload) {
144 "Failed to recv msg payload: %d instead of %d.",
/*
 * Read and validate the backend's acknowledgement of a request that was
 * sent with VHOST_USER_NEED_REPLY_MASK set in msg->flags.
 *
 * The reply is read into msg itself. It must echo the original request
 * type and carry a payload of sizeof(u64) bytes; a non-zero payload value
 * is logged as a NACK.
 *
 * NOTE(review): the return statements of each branch are on lines not
 * visible in this view.
 */
157 vhost_user_check_reply_ack(struct virtio_user_dev *dev, struct vhost_user_msg *msg)
/* Saved up front because the read below overwrites msg. */
159 enum vhost_user_request req = msg->request;
162 if (!(msg->flags & VHOST_USER_NEED_REPLY_MASK))
165 ret = vhost_user_read(dev->vhostfd, msg);
167 PMD_DRV_LOG(ERR, "Failed to read reply-ack");
171 if (req != msg->request) {
172 PMD_DRV_LOG(ERR, "Unexpected reply-ack request type (%d)", msg->request);
176 if (msg->size != sizeof(msg->payload.u64)) {
177 PMD_DRV_LOG(ERR, "Unexpected reply-ack payload size (%u)", msg->size);
181 if (msg->payload.u64) {
182 PMD_DRV_LOG(ERR, "Slave replied NACK to request type (%d)", msg->request);
/*
 * Send a VHOST_USER_SET_OWNER request on dev->vhostfd, with no descriptors
 * attached, and log if sending fails.
 * NOTE(review): the failure test and return statements are on lines not
 * visible in this view.
 */
190 vhost_user_set_owner(struct virtio_user_dev *dev)
193 struct vhost_user_msg msg = {
194 .request = VHOST_USER_SET_OWNER,
195 .flags = VHOST_USER_VERSION,
198 ret = vhost_user_write(dev->vhostfd, &msg, NULL, 0);
200 PMD_DRV_LOG(ERR, "Failed to set owner");
/*
 * Query the backend's feature bits: send VHOST_USER_GET_FEATURES, read the
 * reply, and store its 64-bit payload in *features.
 *
 * The reply must echo the request type and carry a payload of exactly
 * sizeof(*features) bytes.
 * NOTE(review): the failure tests, jump targets and return statements are
 * on lines not visible in this view.
 */
208 vhost_user_get_features(struct virtio_user_dev *dev, uint64_t *features)
211 struct vhost_user_msg msg = {
212 .request = VHOST_USER_GET_FEATURES,
213 .flags = VHOST_USER_VERSION,
216 ret = vhost_user_write(dev->vhostfd, &msg, NULL, 0);
/* The reply is read into the same message object used for the request. */
220 ret = vhost_user_read(dev->vhostfd, &msg);
224 if (msg.request != VHOST_USER_GET_FEATURES) {
225 PMD_DRV_LOG(ERR, "Unexpected request type (%d)", msg.request);
229 if (msg.size != sizeof(*features)) {
230 PMD_DRV_LOG(ERR, "Unexpected payload size (%u)", msg.size);
234 *features = msg.payload.u64;
238 PMD_DRV_LOG(ERR, "Failed to get backend features");
/*
 * Send the feature bits in features to the backend with
 * VHOST_USER_SET_FEATURES, and log if sending fails.
 * NOTE(review): the failure test and return statements are on lines not
 * visible in this view.
 */
244 vhost_user_set_features(struct virtio_user_dev *dev, uint64_t features)
247 struct vhost_user_msg msg = {
248 .request = VHOST_USER_SET_FEATURES,
249 .flags = VHOST_USER_VERSION,
250 .size = sizeof(features),
251 .payload.u64 = features,
/*
 * Carry over the VHOST_USER_F_PROTOCOL_FEATURES bit from
 * dev->device_features even when the caller's mask does not include it.
 */
254 msg.payload.u64 |= dev->device_features & (1ULL << VHOST_USER_F_PROTOCOL_FEATURES);
256 ret = vhost_user_write(dev->vhostfd, &msg, NULL, 0);
258 PMD_DRV_LOG(ERR, "Failed to set features");
/*
 * Query the backend's protocol feature bits: send
 * VHOST_USER_GET_PROTOCOL_FEATURES, read the reply, and store its 64-bit
 * payload in *features.
 *
 * The reply must echo the request type and carry a payload of exactly
 * sizeof(*features) bytes.
 * NOTE(review): the failure tests, jump targets and return statements are
 * on lines not visible in this view.
 */
266 vhost_user_get_protocol_features(struct virtio_user_dev *dev, uint64_t *features)
269 struct vhost_user_msg msg = {
270 .request = VHOST_USER_GET_PROTOCOL_FEATURES,
271 .flags = VHOST_USER_VERSION,
274 ret = vhost_user_write(dev->vhostfd, &msg, NULL, 0);
/* The reply is read into the same message object used for the request. */
278 ret = vhost_user_read(dev->vhostfd, &msg);
282 if (msg.request != VHOST_USER_GET_PROTOCOL_FEATURES) {
283 PMD_DRV_LOG(ERR, "Unexpected request type (%d)", msg.request);
287 if (msg.size != sizeof(*features)) {
288 PMD_DRV_LOG(ERR, "Unexpected payload size (%u)", msg.size);
292 *features = msg.payload.u64;
296 PMD_DRV_LOG(ERR, "Failed to get backend protocol features");
/*
 * Send the protocol feature bits in features to the backend with
 * VHOST_USER_SET_PROTOCOL_FEATURES, and log if sending fails.
 * NOTE(review): the failure test and return statements are on lines not
 * visible in this view.
 */
302 vhost_user_set_protocol_features(struct virtio_user_dev *dev, uint64_t features)
305 struct vhost_user_msg msg = {
306 .request = VHOST_USER_SET_PROTOCOL_FEATURES,
307 .flags = VHOST_USER_VERSION,
308 .size = sizeof(features),
309 .payload.u64 = features,
312 ret = vhost_user_write(dev->vhostfd, &msg, NULL, 0);
314 PMD_DRV_LOG(ERR, "Failed to set protocol features");
322 struct vhost_memory *vm;
/*
 * Memseg walk callback: fold memory segment ms into the memory table being
 * built in arg (a struct walk_arg).
 *
 * Segments that share a file descriptor with an already recorded region
 * are merged into that region; otherwise a new region is started, up to
 * VHOST_MEMORY_MAX_NREGIONS. The segment's virtual address is used for
 * both guest_phys_addr and userspace_addr.
 *
 * NOTE(review): the failure tests, the return statements and any
 * bookkeeping done when a new region is added (descriptor slot, region
 * count) are on lines not visible in this view.
 */
328 update_memory_region(const struct rte_memseg_list *msl __rte_unused,
329 const struct rte_memseg *ms, void *arg)
331 struct walk_arg *wa = arg;
332 struct vhost_memory_region *mr;
333 uint64_t start_addr, end_addr;
337 fd = rte_memseg_get_fd_thread_unsafe(ms);
339 PMD_DRV_LOG(ERR, "Failed to get fd, ms=%p rte_errno=%d",
344 if (rte_memseg_get_fd_offset_thread_unsafe(ms, &offset) < 0) {
345 PMD_DRV_LOG(ERR, "Failed to get offset, ms=%p rte_errno=%d",
350 start_addr = (uint64_t)(uintptr_t)ms->addr;
351 end_addr = start_addr + ms->len;
/* Look for an existing region backed by the same descriptor. */
353 for (i = 0; i < wa->region_nr; i++) {
354 if (wa->fds[i] != fd)
357 mr = &wa->vm->regions[i];
/* Grow the region upwards when the segment ends beyond it. */
359 if (mr->userspace_addr + mr->memory_size < end_addr)
360 mr->memory_size = end_addr - mr->userspace_addr;
/*
 * Move the region start down when the segment begins before it.
 * NOTE(review): memory_size is not visibly enlarged in this branch, so
 * the region end would move down with the start; confirm whether
 * segments can arrive in descending address order.
 */
362 if (mr->userspace_addr > start_addr) {
363 mr->userspace_addr = start_addr;
364 mr->guest_phys_addr = start_addr;
/* Keep the smallest file offset seen for this descriptor. */
367 if (mr->mmap_offset > offset)
368 mr->mmap_offset = offset;
370 PMD_DRV_LOG(DEBUG, "index=%d fd=%d offset=0x%" PRIx64
371 " addr=0x%" PRIx64 " len=%" PRIu64, i, fd,
372 mr->mmap_offset, mr->userspace_addr,
378 if (i >= VHOST_MEMORY_MAX_NREGIONS) {
379 PMD_DRV_LOG(ERR, "Too many memory regions");
383 mr = &wa->vm->regions[i];
386 mr->guest_phys_addr = start_addr;
387 mr->userspace_addr = start_addr;
388 mr->memory_size = ms->len;
389 mr->mmap_offset = offset;
391 PMD_DRV_LOG(DEBUG, "index=%d fd=%d offset=0x%" PRIx64
392 " addr=0x%" PRIx64 " len=%" PRIu64, i, fd,
393 mr->mmap_offset, mr->userspace_addr,
/*
 * Build the memory table by walking the memory segments with
 * update_memory_region(), then send it to the backend with
 * VHOST_USER_SET_MEM_TABLE, passing fds[] with one descriptor per region.
 *
 * When the REPLY_ACK protocol feature is set in dev->protocol_features the
 * request asks for an acknowledgement, and the result of
 * vhost_user_check_reply_ack() is returned.
 *
 * NOTE(review): the walk_arg declaration and the rest of its set-up, the
 * failure tests and the error-path return are on lines not visible in
 * this view.
 */
402 vhost_user_set_memory_table(struct virtio_user_dev *dev)
405 int fds[VHOST_MEMORY_MAX_NREGIONS];
407 struct vhost_user_msg msg = {
408 .request = VHOST_USER_SET_MEM_TABLE,
409 .flags = VHOST_USER_VERSION,
412 if (dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_REPLY_ACK))
413 msg.flags |= VHOST_USER_NEED_REPLY_MASK;
416 wa.vm = &msg.payload.memory;
420 * The memory lock has already been taken by memory subsystem
421 * or virtio_user_start_device().
423 ret = rte_memseg_walk_thread_unsafe(update_memory_region, &wa);
427 fd_num = wa.region_nr;
428 msg.payload.memory.nregions = wa.region_nr;
429 msg.payload.memory.padding = 0;
/* Payload size covers nregions, padding and only the regions actually filled in. */
431 msg.size = sizeof(msg.payload.memory.nregions);
432 msg.size += sizeof(msg.payload.memory.padding);
433 msg.size += fd_num * sizeof(struct vhost_memory_region);
435 ret = vhost_user_write(dev->vhostfd, &msg, fds, fd_num);
439 return vhost_user_check_reply_ack(dev, &msg);
441 PMD_DRV_LOG(ERR, "Failed to set memory table");
/*
 * Shared helper for the vring requests whose payload is a
 * struct vhost_vring_state: copies *state into the message payload and
 * sends it with no descriptors attached. req is the request code used in
 * the failure log.
 * NOTE(review): the initializer line that sets the request field, the
 * failure test and the return statements are on lines not visible in this
 * view.
 */
446 vhost_user_set_vring(struct virtio_user_dev *dev, enum vhost_user_request req,
447 struct vhost_vring_state *state)
450 struct vhost_user_msg msg = {
452 .flags = VHOST_USER_VERSION,
453 .size = sizeof(*state),
454 .payload.state = *state,
457 ret = vhost_user_write(dev->vhostfd, &msg, NULL, 0);
459 PMD_DRV_LOG(ERR, "Failed to set vring state (request %d)", req);
/* Send *state with VHOST_USER_SET_VRING_ENABLE; returns the result of vhost_user_set_vring(). */
467 vhost_user_set_vring_enable(struct virtio_user_dev *dev, struct vhost_vring_state *state)
469 return vhost_user_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, state);
/* Send *state with VHOST_USER_SET_VRING_NUM; returns the result of vhost_user_set_vring(). */
473 vhost_user_set_vring_num(struct virtio_user_dev *dev, struct vhost_vring_state *state)
475 return vhost_user_set_vring(dev, VHOST_USER_SET_VRING_NUM, state);
/* Send *state with VHOST_USER_SET_VRING_BASE; returns the result of vhost_user_set_vring(). */
479 vhost_user_set_vring_base(struct virtio_user_dev *dev, struct vhost_vring_state *state)
481 return vhost_user_set_vring(dev, VHOST_USER_SET_VRING_BASE, state);
/*
 * Ask the backend for the base of the ring identified by state->index:
 * send *state with VHOST_USER_GET_VRING_BASE, read the reply, and on
 * success overwrite *state with the vring state the backend returned.
 *
 * The reply must echo the request type, carry a payload of sizeof(*state)
 * bytes, and refer to the same ring index that was requested.
 *
 * NOTE(review): the failure tests, jump targets and return statements are
 * on lines not visible in this view.
 */
485 vhost_user_get_vring_base(struct virtio_user_dev *dev, struct vhost_vring_state *state)
488 struct vhost_user_msg msg;
/* Remember the requested ring so the reply can be matched against it. */
489 unsigned int index = state->index;
491 ret = vhost_user_set_vring(dev, VHOST_USER_GET_VRING_BASE, state);
493 PMD_DRV_LOG(ERR, "Failed to send request");
497 ret = vhost_user_read(dev->vhostfd, &msg);
499 PMD_DRV_LOG(ERR, "Failed to read reply");
503 if (msg.request != VHOST_USER_GET_VRING_BASE) {
504 PMD_DRV_LOG(ERR, "Unexpected request type (%d)", msg.request);
508 if (msg.size != sizeof(*state)) {
509 PMD_DRV_LOG(ERR, "Unexpected payload size (%u)", msg.size);
513 if (msg.payload.state.index != index) {
/* Report the index the backend sent back, not the one that was requested. */
514 PMD_DRV_LOG(ERR, "Unexpected ring index (%u)", msg.payload.state.index);
518 *state = msg.payload.state;
522 PMD_DRV_LOG(ERR, "Failed to get vring base");
/*
 * Shared helper for the vring requests that pass a file descriptor. The
 * u64 payload holds the ring index (file->index masked to its low 8 bits)
 * and may have the no-descriptor bit set; the message is sent with num_fd
 * descriptors taken from fd.
 *
 * NOTE(review): the declarations of fd and num_fd, the condition that
 * chooses between attaching the descriptor and setting
 * VHOST_USER_VRING_NOFD_MASK, the initializer line for the request field,
 * and the return statements are on lines not visible in this view.
 */
527 vhost_user_set_vring_file(struct virtio_user_dev *dev, enum vhost_user_request req,
528 struct vhost_vring_file *file)
533 struct vhost_user_msg msg = {
535 .flags = VHOST_USER_VERSION,
536 .size = sizeof(msg.payload.u64),
537 .payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK,
543 msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK;
545 ret = vhost_user_write(dev->vhostfd, &msg, &fd, num_fd);
547 PMD_DRV_LOG(ERR, "Failed to set vring file (request %d)", req);
/* Send *file with VHOST_USER_SET_VRING_CALL; returns the result of vhost_user_set_vring_file(). */
555 vhost_user_set_vring_call(struct virtio_user_dev *dev, struct vhost_vring_file *file)
557 return vhost_user_set_vring_file(dev, VHOST_USER_SET_VRING_CALL, file);
/* Send *file with VHOST_USER_SET_VRING_KICK; returns the result of vhost_user_set_vring_file(). */
561 vhost_user_set_vring_kick(struct virtio_user_dev *dev, struct vhost_vring_file *file)
563 return vhost_user_set_vring_file(dev, VHOST_USER_SET_VRING_KICK, file);
/*
 * Send the ring addresses in *addr to the backend with
 * VHOST_USER_SET_VRING_ADDR, with no descriptors attached, and log if
 * sending fails.
 * NOTE(review): the failure test and return statements are on lines not
 * visible in this view.
 */
568 vhost_user_set_vring_addr(struct virtio_user_dev *dev, struct vhost_vring_addr *addr)
571 struct vhost_user_msg msg = {
572 .request = VHOST_USER_SET_VRING_ADDR,
573 .flags = VHOST_USER_VERSION,
574 .size = sizeof(*addr),
575 .payload.addr = *addr,
578 ret = vhost_user_write(dev->vhostfd, &msg, NULL, 0);
580 PMD_DRV_LOG(ERR, "Failed to send vring addresses");
/*
 * Read the device status from the backend with VHOST_USER_GET_STATUS and
 * store the low 8 bits of the u64 reply payload in *status.
 *
 * Three preconditions are tested before anything is sent: FEATURES_OK is
 * set in dev->status, VHOST_USER_F_PROTOCOL_FEATURES is set in
 * dev->device_features, and VHOST_USER_PROTOCOL_F_STATUS is set in
 * dev->protocol_features. The reply must echo the request type and carry a
 * payload of sizeof(u64) bytes.
 *
 * NOTE(review): what is returned when a precondition fails, the failure
 * tests, jump targets and return statements are on lines not visible in
 * this view.
 */
588 vhost_user_get_status(struct virtio_user_dev *dev, uint8_t *status)
591 struct vhost_user_msg msg = {
592 .request = VHOST_USER_GET_STATUS,
593 .flags = VHOST_USER_VERSION,
597 * If features have not been negotiated, we don't know if the backend
598 * supports protocol features
600 if (!(dev->status & VIRTIO_CONFIG_STATUS_FEATURES_OK))
603 /* Status protocol feature requires protocol features support */
604 if (!(dev->device_features & (1ULL << VHOST_USER_F_PROTOCOL_FEATURES)))
607 if (!(dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_STATUS)))
610 ret = vhost_user_write(dev->vhostfd, &msg, NULL, 0);
612 PMD_DRV_LOG(ERR, "Failed to send request");
616 ret = vhost_user_read(dev->vhostfd, &msg);
618 PMD_DRV_LOG(ERR, "Failed to recv request");
622 if (msg.request != VHOST_USER_GET_STATUS) {
623 PMD_DRV_LOG(ERR, "Unexpected request type (%d)", msg.request);
627 if (msg.size != sizeof(msg.payload.u64)) {
628 PMD_DRV_LOG(ERR, "Unexpected payload size (%u)", msg.size);
/* Only the low byte of the 64-bit payload is kept. */
632 *status = (uint8_t)msg.payload.u64;
636 PMD_DRV_LOG(ERR, "Failed to get device status");
/*
 * Send the device status byte to the backend with VHOST_USER_SET_STATUS,
 * widened into the u64 payload.
 *
 * Three preconditions are tested before anything is sent: FEATURES_OK is
 * set in dev->status, VHOST_USER_F_PROTOCOL_FEATURES is set in
 * dev->device_features, and VHOST_USER_PROTOCOL_F_STATUS is set in
 * dev->protocol_features. When the REPLY_ACK protocol feature is set the
 * request asks for an acknowledgement, and the result of
 * vhost_user_check_reply_ack() is returned.
 *
 * NOTE(review): what is returned when a precondition fails, the failure
 * test after the write and its return statement are on lines not visible
 * in this view.
 */
641 vhost_user_set_status(struct virtio_user_dev *dev, uint8_t status)
644 struct vhost_user_msg msg = {
645 .request = VHOST_USER_SET_STATUS,
646 .flags = VHOST_USER_VERSION,
647 .size = sizeof(msg.payload.u64),
648 .payload.u64 = status,
652 * If features have not been negotiated, we don't know if the backend
653 * supports protocol features
655 if (!(dev->status & VIRTIO_CONFIG_STATUS_FEATURES_OK))
658 /* Status protocol feature requires protocol features support */
659 if (!(dev->device_features & (1ULL << VHOST_USER_F_PROTOCOL_FEATURES)))
662 if (!(dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_STATUS)))
665 if (dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_REPLY_ACK))
666 msg.flags |= VHOST_USER_NEED_REPLY_MASK;
668 ret = vhost_user_write(dev->vhostfd, &msg, NULL, 0);
/* The message names the request actually being sent (SET_STATUS). */
670 PMD_DRV_LOG(ERR, "Failed to send set status request");
674 return vhost_user_check_reply_ack(dev, &msg);
/* Backlog passed to listen() on the server-mode socket. */
677 #define MAX_VIRTIO_USER_BACKLOG 1
/*
 * Server mode start-up: bind dev->listenfd to the address in un, listen on
 * it, accept the first client connection into dev->vhostfd, then switch
 * the listening socket to non-blocking mode.
 *
 * NOTE(review): the failure tests after bind() and listen(), and all
 * return statements, are on lines not visible in this view.
 */
679 virtio_user_start_server(struct virtio_user_dev *dev, struct sockaddr_un *un)
683 int fd = dev->listenfd;
685 ret = bind(fd, (struct sockaddr *)un, sizeof(*un));
/* No trailing newline, matching every other PMD_DRV_LOG call in this file. */
687 PMD_DRV_LOG(ERR, "failed to bind to %s: %s; remove it and try again",
688 dev->path, strerror(errno));
691 ret = listen(fd, MAX_VIRTIO_USER_BACKLOG);
695 PMD_DRV_LOG(NOTICE, "(%s) waiting for client connection...", dev->path);
696 dev->vhostfd = accept(fd, NULL, NULL);
697 if (dev->vhostfd < 0) {
698 PMD_DRV_LOG(ERR, "Failed to accept initial client connection (%s)",
/* O_NONBLOCK is set on the listening socket, not on the accepted connection. */
703 flag = fcntl(fd, F_GETFL);
704 if (fcntl(fd, F_SETFL, flag | O_NONBLOCK) < 0) {
705 PMD_DRV_LOG(ERR, "fcntl failed, %s", strerror(errno));
713 * Set up environment to talk with a vhost user backend.
/*
 * Create the AF_UNIX stream socket used to talk to the backend at
 * dev->path. In server mode the socket is handed to
 * virtio_user_start_server(); otherwise it is connected to the path.
 *
 * A failure to set FD_CLOEXEC is only logged as a warning.
 *
 * NOTE(review): where the socket descriptor is stored in dev, any
 * clean-up on the error paths and the return statements are on lines not
 * visible in this view.
 */
720 vhost_user_setup(struct virtio_user_dev *dev)
724 struct sockaddr_un un;
726 fd = socket(AF_UNIX, SOCK_STREAM, 0);
728 PMD_DRV_LOG(ERR, "socket() error, %s", strerror(errno));
732 flag = fcntl(fd, F_GETFD);
733 if (fcntl(fd, F_SETFD, flag | FD_CLOEXEC) < 0)
734 PMD_DRV_LOG(WARNING, "fcntl failed, %s", strerror(errno));
/* Build the socket address; the path copy is bounded by sizeof(un.sun_path). */
736 memset(&un, 0, sizeof(un));
737 un.sun_family = AF_UNIX;
738 strlcpy(un.sun_path, dev->path, sizeof(un.sun_path));
740 if (dev->is_server) {
742 if (virtio_user_start_server(dev, &un) < 0) {
743 PMD_DRV_LOG(ERR, "virtio-user startup fails in server mode");
748 if (connect(fd, (struct sockaddr *)&un, sizeof(un)) < 0) {
749 PMD_DRV_LOG(ERR, "connect error, %s", strerror(errno));
/*
 * Enable or disable queue pair pair_idx by sending a
 * VHOST_USER_SET_VRING_ENABLE request for each of its two rings (indices
 * pair_idx * 2 and pair_idx * 2 + 1).
 *
 * dev->qp_enabled[pair_idx] caches the last state applied: the state is
 * compared with it before any request is sent, and it is updated after
 * the loop.
 *
 * NOTE(review): the remaining parameters, the rest of the state
 * initializer and the return statements are on lines not visible in this
 * view.
 */
760 vhost_user_enable_queue_pair(struct virtio_user_dev *dev,
766 if (dev->qp_enabled[pair_idx] == enable)
769 for (i = 0; i < 2; ++i) {
770 struct vhost_vring_state state = {
771 .index = pair_idx * 2 + i,
775 if (vhost_user_set_vring_enable(dev, &state))
779 dev->qp_enabled[pair_idx] = enable;
/*
 * Backend operations table for the vhost-user backend: binds each
 * virtio_user_backend_ops callback to the vhost_user_* function defined in
 * this file.
 */
783 struct virtio_user_backend_ops virtio_ops_user = {
784 .setup = vhost_user_setup,
785 .set_owner = vhost_user_set_owner,
786 .get_features = vhost_user_get_features,
787 .set_features = vhost_user_set_features,
788 .get_protocol_features = vhost_user_get_protocol_features,
789 .set_protocol_features = vhost_user_set_protocol_features,
790 .set_memory_table = vhost_user_set_memory_table,
791 .set_vring_num = vhost_user_set_vring_num,
792 .set_vring_base = vhost_user_set_vring_base,
793 .get_vring_base = vhost_user_get_vring_base,
794 .set_vring_call = vhost_user_set_vring_call,
795 .set_vring_kick = vhost_user_set_vring_kick,
796 .set_vring_addr = vhost_user_set_vring_addr,
797 .get_status = vhost_user_get_status,
798 .set_status = vhost_user_set_status,
799 .enable_qp = vhost_user_enable_queue_pair