X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=drivers%2Fnet%2Fvirtio%2Fvirtio_user%2Fvhost_kernel.c;h=69f932bede0c925803e258558c96649558623893;hb=539d910c9c76876dfcfefea632dd32c603d76672;hp=aeb5075b82217290596fc95b876865ba5b829927;hpb=5e97e42025635e03ff5c256275d7d7650fff4dbc;p=dpdk.git

diff --git a/drivers/net/virtio/virtio_user/vhost_kernel.c b/drivers/net/virtio/virtio_user/vhost_kernel.c
index aeb5075b82..69f932bede 100644
--- a/drivers/net/virtio/virtio_user/vhost_kernel.c
+++ b/drivers/net/virtio/virtio_user/vhost_kernel.c
@@ -1,43 +1,14 @@
-/*-
- *   BSD LICENSE
- *
- *   Copyright(c) 2016 Intel Corporation. All rights reserved.
- *   All rights reserved.
- *
- *   Redistribution and use in source and binary forms, with or without
- *   modification, are permitted provided that the following conditions
- *   are met:
- *
- *     * Redistributions of source code must retain the above copyright
- *       notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above copyright
- *       notice, this list of conditions and the following disclaimer in
- *       the documentation and/or other materials provided with the
- *       distribution.
- *     * Neither the name of Intel Corporation nor the names of its
- *       contributors may be used to endorse or promote products derived
- *       from this software without specific prior written permission.
- *
- *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2016 Intel Corporation
  */
 
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <fcntl.h>
 #include <unistd.h>
+#include <errno.h>
 
 #include <rte_memory.h>
-#include <rte_eal_memconfig.h>
 
 #include "vhost.h"
 #include "virtio_user_dev.h"
@@ -67,6 +38,28 @@ struct vhost_memory_kernel {
 #define VHOST_SET_VRING_ERR _IOW(VHOST_VIRTIO, 0x22, struct vhost_vring_file)
 #define VHOST_NET_SET_BACKEND _IOW(VHOST_VIRTIO, 0x30, struct vhost_vring_file)
 
+/* With the features below, vhost kernel does not need to do checksum or TSO;
+ * this information is passed to virtio_user through the virtio net header.
+ */
+#define VHOST_KERNEL_GUEST_OFFLOADS_MASK	\
+	((1ULL << VIRTIO_NET_F_GUEST_CSUM) |	\
+	 (1ULL << VIRTIO_NET_F_GUEST_TSO4) |	\
+	 (1ULL << VIRTIO_NET_F_GUEST_TSO6) |	\
+	 (1ULL << VIRTIO_NET_F_GUEST_ECN)  |	\
+	 (1ULL << VIRTIO_NET_F_GUEST_UFO))
+
+/* With the features below, when a flow goes from virtio_user to vhost kernel:
+ * (1) if the flow goes up through the kernel networking stack, the kernel
+ * does not need to verify its checksum, which saves CPU cycles;
+ * (2) if the flow goes through a Linux bridge and out through an interface
+ * (kernel driver), checksum and TSO will be done by GSO in the kernel, or
+ * even offloaded to the real physical device.
+ */
+#define VHOST_KERNEL_HOST_OFFLOADS_MASK		\
+	((1ULL << VIRTIO_NET_F_HOST_TSO4) |	\
+	 (1ULL << VIRTIO_NET_F_HOST_TSO6) |	\
+	 (1ULL << VIRTIO_NET_F_CSUM))
+
 static uint64_t max_regions = 64;
 
 static void
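
The two masks introduced above cover the offload feature bits that are emulated by the TAP device rather than by vhost-net itself. As a standalone reference, here is a minimal sketch of how they partition the feature word; the VIRTIO_NET_F_* bit positions are redefined locally from the virtio specification so the snippet compiles on its own (the driver gets them from its own virtio headers):

	#include <inttypes.h>
	#include <stdint.h>
	#include <stdio.h>

	/* Bit positions from the virtio spec (repeated here for illustration). */
	#define VIRTIO_NET_F_CSUM	 0	/* host-side checksum offload */
	#define VIRTIO_NET_F_GUEST_CSUM	 1	/* guest-side checksum offload */
	#define VIRTIO_NET_F_GUEST_TSO4	 7
	#define VIRTIO_NET_F_GUEST_TSO6	 8
	#define VIRTIO_NET_F_GUEST_ECN	 9
	#define VIRTIO_NET_F_GUEST_UFO	10
	#define VIRTIO_NET_F_HOST_TSO4	11
	#define VIRTIO_NET_F_HOST_TSO6	12

	#define GUEST_OFFLOADS_MASK			\
		((1ULL << VIRTIO_NET_F_GUEST_CSUM) |	\
		 (1ULL << VIRTIO_NET_F_GUEST_TSO4) |	\
		 (1ULL << VIRTIO_NET_F_GUEST_TSO6) |	\
		 (1ULL << VIRTIO_NET_F_GUEST_ECN)  |	\
		 (1ULL << VIRTIO_NET_F_GUEST_UFO))

	#define HOST_OFFLOADS_MASK			\
		((1ULL << VIRTIO_NET_F_HOST_TSO4) |	\
		 (1ULL << VIRTIO_NET_F_HOST_TSO6) |	\
		 (1ULL << VIRTIO_NET_F_CSUM))

	int
	main(void)
	{
		uint64_t vhost_feats = 1ULL << 32;	/* pretend vhost-net reply */

		/* get_features path: advertise TAP-emulated offloads on top */
		uint64_t advertised = vhost_feats | GUEST_OFFLOADS_MASK |
				      HOST_OFFLOADS_MASK;

		/* set_features path: strip them again before VHOST_SET_FEATURES,
		 * since vhost-net itself never negotiated these bits */
		uint64_t to_vhost = advertised &
				    ~(GUEST_OFFLOADS_MASK | HOST_OFFLOADS_MASK);

		printf("advertised=0x%" PRIx64 " to_vhost=0x%" PRIx64 "\n",
		       advertised, to_vhost);
		return 0;
	}
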
@@ -85,159 +78,201 @@ get_vhost_kernel_max_regions(void)
 	close(fd);
 }
 
-static uint64_t vhost_req_user_to_kernel[] = {
-	[VHOST_USER_SET_OWNER] = VHOST_SET_OWNER,
-	[VHOST_USER_RESET_OWNER] = VHOST_RESET_OWNER,
-	[VHOST_USER_SET_FEATURES] = VHOST_SET_FEATURES,
-	[VHOST_USER_GET_FEATURES] = VHOST_GET_FEATURES,
-	[VHOST_USER_SET_VRING_CALL] = VHOST_SET_VRING_CALL,
-	[VHOST_USER_SET_VRING_NUM] = VHOST_SET_VRING_NUM,
-	[VHOST_USER_SET_VRING_BASE] = VHOST_SET_VRING_BASE,
-	[VHOST_USER_GET_VRING_BASE] = VHOST_GET_VRING_BASE,
-	[VHOST_USER_SET_VRING_ADDR] = VHOST_SET_VRING_ADDR,
-	[VHOST_USER_SET_VRING_KICK] = VHOST_SET_VRING_KICK,
-	[VHOST_USER_SET_MEM_TABLE] = VHOST_SET_MEM_TABLE,
-};
+static int
+vhost_kernel_ioctl(int fd, uint64_t request, void *arg)
+{
+	int ret;
 
-/* By default, vhost kernel module allows 64 regions, but DPDK allows
- * 256 segments. As a relief, below function merges those virtually
- * adjacent memsegs into one region.
- */
-static struct vhost_memory_kernel *
-prepare_vhost_memory_kernel(void)
+	ret = ioctl(fd, request, arg);
+	if (ret) {
+		PMD_DRV_LOG(ERR, "Vhost-kernel ioctl %"PRIu64" failed (%s)",
+				request, strerror(errno));
+		return -1;
+	}
+
+	return 0;
+}
+
+static int
+vhost_kernel_set_owner(struct virtio_user_dev *dev)
 {
-	uint32_t i, j, k = 0;
-	struct rte_memseg *seg;
-	struct vhost_memory_region *mr;
-	struct vhost_memory_kernel *vm;
+	return vhost_kernel_ioctl(dev->vhostfds[0], VHOST_SET_OWNER, NULL);
+}
 
-	vm = malloc(sizeof(struct vhost_memory_kernel) +
-		    max_regions *
-		    sizeof(struct vhost_memory_region));
+static int
+vhost_kernel_get_features(struct virtio_user_dev *dev, uint64_t *features)
+{
+	int ret;
+	unsigned int tap_features;
 
-	for (i = 0; i < RTE_MAX_MEMSEG; ++i) {
-		seg = &rte_eal_get_configuration()->mem_config->memseg[i];
-		if (!seg->addr)
-			break;
+	ret = vhost_kernel_ioctl(dev->vhostfds[0], VHOST_GET_FEATURES, features);
+	if (ret < 0) {
+		PMD_DRV_LOG(ERR, "Failed to get features");
+		return -1;
+	}
 
-		int new_region = 1;
+	ret = tap_support_features(&tap_features);
+	if (ret < 0) {
+		PMD_DRV_LOG(ERR, "Failed to get TAP features");
+		return -1;
+	}
 
-		for (j = 0; j < k; ++j) {
-			mr = &vm->regions[j];
+	/* With TAP as the backend, all these features are supported
+	 * but not claimed by vhost-net, so we add them back when
+	 * reporting to the upper layer.
+	 */
+	if (tap_features & IFF_VNET_HDR) {
+		*features |= VHOST_KERNEL_GUEST_OFFLOADS_MASK;
+		*features |= VHOST_KERNEL_HOST_OFFLOADS_MASK;
+	}
 
-			if (mr->userspace_addr + mr->memory_size ==
-			    (uint64_t)(uintptr_t)seg->addr) {
-				mr->memory_size += seg->len;
-				new_region = 0;
-				break;
-			}
-
-			if ((uint64_t)(uintptr_t)seg->addr + seg->len ==
-			    mr->userspace_addr) {
-				mr->guest_phys_addr =
-					(uint64_t)(uintptr_t)seg->addr;
-				mr->userspace_addr =
-					(uint64_t)(uintptr_t)seg->addr;
-				mr->memory_size += seg->len;
-				new_region = 0;
-				break;
-			}
-		}
+	/* vhost-kernel does not declare this feature, but it does
+	 * support multi-queue.
+	 */
+	if (tap_features & IFF_MULTI_QUEUE)
+		*features |= (1ull << VIRTIO_NET_F_MQ);
 
-		if (new_region == 0)
-			continue;
+	return 0;
+}
 
-		mr = &vm->regions[k++];
-		/* use vaddr here! */
-		mr->guest_phys_addr = (uint64_t)(uintptr_t)seg->addr;
-		mr->userspace_addr = (uint64_t)(uintptr_t)seg->addr;
-		mr->memory_size = seg->len;
-		mr->mmap_offset = 0;
+static int
+vhost_kernel_set_features(struct virtio_user_dev *dev, uint64_t features)
+{
+	/* We don't need memory protection here */
+	features &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
+	/* The vhost kernel does not know about the flags below */
+	features &= ~VHOST_KERNEL_GUEST_OFFLOADS_MASK;
+	features &= ~VHOST_KERNEL_HOST_OFFLOADS_MASK;
+	features &= ~(1ULL << VIRTIO_NET_F_MQ);
+
+	return vhost_kernel_ioctl(dev->vhostfds[0], VHOST_SET_FEATURES, &features);
+}
 
-		if (k >= max_regions) {
-			free(vm);
-			return NULL;
-		}
-	}
+static int
+add_memseg_list(const struct rte_memseg_list *msl, void *arg)
+{
+	struct vhost_memory_kernel *vm = arg;
+	struct vhost_memory_region *mr;
+	void *start_addr;
+	uint64_t len;
 
-	vm->nregions = k;
-	vm->padding = 0;
-	return vm;
+	if (msl->external)
+		return 0;
+
+	if (vm->nregions >= max_regions)
+		return -1;
+
+	start_addr = msl->base_va;
+	len = msl->page_sz * msl->memseg_arr.len;
+
+	mr = &vm->regions[vm->nregions++];
+
+	mr->guest_phys_addr = (uint64_t)(uintptr_t)start_addr;
+	mr->userspace_addr = (uint64_t)(uintptr_t)start_addr;
+	mr->memory_size = len;
+	mr->mmap_offset = 0; /* flags_padding */
+
+	PMD_DRV_LOG(DEBUG, "index=%u addr=%p len=%" PRIu64,
+			vm->nregions - 1, start_addr, len);
+
+	return 0;
 }
 
-/* with below features, vhost kernel does not need to do the checksum and TSO,
- * these info will be passed to virtio_user through virtio net header.
+/* By default, the vhost kernel module allows 64 regions, but DPDK may
+ * have many more memory regions. The function below treats each
+ * contiguous memory space reserved by DPDK as one region.
  */
-#define VHOST_KERNEL_GUEST_OFFLOADS_MASK	\
-	((1ULL << VIRTIO_NET_F_GUEST_CSUM) |	\
-	 (1ULL << VIRTIO_NET_F_GUEST_TSO4) |	\
-	 (1ULL << VIRTIO_NET_F_GUEST_TSO6) |	\
-	 (1ULL << VIRTIO_NET_F_GUEST_ECN)  |	\
-	 (1ULL << VIRTIO_NET_F_GUEST_UFO))
+static int
+vhost_kernel_set_memory_table(struct virtio_user_dev *dev)
+{
+	struct vhost_memory_kernel *vm;
+	int ret;
 
-/* with below features, when flows from virtio_user to vhost kernel
- * (1) if flows goes up through the kernel networking stack, it does not need
- * to verify checksum, which can save CPU cycles;
- * (2) if flows goes through a Linux bridge and outside from an interface
- * (kernel driver), checksum and TSO will be done by GSO in kernel or even
- * offloaded into real physical device.
- */
-#define VHOST_KERNEL_HOST_OFFLOADS_MASK		\
-	((1ULL << VIRTIO_NET_F_HOST_TSO4) |	\
-	 (1ULL << VIRTIO_NET_F_HOST_TSO6) |	\
-	 (1ULL << VIRTIO_NET_F_CSUM))
+	vm = malloc(sizeof(struct vhost_memory_kernel) +
+			max_regions *
+			sizeof(struct vhost_memory_region));
+	if (!vm)
+		goto err;
+
+	vm->nregions = 0;
+	vm->padding = 0;
+
+	/*
+	 * The memory lock has already been taken by the memory subsystem
+	 * or by virtio_user_start_device().
+	 */
+	ret = rte_memseg_list_walk_thread_unsafe(add_memseg_list, vm);
+	if (ret < 0)
+		goto err_free;
+
+	ret = vhost_kernel_ioctl(dev->vhostfds[0], VHOST_SET_MEM_TABLE, vm);
+	if (ret < 0)
+		goto err_free;
+
+	free(vm);
+
+	return 0;
+err_free:
+	free(vm);
+err:
+	PMD_DRV_LOG(ERR, "Failed to set memory table");
+	return -1;
+}
+
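
Compared with the removed prepare_vhost_memory_kernel(), which merged individual memsegs pairwise and could still overflow the 64-region limit, the replacement maps each memseg list (one contiguous virtual-address reservation) to exactly one region. A hedged sketch of the arithmetic, with made-up numbers; the field names follow struct rte_memseg_list:

	#include <inttypes.h>
	#include <stdio.h>

	int
	main(void)
	{
		/* A memseg list is one contiguous VA area backed by an array
		 * of page-sized slots, so its span is page_sz * memseg_arr.len.
		 */
		uint64_t page_sz = 2ULL * 1024 * 1024;	/* 2 MB hugepages */
		unsigned int nb_slots = 512;		/* memseg_arr.len */

		printf("one vhost region spans %" PRIu64 " MB\n",
		       page_sz * nb_slots / (1024 * 1024));
		return 0;
	}
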
+static uint64_t vhost_req_user_to_kernel[] = {
+	[VHOST_USER_RESET_OWNER] = VHOST_RESET_OWNER,
+	[VHOST_USER_SET_VRING_CALL] = VHOST_SET_VRING_CALL,
+	[VHOST_USER_SET_VRING_NUM] = VHOST_SET_VRING_NUM,
+	[VHOST_USER_SET_VRING_BASE] = VHOST_SET_VRING_BASE,
+	[VHOST_USER_GET_VRING_BASE] = VHOST_GET_VRING_BASE,
+	[VHOST_USER_SET_VRING_ADDR] = VHOST_SET_VRING_ADDR,
+	[VHOST_USER_SET_VRING_KICK] = VHOST_SET_VRING_KICK,
+};
 
 static int
-vhost_kernel_ioctl(struct virtio_user_dev *dev,
+vhost_kernel_send_request(struct virtio_user_dev *dev,
 		   enum vhost_user_request req,
 		   void *arg)
 {
 	int ret = -1;
 	unsigned int i;
 	uint64_t req_kernel;
-	struct vhost_memory_kernel *vm = NULL;
+	int vhostfd;
+	unsigned int queue_sel;
 
 	PMD_DRV_LOG(INFO, "%s", vhost_msg_strings[req]);
 
 	req_kernel = vhost_req_user_to_kernel[req];
 
-	if (req_kernel == VHOST_SET_MEM_TABLE) {
-		vm = prepare_vhost_memory_kernel();
-		if (!vm)
-			return -1;
-		arg = (void *)vm;
-	}
-
-	if (req_kernel == VHOST_SET_FEATURES) {
-		/* We don't need memory protection here */
-		*(uint64_t *)arg &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
-
-		/* VHOST kernel does not know about below flags */
-		*(uint64_t *)arg &= ~VHOST_KERNEL_GUEST_OFFLOADS_MASK;
-		*(uint64_t *)arg &= ~VHOST_KERNEL_HOST_OFFLOADS_MASK;
-	}
-
-	for (i = 0; i < dev->max_queue_pairs; ++i) {
-		if (dev->vhostfds[i] < 0)
-			continue;
-
-		ret = ioctl(dev->vhostfds[i], req_kernel, arg);
-		if (ret < 0)
-			break;
+	switch (req_kernel) {
+	case VHOST_SET_VRING_NUM:
+	case VHOST_SET_VRING_ADDR:
+	case VHOST_SET_VRING_BASE:
+	case VHOST_GET_VRING_BASE:
+	case VHOST_SET_VRING_KICK:
+	case VHOST_SET_VRING_CALL:
+		queue_sel = *(unsigned int *)arg;
+		vhostfd = dev->vhostfds[queue_sel / 2];
+		*(unsigned int *)arg = queue_sel % 2;
+		PMD_DRV_LOG(DEBUG, "vhostfd=%d, index=%u",
+				vhostfd, *(unsigned int *)arg);
+		break;
+	default:
+		vhostfd = -1;
 	}
+	if (vhostfd == -1) {
+		for (i = 0; i < dev->max_queue_pairs; ++i) {
+			if (dev->vhostfds[i] < 0)
+				continue;
 
-	if (!ret && req_kernel == VHOST_GET_FEATURES) {
-		/* with tap as the backend, all these features are supported
-		 * but not claimed by vhost-net, so we add them back when
-		 * reporting to upper layer.
-		 */
-		*((uint64_t *)arg) |= VHOST_KERNEL_GUEST_OFFLOADS_MASK;
-		*((uint64_t *)arg) |= VHOST_KERNEL_HOST_OFFLOADS_MASK;
+			ret = ioctl(dev->vhostfds[i], req_kernel, arg);
+			if (ret < 0)
+				break;
+		}
+	} else {
+		ret = ioctl(vhostfd, req_kernel, arg);
 	}
 
-	if (vm)
-		free(vm);
-
 	if (ret < 0)
 		PMD_DRV_LOG(ERR, "%s failed: %s",
 				vhost_msg_strings[req], strerror(errno));
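
The switch in vhost_kernel_send_request() encodes the vhost-net topology: one fd per queue pair, two vrings per fd. A virtio queue index is therefore split into a pair index (which fd to target) and a 0/1 vring index (which ring on that fd), which is also why the code rewrites *arg in place. A standalone illustration of the mapping:

	#include <stdio.h>

	int
	main(void)
	{
		unsigned int queue_sel;

		for (queue_sel = 0; queue_sel < 6; queue_sel++) {
			/* same arithmetic as vhost_kernel_send_request() */
			unsigned int pair = queue_sel / 2;	/* dev->vhostfds[] index */
			unsigned int vring = queue_sel % 2;	/* 0 = rx, 1 = tx */

			printf("virtio queue %u -> vhost fd #%u, vring %u\n",
			       queue_sel, pair, vring);
		}
		return 0;
	}
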
+ */ + ret = rte_memseg_list_walk_thread_unsafe(add_memseg_list, vm); + if (ret < 0) + goto err_free; + + ret = vhost_kernel_ioctl(dev->vhostfds[0], VHOST_SET_MEM_TABLE, vm); + if (ret < 0) + goto err_free; + + free(vm); + + return 0; +err_free: + free(vm); +err: + PMD_DRV_LOG(ERR, "Failed to set memory table"); + return -1; +} + +static uint64_t vhost_req_user_to_kernel[] = { + [VHOST_USER_RESET_OWNER] = VHOST_RESET_OWNER, + [VHOST_USER_SET_VRING_CALL] = VHOST_SET_VRING_CALL, + [VHOST_USER_SET_VRING_NUM] = VHOST_SET_VRING_NUM, + [VHOST_USER_SET_VRING_BASE] = VHOST_SET_VRING_BASE, + [VHOST_USER_GET_VRING_BASE] = VHOST_GET_VRING_BASE, + [VHOST_USER_SET_VRING_ADDR] = VHOST_SET_VRING_ADDR, + [VHOST_USER_SET_VRING_KICK] = VHOST_SET_VRING_KICK, +}; static int -vhost_kernel_ioctl(struct virtio_user_dev *dev, +vhost_kernel_send_request(struct virtio_user_dev *dev, enum vhost_user_request req, void *arg) { int ret = -1; unsigned int i; uint64_t req_kernel; - struct vhost_memory_kernel *vm = NULL; + int vhostfd; + unsigned int queue_sel; PMD_DRV_LOG(INFO, "%s", vhost_msg_strings[req]); req_kernel = vhost_req_user_to_kernel[req]; - if (req_kernel == VHOST_SET_MEM_TABLE) { - vm = prepare_vhost_memory_kernel(); - if (!vm) - return -1; - arg = (void *)vm; - } - - if (req_kernel == VHOST_SET_FEATURES) { - /* We don't need memory protection here */ - *(uint64_t *)arg &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM); - - /* VHOST kernel does not know about below flags */ - *(uint64_t *)arg &= ~VHOST_KERNEL_GUEST_OFFLOADS_MASK; - *(uint64_t *)arg &= ~VHOST_KERNEL_HOST_OFFLOADS_MASK; - } - - for (i = 0; i < dev->max_queue_pairs; ++i) { - if (dev->vhostfds[i] < 0) - continue; - - ret = ioctl(dev->vhostfds[i], req_kernel, arg); - if (ret < 0) - break; + switch (req_kernel) { + case VHOST_SET_VRING_NUM: + case VHOST_SET_VRING_ADDR: + case VHOST_SET_VRING_BASE: + case VHOST_GET_VRING_BASE: + case VHOST_SET_VRING_KICK: + case VHOST_SET_VRING_CALL: + queue_sel = *(unsigned int *)arg; + vhostfd = dev->vhostfds[queue_sel / 2]; + *(unsigned int *)arg = queue_sel % 2; + PMD_DRV_LOG(DEBUG, "vhostfd=%d, index=%u", + vhostfd, *(unsigned int *)arg); + break; + default: + vhostfd = -1; } + if (vhostfd == -1) { + for (i = 0; i < dev->max_queue_pairs; ++i) { + if (dev->vhostfds[i] < 0) + continue; - if (!ret && req_kernel == VHOST_GET_FEATURES) { - /* with tap as the backend, all these features are supported - * but not claimed by vhost-net, so we add them back when - * reporting to upper layer. 
- */ - *((uint64_t *)arg) |= VHOST_KERNEL_GUEST_OFFLOADS_MASK; - *((uint64_t *)arg) |= VHOST_KERNEL_HOST_OFFLOADS_MASK; + ret = ioctl(dev->vhostfds[i], req_kernel, arg); + if (ret < 0) + break; + } + } else { + ret = ioctl(vhostfd, req_kernel, arg); } - if (vm) - free(vm); - if (ret < 0) PMD_DRV_LOG(ERR, "%s failed: %s", vhost_msg_strings[req], strerror(errno)); @@ -305,43 +340,69 @@ vhost_kernel_enable_queue_pair(struct virtio_user_dev *dev, int hdr_size; int vhostfd; int tapfd; + int req_mq = (dev->max_queue_pairs > 1); vhostfd = dev->vhostfds[pair_idx]; + if (dev->qp_enabled[pair_idx] == enable) + return 0; + if (!enable) { - if (dev->tapfds[pair_idx]) { - close(dev->tapfds[pair_idx]); - dev->tapfds[pair_idx] = -1; + tapfd = dev->tapfds[pair_idx]; + if (vhost_kernel_set_backend(vhostfd, -1) < 0) { + PMD_DRV_LOG(ERR, "fail to set backend for vhost kernel"); + return -1; + } + if (req_mq && vhost_kernel_tap_set_queue(tapfd, false) < 0) { + PMD_DRV_LOG(ERR, "fail to disable tap for vhost kernel"); + return -1; } - return vhost_kernel_set_backend(vhostfd, -1); - } else if (dev->tapfds[pair_idx] >= 0) { + dev->qp_enabled[pair_idx] = false; return 0; } + if (dev->tapfds[pair_idx] >= 0) { + tapfd = dev->tapfds[pair_idx]; + if (vhost_kernel_tap_set_offload(tapfd, dev->features) == -1) + return -1; + if (req_mq && vhost_kernel_tap_set_queue(tapfd, true) < 0) { + PMD_DRV_LOG(ERR, "fail to enable tap for vhost kernel"); + return -1; + } + goto set_backend; + } + if ((dev->features & (1ULL << VIRTIO_NET_F_MRG_RXBUF)) || (dev->features & (1ULL << VIRTIO_F_VERSION_1))) hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf); else hdr_size = sizeof(struct virtio_net_hdr); - tapfd = vhost_kernel_open_tap(&dev->ifname, hdr_size); + tapfd = vhost_kernel_open_tap(&dev->ifname, hdr_size, req_mq, + (char *)dev->mac_addr, dev->features); if (tapfd < 0) { PMD_DRV_LOG(ERR, "fail to open tap for vhost kernel"); return -1; } + dev->tapfds[pair_idx] = tapfd; + +set_backend: if (vhost_kernel_set_backend(vhostfd, tapfd) < 0) { PMD_DRV_LOG(ERR, "fail to set backend for vhost kernel"); - close(tapfd); return -1; } - dev->tapfds[pair_idx] = tapfd; + dev->qp_enabled[pair_idx] = true; return 0; } -struct virtio_user_backend_ops ops_kernel = { +struct virtio_user_backend_ops virtio_ops_kernel = { .setup = vhost_kernel_setup, - .send_request = vhost_kernel_ioctl, + .set_owner = vhost_kernel_set_owner, + .get_features = vhost_kernel_get_features, + .set_features = vhost_kernel_set_features, + .set_memory_table = vhost_kernel_set_memory_table, + .send_request = vhost_kernel_send_request, .enable_qp = vhost_kernel_enable_queue_pair };