X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=drivers%2Fnet%2Fvirtio%2Fvirtio_user%2Fvhost_kernel.c;h=69f932bede0c925803e258558c96649558623893;hb=539d910c9c76876dfcfefea632dd32c603d76672;hp=8d0a1ab237b687f0bb85dc5b60d2e3ede0bdd432;hpb=5566a3e35866ce9e5eacf886c27b460ebfcd6ee9;p=dpdk.git

diff --git a/drivers/net/virtio/virtio_user/vhost_kernel.c b/drivers/net/virtio/virtio_user/vhost_kernel.c
index 8d0a1ab237..69f932bede 100644
--- a/drivers/net/virtio/virtio_user/vhost_kernel.c
+++ b/drivers/net/virtio/virtio_user/vhost_kernel.c
@@ -6,9 +6,9 @@
 #include <sys/stat.h>
 #include <fcntl.h>
 #include <unistd.h>
+#include <inttypes.h>
 
 #include <rte_memory.h>
-#include <rte_eal_memconfig.h>
 
 #include "vhost.h"
 #include "virtio_user_dev.h"
@@ -38,6 +38,28 @@ struct vhost_memory_kernel {
 #define VHOST_SET_VRING_ERR _IOW(VHOST_VIRTIO, 0x22, struct vhost_vring_file)
 #define VHOST_NET_SET_BACKEND _IOW(VHOST_VIRTIO, 0x30, struct vhost_vring_file)
 
+/* With the features below, vhost-kernel does not need to do checksum or TSO;
+ * this information is passed to virtio_user through the virtio net header.
+ */
+#define VHOST_KERNEL_GUEST_OFFLOADS_MASK	\
+	((1ULL << VIRTIO_NET_F_GUEST_CSUM) |	\
+	 (1ULL << VIRTIO_NET_F_GUEST_TSO4) |	\
+	 (1ULL << VIRTIO_NET_F_GUEST_TSO6) |	\
+	 (1ULL << VIRTIO_NET_F_GUEST_ECN)  |	\
+	 (1ULL << VIRTIO_NET_F_GUEST_UFO))
+
+/* With the features below, for flows from virtio_user to vhost-kernel:
+ * (1) if a flow goes up through the kernel networking stack, its checksum
+ *     does not need to be verified, which saves CPU cycles;
+ * (2) if a flow goes through a Linux bridge and out via an interface
+ *     (kernel driver), checksum and TSO are done by GSO in the kernel, or
+ *     even offloaded to the real physical device.
+ */
+#define VHOST_KERNEL_HOST_OFFLOADS_MASK		\
+	((1ULL << VIRTIO_NET_F_HOST_TSO4) |	\
+	 (1ULL << VIRTIO_NET_F_HOST_TSO6) |	\
+	 (1ULL << VIRTIO_NET_F_CSUM))
+
 static uint64_t max_regions = 64;
 
 static void
@@ -56,142 +78,165 @@ get_vhost_kernel_max_regions(void)
 	close(fd);
 }
 
-static uint64_t vhost_req_user_to_kernel[] = {
-	[VHOST_USER_SET_OWNER] = VHOST_SET_OWNER,
-	[VHOST_USER_RESET_OWNER] = VHOST_RESET_OWNER,
-	[VHOST_USER_SET_FEATURES] = VHOST_SET_FEATURES,
-	[VHOST_USER_GET_FEATURES] = VHOST_GET_FEATURES,
-	[VHOST_USER_SET_VRING_CALL] = VHOST_SET_VRING_CALL,
-	[VHOST_USER_SET_VRING_NUM] = VHOST_SET_VRING_NUM,
-	[VHOST_USER_SET_VRING_BASE] = VHOST_SET_VRING_BASE,
-	[VHOST_USER_GET_VRING_BASE] = VHOST_GET_VRING_BASE,
-	[VHOST_USER_SET_VRING_ADDR] = VHOST_SET_VRING_ADDR,
-	[VHOST_USER_SET_VRING_KICK] = VHOST_SET_VRING_KICK,
-	[VHOST_USER_SET_MEM_TABLE] = VHOST_SET_MEM_TABLE,
-};
-
-/* By default, vhost kernel module allows 64 regions, but DPDK allows
- * 256 segments. As a relief, below function merges those virtually
- * adjacent memsegs into one region.
- */
-static struct vhost_memory_kernel *
-prepare_vhost_memory_kernel(void)
+static int
+vhost_kernel_ioctl(int fd, uint64_t request, void *arg)
 {
-	uint32_t i, j, k = 0;
-	struct rte_memseg *seg;
-	struct vhost_memory_region *mr;
-	struct vhost_memory_kernel *vm;
-
-	vm = malloc(sizeof(struct vhost_memory_kernel) +
-		    max_regions *
-		    sizeof(struct vhost_memory_region));
-	if (!vm)
-		return NULL;
+	int ret;
 
-	for (i = 0; i < RTE_MAX_MEMSEG; ++i) {
-		seg = &rte_eal_get_configuration()->mem_config->memseg[i];
-		if (!seg->addr)
-			break;
+	ret = ioctl(fd, request, arg);
+	if (ret) {
+		PMD_DRV_LOG(ERR, "Vhost-kernel ioctl %"PRIu64" failed (%s)",
+				request, strerror(errno));
+		return -1;
+	}
 
-		int new_region = 1;
+	return 0;
+}
 
-		for (j = 0; j < k; ++j) {
-			mr = &vm->regions[j];
+static int
+vhost_kernel_set_owner(struct virtio_user_dev *dev)
+{
+	return vhost_kernel_ioctl(dev->vhostfds[0], VHOST_SET_OWNER, NULL);
+}
 
-			if (mr->userspace_addr + mr->memory_size ==
-			    (uint64_t)(uintptr_t)seg->addr) {
-				mr->memory_size += seg->len;
-				new_region = 0;
-				break;
-			}
-
-			if ((uint64_t)(uintptr_t)seg->addr + seg->len ==
-			    mr->userspace_addr) {
-				mr->guest_phys_addr =
-					(uint64_t)(uintptr_t)seg->addr;
-				mr->userspace_addr =
-					(uint64_t)(uintptr_t)seg->addr;
-				mr->memory_size += seg->len;
-				new_region = 0;
-				break;
-			}
-		}
+static int
+vhost_kernel_get_features(struct virtio_user_dev *dev, uint64_t *features)
+{
+	int ret;
+	unsigned int tap_features;
 
-		if (new_region == 0)
-			continue;
+	ret = vhost_kernel_ioctl(dev->vhostfds[0], VHOST_GET_FEATURES, features);
+	if (ret < 0) {
+		PMD_DRV_LOG(ERR, "Failed to get features");
+		return -1;
+	}
 
-		mr = &vm->regions[k++];
-		/* use vaddr here! */
-		mr->guest_phys_addr = (uint64_t)(uintptr_t)seg->addr;
-		mr->userspace_addr = (uint64_t)(uintptr_t)seg->addr;
-		mr->memory_size = seg->len;
-		mr->mmap_offset = 0;
+	ret = tap_support_features(&tap_features);
+	if (ret < 0) {
+		PMD_DRV_LOG(ERR, "Failed to get TAP features");
+		return -1;
+	}
 
-		if (k >= max_regions) {
-			free(vm);
-			return NULL;
-		}
+	/* With TAP as the backend, all these features are supported
+	 * but not claimed by vhost-net, so we add them back when
+	 * reporting to the upper layer.
+	 */
+	if (tap_features & IFF_VNET_HDR) {
+		*features |= VHOST_KERNEL_GUEST_OFFLOADS_MASK;
+		*features |= VHOST_KERNEL_HOST_OFFLOADS_MASK;
 	}
 
-	vm->nregions = k;
-	vm->padding = 0;
-	return vm;
-}
+	/* vhost-kernel does not declare this feature, but it does
+	 * support multi-queue.
+	 */
+	if (tap_features & IFF_MULTI_QUEUE)
+		*features |= (1ull << VIRTIO_NET_F_MQ);
 
-/* with below features, vhost kernel does not need to do the checksum and TSO,
- * these info will be passed to virtio_user through virtio net header.
- */
-#define VHOST_KERNEL_GUEST_OFFLOADS_MASK	\
-	((1ULL << VIRTIO_NET_F_GUEST_CSUM) |	\
-	 (1ULL << VIRTIO_NET_F_GUEST_TSO4) |	\
-	 (1ULL << VIRTIO_NET_F_GUEST_TSO6) |	\
-	 (1ULL << VIRTIO_NET_F_GUEST_ECN)  |	\
-	 (1ULL << VIRTIO_NET_F_GUEST_UFO))
+	return 0;
+}
 
-/* with below features, when flows from virtio_user to vhost kernel
- * (1) if flows goes up through the kernel networking stack, it does not need
- *     to verify checksum, which can save CPU cycles;
- * (2) if flows goes through a Linux bridge and outside from an interface
- *     (kernel driver), checksum and TSO will be done by GSO in kernel or even
- *     offloaded into real physical device.
- */
-#define VHOST_KERNEL_HOST_OFFLOADS_MASK		\
-	((1ULL << VIRTIO_NET_F_HOST_TSO4) |	\
-	 (1ULL << VIRTIO_NET_F_HOST_TSO6) |	\
-	 (1ULL << VIRTIO_NET_F_CSUM))
+static int
+vhost_kernel_set_features(struct virtio_user_dev *dev, uint64_t features)
+{
+	/* We don't need memory protection here */
+	features &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
+	/* vhost-kernel does not know about the flags below */
+	features &= ~VHOST_KERNEL_GUEST_OFFLOADS_MASK;
+	features &= ~VHOST_KERNEL_HOST_OFFLOADS_MASK;
+	features &= ~(1ULL << VIRTIO_NET_F_MQ);
+
+	return vhost_kernel_ioctl(dev->vhostfds[0], VHOST_SET_FEATURES, &features);
+}
 
 static int
-tap_supporte_mq(void)
+add_memseg_list(const struct rte_memseg_list *msl, void *arg)
 {
-	int tapfd;
-	unsigned int tap_features;
+	struct vhost_memory_kernel *vm = arg;
+	struct vhost_memory_region *mr;
+	void *start_addr;
+	uint64_t len;
 
-	tapfd = open(PATH_NET_TUN, O_RDWR);
-	if (tapfd < 0) {
-		PMD_DRV_LOG(ERR, "fail to open %s: %s",
-			    PATH_NET_TUN, strerror(errno));
-		return -1;
-	}
+	if (msl->external)
+		return 0;
 
-	if (ioctl(tapfd, TUNGETFEATURES, &tap_features) == -1) {
-		PMD_DRV_LOG(ERR, "TUNGETFEATURES failed: %s", strerror(errno));
-		close(tapfd);
+	if (vm->nregions >= max_regions)
 		return -1;
-	}
 
-	close(tapfd);
-	return tap_features & IFF_MULTI_QUEUE;
+	start_addr = msl->base_va;
+	len = msl->page_sz * msl->memseg_arr.len;
+
+	mr = &vm->regions[vm->nregions++];
+
+	mr->guest_phys_addr = (uint64_t)(uintptr_t)start_addr;
+	mr->userspace_addr = (uint64_t)(uintptr_t)start_addr;
+	mr->memory_size = len;
+	mr->mmap_offset = 0; /* flags_padding */
+
+	PMD_DRV_LOG(DEBUG, "index=%u addr=%p len=%" PRIu64,
+			vm->nregions - 1, start_addr, len);
+
+	return 0;
+}
+
+/* By default, the vhost kernel module allows 64 regions, but DPDK may
+ * have many more memory regions. The function below treats each
+ * contiguous memory space reserved by DPDK as one region.
+ */
+static int
+vhost_kernel_set_memory_table(struct virtio_user_dev *dev)
+{
+	struct vhost_memory_kernel *vm;
+	int ret;
+
+	vm = malloc(sizeof(struct vhost_memory_kernel) +
+			max_regions *
+			sizeof(struct vhost_memory_region));
+	if (!vm)
+		goto err;
+
+	vm->nregions = 0;
+	vm->padding = 0;
+
+	/*
+	 * The memory lock has already been taken by the memory subsystem
+	 * or virtio_user_start_device().
+	 */
+	ret = rte_memseg_list_walk_thread_unsafe(add_memseg_list, vm);
+	if (ret < 0)
+		goto err_free;
+
+	ret = vhost_kernel_ioctl(dev->vhostfds[0], VHOST_SET_MEM_TABLE, vm);
+	if (ret < 0)
+		goto err_free;
+
+	free(vm);
+
+	return 0;
+err_free:
+	free(vm);
+err:
+	PMD_DRV_LOG(ERR, "Failed to set memory table");
+	return -1;
 }
 
+static uint64_t vhost_req_user_to_kernel[] = {
+	[VHOST_USER_RESET_OWNER] = VHOST_RESET_OWNER,
+	[VHOST_USER_SET_VRING_CALL] = VHOST_SET_VRING_CALL,
+	[VHOST_USER_SET_VRING_NUM] = VHOST_SET_VRING_NUM,
+	[VHOST_USER_SET_VRING_BASE] = VHOST_SET_VRING_BASE,
+	[VHOST_USER_GET_VRING_BASE] = VHOST_GET_VRING_BASE,
+	[VHOST_USER_SET_VRING_ADDR] = VHOST_SET_VRING_ADDR,
+	[VHOST_USER_SET_VRING_KICK] = VHOST_SET_VRING_KICK,
+};
+
 static int
-vhost_kernel_ioctl(struct virtio_user_dev *dev,
+vhost_kernel_send_request(struct virtio_user_dev *dev,
 		   enum vhost_user_request req,
 		   void *arg)
 {
 	int ret = -1;
 	unsigned int i;
 	uint64_t req_kernel;
-	struct vhost_memory_kernel *vm = NULL;
 	int vhostfd;
 	unsigned int queue_sel;
 
@@ -199,24 +244,6 @@ vhost_kernel_ioctl(struct virtio_user_dev *dev,
 
 	req_kernel = vhost_req_user_to_kernel[req];
 
-	if (req_kernel == VHOST_SET_MEM_TABLE) {
-		vm = prepare_vhost_memory_kernel();
-		if (!vm)
-			return -1;
-		arg = (void *)vm;
-	}
-
-	if (req_kernel == VHOST_SET_FEATURES) {
-		/* We don't need memory protection here */
-		*(uint64_t *)arg &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
-
-		/* VHOST kernel does not know about below flags */
-		*(uint64_t *)arg &= ~VHOST_KERNEL_GUEST_OFFLOADS_MASK;
-		*(uint64_t *)arg &= ~VHOST_KERNEL_HOST_OFFLOADS_MASK;
-
-		*(uint64_t *)arg &= ~(1ULL << VIRTIO_NET_F_MQ);
-	}
-
 	switch (req_kernel) {
 	case VHOST_SET_VRING_NUM:
 	case VHOST_SET_VRING_ADDR:
@@ -246,24 +273,6 @@ vhost_kernel_ioctl(struct virtio_user_dev *dev,
 		ret = ioctl(vhostfd, req_kernel, arg);
 	}
 
-	if (!ret && req_kernel == VHOST_GET_FEATURES) {
-		/* with tap as the backend, all these features are supported
-		 * but not claimed by vhost-net, so we add them back when
-		 * reporting to upper layer.
-		 */
-		*((uint64_t *)arg) |= VHOST_KERNEL_GUEST_OFFLOADS_MASK;
-		*((uint64_t *)arg) |= VHOST_KERNEL_HOST_OFFLOADS_MASK;
-
-		/* vhost_kernel will not declare this feature, but it does
-		 * support multi-queue.
- */ - if (tap_supporte_mq()) - *(uint64_t *)arg |= (1ull << VIRTIO_NET_F_MQ); - } - - if (vm) - free(vm); - if (ret < 0) PMD_DRV_LOG(ERR, "%s failed: %s", vhost_msg_strings[req], strerror(errno)); @@ -335,40 +344,65 @@ vhost_kernel_enable_queue_pair(struct virtio_user_dev *dev, vhostfd = dev->vhostfds[pair_idx]; + if (dev->qp_enabled[pair_idx] == enable) + return 0; + if (!enable) { - if (dev->tapfds[pair_idx] >= 0) { - close(dev->tapfds[pair_idx]); - dev->tapfds[pair_idx] = -1; + tapfd = dev->tapfds[pair_idx]; + if (vhost_kernel_set_backend(vhostfd, -1) < 0) { + PMD_DRV_LOG(ERR, "fail to set backend for vhost kernel"); + return -1; + } + if (req_mq && vhost_kernel_tap_set_queue(tapfd, false) < 0) { + PMD_DRV_LOG(ERR, "fail to disable tap for vhost kernel"); + return -1; } - return vhost_kernel_set_backend(vhostfd, -1); - } else if (dev->tapfds[pair_idx] >= 0) { + dev->qp_enabled[pair_idx] = false; return 0; } + if (dev->tapfds[pair_idx] >= 0) { + tapfd = dev->tapfds[pair_idx]; + if (vhost_kernel_tap_set_offload(tapfd, dev->features) == -1) + return -1; + if (req_mq && vhost_kernel_tap_set_queue(tapfd, true) < 0) { + PMD_DRV_LOG(ERR, "fail to enable tap for vhost kernel"); + return -1; + } + goto set_backend; + } + if ((dev->features & (1ULL << VIRTIO_NET_F_MRG_RXBUF)) || (dev->features & (1ULL << VIRTIO_F_VERSION_1))) hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf); else hdr_size = sizeof(struct virtio_net_hdr); - tapfd = vhost_kernel_open_tap(&dev->ifname, hdr_size, req_mq); + tapfd = vhost_kernel_open_tap(&dev->ifname, hdr_size, req_mq, + (char *)dev->mac_addr, dev->features); if (tapfd < 0) { PMD_DRV_LOG(ERR, "fail to open tap for vhost kernel"); return -1; } + dev->tapfds[pair_idx] = tapfd; + +set_backend: if (vhost_kernel_set_backend(vhostfd, tapfd) < 0) { PMD_DRV_LOG(ERR, "fail to set backend for vhost kernel"); - close(tapfd); return -1; } - dev->tapfds[pair_idx] = tapfd; + dev->qp_enabled[pair_idx] = true; return 0; } -struct virtio_user_backend_ops ops_kernel = { +struct virtio_user_backend_ops virtio_ops_kernel = { .setup = vhost_kernel_setup, - .send_request = vhost_kernel_ioctl, + .set_owner = vhost_kernel_set_owner, + .get_features = vhost_kernel_get_features, + .set_features = vhost_kernel_set_features, + .set_memory_table = vhost_kernel_set_memory_table, + .send_request = vhost_kernel_send_request, .enable_qp = vhost_kernel_enable_queue_pair };
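
Note on the feature negotiation reworked above: the new vhost_kernel_get_features() only ORs VHOST_KERNEL_GUEST_OFFLOADS_MASK and VHOST_KERNEL_HOST_OFFLOADS_MASK back in when the TAP driver advertises IFF_VNET_HDR, and VIRTIO_NET_F_MQ only when it advertises IFF_MULTI_QUEUE. The minimal standalone sketch below is not part of the patch; it shows how such a capability probe works against the standard Linux TUN/TAP ioctl interface, which is what the patch's tap_support_features() helper is assumed to do internally.

/* tun_probe.c - minimal sketch of a TAP capability probe, assuming Linux
 * and the uapi <linux/if_tun.h> definitions; not part of the DPDK patch.
 */
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/if_tun.h>

int main(void)
{
	unsigned int features = 0;
	int fd = open("/dev/net/tun", O_RDWR);

	if (fd < 0) {
		perror("open(/dev/net/tun)");
		return 1;
	}

	/* TUNGETFEATURES reports the IFF_* flags the tun driver supports */
	if (ioctl(fd, TUNGETFEATURES, &features) < 0) {
		perror("ioctl(TUNGETFEATURES)");
		close(fd);
		return 1;
	}
	close(fd);

	/* IFF_VNET_HDR: a virtio-net header travels with each packet, so the
	 * guest/host offload feature bits can be claimed on top of vhost-net.
	 */
	printf("vnet_hdr:    %s\n", (features & IFF_VNET_HDR) ? "yes" : "no");

	/* IFF_MULTI_QUEUE is the precondition for adding VIRTIO_NET_F_MQ back */
	printf("multi-queue: %s\n", (features & IFF_MULTI_QUEUE) ? "yes" : "no");

	return 0;
}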