X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=drivers%2Fnet%2Fvirtio%2Fvirtio_user%2Fvhost_kernel.c;h=58e66bb7b4ae01cde92a07215051ea9f0fb92c72;hb=55c1238246d53d196420fd2768dd3d4210ab654b;hp=b79dcad17949f1a2751e1200e3abe2fb31163f6c;hpb=06856cabb89b0db000a065c17e32cdf2d2f58470;p=dpdk.git diff --git a/drivers/net/virtio/virtio_user/vhost_kernel.c b/drivers/net/virtio/virtio_user/vhost_kernel.c index b79dcad179..58e66bb7b4 100644 --- a/drivers/net/virtio/virtio_user/vhost_kernel.c +++ b/drivers/net/virtio/virtio_user/vhost_kernel.c @@ -14,6 +14,11 @@ #include "virtio_user_dev.h" #include "vhost_kernel_tap.h" +struct vhost_kernel_data { + int *vhostfds; + int *tapfds; +}; + struct vhost_memory_kernel { uint32_t nregions; uint32_t padding; @@ -38,6 +43,28 @@ struct vhost_memory_kernel { #define VHOST_SET_VRING_ERR _IOW(VHOST_VIRTIO, 0x22, struct vhost_vring_file) #define VHOST_NET_SET_BACKEND _IOW(VHOST_VIRTIO, 0x30, struct vhost_vring_file) +/* with below features, vhost kernel does not need to do the checksum and TSO, + * these info will be passed to virtio_user through virtio net header. + */ +#define VHOST_KERNEL_GUEST_OFFLOADS_MASK \ + ((1ULL << VIRTIO_NET_F_GUEST_CSUM) | \ + (1ULL << VIRTIO_NET_F_GUEST_TSO4) | \ + (1ULL << VIRTIO_NET_F_GUEST_TSO6) | \ + (1ULL << VIRTIO_NET_F_GUEST_ECN) | \ + (1ULL << VIRTIO_NET_F_GUEST_UFO)) + +/* with below features, when flows from virtio_user to vhost kernel + * (1) if flows goes up through the kernel networking stack, it does not need + * to verify checksum, which can save CPU cycles; + * (2) if flows goes through a Linux bridge and outside from an interface + * (kernel driver), checksum and TSO will be done by GSO in kernel or even + * offloaded into real physical device. + */ +#define VHOST_KERNEL_HOST_OFFLOADS_MASK \ + ((1ULL << VIRTIO_NET_F_HOST_TSO4) | \ + (1ULL << VIRTIO_NET_F_HOST_TSO6) | \ + (1ULL << VIRTIO_NET_F_CSUM)) + static uint64_t max_regions = 64; static void @@ -74,21 +101,73 @@ vhost_kernel_ioctl(int fd, uint64_t request, void *arg) static int vhost_kernel_set_owner(struct virtio_user_dev *dev) { - return vhost_kernel_ioctl(dev->vhostfds[0], VHOST_SET_OWNER, NULL); + int ret; + uint32_t i; + struct vhost_kernel_data *data = dev->backend_data; + + for (i = 0; i < dev->max_queue_pairs; ++i) { + if (data->vhostfds[i] < 0) + continue; + + ret = vhost_kernel_ioctl(data->vhostfds[i], VHOST_SET_OWNER, NULL); + if (ret < 0) + return ret; + } + + return 0; } -static uint64_t vhost_req_user_to_kernel[] = { - [VHOST_USER_RESET_OWNER] = VHOST_RESET_OWNER, - [VHOST_USER_SET_FEATURES] = VHOST_SET_FEATURES, - [VHOST_USER_GET_FEATURES] = VHOST_GET_FEATURES, - [VHOST_USER_SET_VRING_CALL] = VHOST_SET_VRING_CALL, - [VHOST_USER_SET_VRING_NUM] = VHOST_SET_VRING_NUM, - [VHOST_USER_SET_VRING_BASE] = VHOST_SET_VRING_BASE, - [VHOST_USER_GET_VRING_BASE] = VHOST_GET_VRING_BASE, - [VHOST_USER_SET_VRING_ADDR] = VHOST_SET_VRING_ADDR, - [VHOST_USER_SET_VRING_KICK] = VHOST_SET_VRING_KICK, - [VHOST_USER_SET_MEM_TABLE] = VHOST_SET_MEM_TABLE, -}; +static int +vhost_kernel_get_features(struct virtio_user_dev *dev, uint64_t *features) +{ + int ret; + unsigned int tap_features; + struct vhost_kernel_data *data = dev->backend_data; + + ret = vhost_kernel_ioctl(data->vhostfds[0], VHOST_GET_FEATURES, features); + if (ret < 0) { + PMD_DRV_LOG(ERR, "Failed to get features"); + return -1; + } + + ret = tap_support_features(&tap_features); + if (ret < 0) { + PMD_DRV_LOG(ERR, "Failed to get TAP features"); + return -1; + } + + /* with tap as the backend, all these features are supported + * but not claimed by vhost-net, so we add them back when + * reporting to upper layer. + */ + if (tap_features & IFF_VNET_HDR) { + *features |= VHOST_KERNEL_GUEST_OFFLOADS_MASK; + *features |= VHOST_KERNEL_HOST_OFFLOADS_MASK; + } + + /* vhost_kernel will not declare this feature, but it does + * support multi-queue. + */ + if (tap_features & IFF_MULTI_QUEUE) + *features |= (1ull << VIRTIO_NET_F_MQ); + + return 0; +} + +static int +vhost_kernel_set_features(struct virtio_user_dev *dev, uint64_t features) +{ + struct vhost_kernel_data *data = dev->backend_data; + + /* We don't need memory protection here */ + features &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM); + /* VHOST kernel does not know about below flags */ + features &= ~VHOST_KERNEL_GUEST_OFFLOADS_MASK; + features &= ~VHOST_KERNEL_HOST_OFFLOADS_MASK; + features &= ~(1ULL << VIRTIO_NET_F_MQ); + + return vhost_kernel_ioctl(data->vhostfds[0], VHOST_SET_FEATURES, &features); +} static int add_memseg_list(const struct rte_memseg_list *msl, void *arg) @@ -124,16 +203,18 @@ add_memseg_list(const struct rte_memseg_list *msl, void *arg) * have much more memory regions. Below function will treat each * contiguous memory space reserved by DPDK as one region. */ -static struct vhost_memory_kernel * -prepare_vhost_memory_kernel(void) +static int +vhost_kernel_set_memory_table(struct virtio_user_dev *dev) { + struct vhost_kernel_data *data = dev->backend_data; struct vhost_memory_kernel *vm; + int ret; vm = malloc(sizeof(struct vhost_memory_kernel) + max_regions * sizeof(struct vhost_memory_region)); if (!vm) - return NULL; + goto err; vm->nregions = 0; vm->padding = 0; @@ -142,149 +223,134 @@ prepare_vhost_memory_kernel(void) * The memory lock has already been taken by memory subsystem * or virtio_user_start_device(). */ - if (rte_memseg_list_walk_thread_unsafe(add_memseg_list, vm) < 0) { - free(vm); - return NULL; - } + ret = rte_memseg_list_walk_thread_unsafe(add_memseg_list, vm); + if (ret < 0) + goto err_free; - return vm; -} + ret = vhost_kernel_ioctl(data->vhostfds[0], VHOST_SET_MEM_TABLE, vm); + if (ret < 0) + goto err_free; -/* with below features, vhost kernel does not need to do the checksum and TSO, - * these info will be passed to virtio_user through virtio net header. - */ -#define VHOST_KERNEL_GUEST_OFFLOADS_MASK \ - ((1ULL << VIRTIO_NET_F_GUEST_CSUM) | \ - (1ULL << VIRTIO_NET_F_GUEST_TSO4) | \ - (1ULL << VIRTIO_NET_F_GUEST_TSO6) | \ - (1ULL << VIRTIO_NET_F_GUEST_ECN) | \ - (1ULL << VIRTIO_NET_F_GUEST_UFO)) + free(vm); -/* with below features, when flows from virtio_user to vhost kernel - * (1) if flows goes up through the kernel networking stack, it does not need - * to verify checksum, which can save CPU cycles; - * (2) if flows goes through a Linux bridge and outside from an interface - * (kernel driver), checksum and TSO will be done by GSO in kernel or even - * offloaded into real physical device. - */ -#define VHOST_KERNEL_HOST_OFFLOADS_MASK \ - ((1ULL << VIRTIO_NET_F_HOST_TSO4) | \ - (1ULL << VIRTIO_NET_F_HOST_TSO6) | \ - (1ULL << VIRTIO_NET_F_CSUM)) + return 0; +err_free: + free(vm); +err: + PMD_DRV_LOG(ERR, "Failed to set memory table"); + return -1; +} -static unsigned int -tap_support_features(void) +static int +vhost_kernel_set_vring(struct virtio_user_dev *dev, uint64_t req, struct vhost_vring_state *state) { - int tapfd; - unsigned int tap_features; + int ret, fd; + unsigned int index = state->index; + struct vhost_kernel_data *data = dev->backend_data; - tapfd = open(PATH_NET_TUN, O_RDWR); - if (tapfd < 0) { - PMD_DRV_LOG(ERR, "fail to open %s: %s", - PATH_NET_TUN, strerror(errno)); - return -1; - } + /* Convert from queue index to queue-pair & offset */ + fd = data->vhostfds[state->index / 2]; + state->index %= 2; - if (ioctl(tapfd, TUNGETFEATURES, &tap_features) == -1) { - PMD_DRV_LOG(ERR, "TUNGETFEATURES failed: %s", strerror(errno)); - close(tapfd); + ret = vhost_kernel_ioctl(fd, req, state); + if (ret < 0) { + PMD_DRV_LOG(ERR, "Failed to set vring (request %" PRIu64 ")", req); return -1; } - close(tapfd); - return tap_features; + /* restore index back to queue index */ + state->index = index; + + return 0; } static int -vhost_kernel_send_request(struct virtio_user_dev *dev, - enum vhost_user_request req, - void *arg) +vhost_kernel_set_vring_num(struct virtio_user_dev *dev, struct vhost_vring_state *state) { - int ret = -1; - unsigned int i; - uint64_t req_kernel; - struct vhost_memory_kernel *vm = NULL; - int vhostfd; - unsigned int queue_sel; - unsigned int features; + return vhost_kernel_set_vring(dev, VHOST_SET_VRING_NUM, state); +} + +static int +vhost_kernel_set_vring_base(struct virtio_user_dev *dev, struct vhost_vring_state *state) +{ + return vhost_kernel_set_vring(dev, VHOST_SET_VRING_BASE, state); +} - PMD_DRV_LOG(INFO, "%s", vhost_msg_strings[req]); +static int +vhost_kernel_get_vring_base(struct virtio_user_dev *dev, struct vhost_vring_state *state) +{ + return vhost_kernel_set_vring(dev, VHOST_GET_VRING_BASE, state); +} - req_kernel = vhost_req_user_to_kernel[req]; +static int +vhost_kernel_set_vring_file(struct virtio_user_dev *dev, uint64_t req, + struct vhost_vring_file *file) +{ + int ret, fd; + unsigned int index = file->index; + struct vhost_kernel_data *data = dev->backend_data; - if (req_kernel == VHOST_SET_MEM_TABLE) { - vm = prepare_vhost_memory_kernel(); - if (!vm) - return -1; - arg = (void *)vm; + /* Convert from queue index to queue-pair & offset */ + fd = data->vhostfds[file->index / 2]; + file->index %= 2; + + ret = vhost_kernel_ioctl(fd, req, file); + if (ret < 0) { + PMD_DRV_LOG(ERR, "Failed to set vring file (request %" PRIu64 ")", req); + return -1; } - if (req_kernel == VHOST_SET_FEATURES) { - /* We don't need memory protection here */ - *(uint64_t *)arg &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM); + /* restore index back to queue index */ + file->index = index; - /* VHOST kernel does not know about below flags */ - *(uint64_t *)arg &= ~VHOST_KERNEL_GUEST_OFFLOADS_MASK; - *(uint64_t *)arg &= ~VHOST_KERNEL_HOST_OFFLOADS_MASK; + return 0; +} - *(uint64_t *)arg &= ~(1ULL << VIRTIO_NET_F_MQ); - } +static int +vhost_kernel_set_vring_kick(struct virtio_user_dev *dev, struct vhost_vring_file *file) +{ + return vhost_kernel_set_vring_file(dev, VHOST_SET_VRING_KICK, file); +} - switch (req_kernel) { - case VHOST_SET_VRING_NUM: - case VHOST_SET_VRING_ADDR: - case VHOST_SET_VRING_BASE: - case VHOST_GET_VRING_BASE: - case VHOST_SET_VRING_KICK: - case VHOST_SET_VRING_CALL: - queue_sel = *(unsigned int *)arg; - vhostfd = dev->vhostfds[queue_sel / 2]; - *(unsigned int *)arg = queue_sel % 2; - PMD_DRV_LOG(DEBUG, "vhostfd=%d, index=%u", - vhostfd, *(unsigned int *)arg); - break; - default: - vhostfd = -1; - } - if (vhostfd == -1) { - for (i = 0; i < dev->max_queue_pairs; ++i) { - if (dev->vhostfds[i] < 0) - continue; - - ret = ioctl(dev->vhostfds[i], req_kernel, arg); - if (ret < 0) - break; - } - } else { - ret = ioctl(vhostfd, req_kernel, arg); - } +static int +vhost_kernel_set_vring_call(struct virtio_user_dev *dev, struct vhost_vring_file *file) +{ + return vhost_kernel_set_vring_file(dev, VHOST_SET_VRING_CALL, file); +} - if (!ret && req_kernel == VHOST_GET_FEATURES) { - features = tap_support_features(); - /* with tap as the backend, all these features are supported - * but not claimed by vhost-net, so we add them back when - * reporting to upper layer. - */ - if (features & IFF_VNET_HDR) { - *((uint64_t *)arg) |= VHOST_KERNEL_GUEST_OFFLOADS_MASK; - *((uint64_t *)arg) |= VHOST_KERNEL_HOST_OFFLOADS_MASK; - } +static int +vhost_kernel_set_vring_addr(struct virtio_user_dev *dev, struct vhost_vring_addr *addr) +{ + int ret, fd; + unsigned int index = addr->index; + struct vhost_kernel_data *data = dev->backend_data; + + /* Convert from queue index to queue-pair & offset */ + fd = data->vhostfds[addr->index / 2]; + addr->index %= 2; - /* vhost_kernel will not declare this feature, but it does - * support multi-queue. - */ - if (features & IFF_MULTI_QUEUE) - *(uint64_t *)arg |= (1ull << VIRTIO_NET_F_MQ); + ret = vhost_kernel_ioctl(fd, VHOST_SET_VRING_ADDR, addr); + if (ret < 0) { + PMD_DRV_LOG(ERR, "Failed to set vring address"); + return -1; } - if (vm) - free(vm); + /* restore index back to queue index */ + addr->index = index; - if (ret < 0) - PMD_DRV_LOG(ERR, "%s failed: %s", - vhost_msg_strings[req], strerror(errno)); + return 0; +} - return ret; +static int +vhost_kernel_get_status(struct virtio_user_dev *dev __rte_unused, uint8_t *status __rte_unused) +{ + return -ENOTSUP; +} + +static int +vhost_kernel_set_status(struct virtio_user_dev *dev __rte_unused, uint8_t status __rte_unused) +{ + return -ENOTSUP; } /** @@ -298,21 +364,82 @@ static int vhost_kernel_setup(struct virtio_user_dev *dev) { int vhostfd; - uint32_t i; + uint32_t q, i; + struct vhost_kernel_data *data; + + data = malloc(sizeof(*data)); + if (!data) { + PMD_INIT_LOG(ERR, "(%s) Failed to allocate Vhost-kernel data", dev->path); + return -1; + } + + data->vhostfds = malloc(dev->max_queue_pairs * sizeof(int)); + if (!data->vhostfds) { + PMD_INIT_LOG(ERR, "(%s) Failed to allocate Vhost FDs", dev->path); + goto err_data; + } + data->tapfds = malloc(dev->max_queue_pairs * sizeof(int)); + if (!data->tapfds) { + PMD_INIT_LOG(ERR, "(%s) Failed to allocate TAP FDs", dev->path); + goto err_vhostfds; + } + + for (q = 0; q < dev->max_queue_pairs; ++q) { + data->vhostfds[q] = -1; + data->tapfds[q] = -1; + } get_vhost_kernel_max_regions(); for (i = 0; i < dev->max_queue_pairs; ++i) { vhostfd = open(dev->path, O_RDWR); if (vhostfd < 0) { - PMD_DRV_LOG(ERR, "fail to open %s, %s", - dev->path, strerror(errno)); - return -1; + PMD_DRV_LOG(ERR, "fail to open %s, %s", dev->path, strerror(errno)); + goto err_tapfds; } - dev->vhostfds[i] = vhostfd; + data->vhostfds[i] = vhostfd; } + dev->backend_data = data; + + return 0; + +err_tapfds: + for (i = 0; i < dev->max_queue_pairs; i++) + if (data->vhostfds[i] >= 0) + close(data->vhostfds[i]); + + free(data->tapfds); +err_vhostfds: + free(data->vhostfds); +err_data: + free(data); + + return -1; +} + +static int +vhost_kernel_destroy(struct virtio_user_dev *dev) +{ + struct vhost_kernel_data *data = dev->backend_data; + uint32_t i; + + if (!data) + return 0; + + for (i = 0; i < dev->max_queue_pairs; ++i) { + if (data->vhostfds[i] >= 0) + close(data->vhostfds[i]); + if (data->tapfds[i] >= 0) + close(data->tapfds[i]); + } + + free(data->vhostfds); + free(data->tapfds); + free(data); + dev->backend_data = NULL; + return 0; } @@ -348,14 +475,15 @@ vhost_kernel_enable_queue_pair(struct virtio_user_dev *dev, int vhostfd; int tapfd; int req_mq = (dev->max_queue_pairs > 1); + struct vhost_kernel_data *data = dev->backend_data; - vhostfd = dev->vhostfds[pair_idx]; + vhostfd = data->vhostfds[pair_idx]; if (dev->qp_enabled[pair_idx] == enable) return 0; if (!enable) { - tapfd = dev->tapfds[pair_idx]; + tapfd = data->tapfds[pair_idx]; if (vhost_kernel_set_backend(vhostfd, -1) < 0) { PMD_DRV_LOG(ERR, "fail to set backend for vhost kernel"); return -1; @@ -368,8 +496,8 @@ vhost_kernel_enable_queue_pair(struct virtio_user_dev *dev, return 0; } - if (dev->tapfds[pair_idx] >= 0) { - tapfd = dev->tapfds[pair_idx]; + if (data->tapfds[pair_idx] >= 0) { + tapfd = data->tapfds[pair_idx]; if (vhost_kernel_tap_set_offload(tapfd, dev->features) == -1) return -1; if (req_mq && vhost_kernel_tap_set_queue(tapfd, true) < 0) { @@ -392,7 +520,7 @@ vhost_kernel_enable_queue_pair(struct virtio_user_dev *dev, return -1; } - dev->tapfds[pair_idx] = tapfd; + data->tapfds[pair_idx] = tapfd; set_backend: if (vhost_kernel_set_backend(vhostfd, tapfd) < 0) { @@ -404,9 +532,45 @@ set_backend: return 0; } +static int +vhost_kernel_get_backend_features(uint64_t *features) +{ + *features = 0; + + return 0; +} + +static int +vhost_kernel_update_link_state(struct virtio_user_dev *dev __rte_unused) +{ + /* Nothing to update (Maybe get TAP interface link state?) */ + return 0; +} + +static int +vhost_kernel_get_intr_fd(struct virtio_user_dev *dev __rte_unused) +{ + /* No link state interrupt with Vhost-kernel */ + return -1; +} + struct virtio_user_backend_ops virtio_ops_kernel = { .setup = vhost_kernel_setup, + .destroy = vhost_kernel_destroy, + .get_backend_features = vhost_kernel_get_backend_features, .set_owner = vhost_kernel_set_owner, - .send_request = vhost_kernel_send_request, - .enable_qp = vhost_kernel_enable_queue_pair + .get_features = vhost_kernel_get_features, + .set_features = vhost_kernel_set_features, + .set_memory_table = vhost_kernel_set_memory_table, + .set_vring_num = vhost_kernel_set_vring_num, + .set_vring_base = vhost_kernel_set_vring_base, + .get_vring_base = vhost_kernel_get_vring_base, + .set_vring_call = vhost_kernel_set_vring_call, + .set_vring_kick = vhost_kernel_set_vring_kick, + .set_vring_addr = vhost_kernel_set_vring_addr, + .get_status = vhost_kernel_get_status, + .set_status = vhost_kernel_set_status, + .enable_qp = vhost_kernel_enable_queue_pair, + .update_link_state = vhost_kernel_update_link_state, + .get_intr_fd = vhost_kernel_get_intr_fd, };