vm = malloc(sizeof(struct vhost_memory_kernel) +
max_regions *
sizeof(struct vhost_memory_region));
+ if (!vm)
+ return NULL;
for (i = 0; i < RTE_MAX_MEMSEG; ++i) {
seg = &rte_eal_get_configuration()->mem_config->memseg[i];
return vm;
}
+/* with below features, vhost kernel does not need to do the checksum and TSO,
+ * these info will be passed to virtio_user through virtio net header.
+ */
+#define VHOST_KERNEL_GUEST_OFFLOADS_MASK \
+ ((1ULL << VIRTIO_NET_F_GUEST_CSUM) | \
+ (1ULL << VIRTIO_NET_F_GUEST_TSO4) | \
+ (1ULL << VIRTIO_NET_F_GUEST_TSO6) | \
+ (1ULL << VIRTIO_NET_F_GUEST_ECN) | \
+ (1ULL << VIRTIO_NET_F_GUEST_UFO))
+
+/* with below features, when flows from virtio_user to vhost kernel
+ * (1) if flows goes up through the kernel networking stack, it does not need
+ * to verify checksum, which can save CPU cycles;
+ * (2) if flows goes through a Linux bridge and outside from an interface
+ * (kernel driver), checksum and TSO will be done by GSO in kernel or even
+ * offloaded into real physical device.
+ */
+#define VHOST_KERNEL_HOST_OFFLOADS_MASK \
+ ((1ULL << VIRTIO_NET_F_HOST_TSO4) | \
+ (1ULL << VIRTIO_NET_F_HOST_TSO6) | \
+ (1ULL << VIRTIO_NET_F_CSUM))
+
+static int
+tap_supporte_mq(void)
+{
+ int tapfd;
+ unsigned int tap_features;
+
+ tapfd = open(PATH_NET_TUN, O_RDWR);
+ if (tapfd < 0) {
+ PMD_DRV_LOG(ERR, "fail to open %s: %s",
+ PATH_NET_TUN, strerror(errno));
+ return -1;
+ }
+
+ if (ioctl(tapfd, TUNGETFEATURES, &tap_features) == -1) {
+ PMD_DRV_LOG(ERR, "TUNGETFEATURES failed: %s", strerror(errno));
+ close(tapfd);
+ return -1;
+ }
+
+ close(tapfd);
+ return tap_features & IFF_MULTI_QUEUE;
+}
+
static int
vhost_kernel_ioctl(struct virtio_user_dev *dev,
enum vhost_user_request req,
unsigned int i;
uint64_t req_kernel;
struct vhost_memory_kernel *vm = NULL;
+ int vhostfd;
+ unsigned int queue_sel;
PMD_DRV_LOG(INFO, "%s", vhost_msg_strings[req]);
arg = (void *)vm;
}
- /* We don't need memory protection here */
- if (req_kernel == VHOST_SET_FEATURES)
+ if (req_kernel == VHOST_SET_FEATURES) {
+ /* We don't need memory protection here */
*(uint64_t *)arg &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
- for (i = 0; i < dev->max_queue_pairs; ++i) {
- if (dev->vhostfds[i] < 0)
- continue;
+ /* VHOST kernel does not know about below flags */
+ *(uint64_t *)arg &= ~VHOST_KERNEL_GUEST_OFFLOADS_MASK;
+ *(uint64_t *)arg &= ~VHOST_KERNEL_HOST_OFFLOADS_MASK;
- ret = ioctl(dev->vhostfds[i], req_kernel, arg);
- if (ret < 0)
- break;
+ *(uint64_t *)arg &= ~(1ULL << VIRTIO_NET_F_MQ);
+ }
+
+ switch (req_kernel) {
+ case VHOST_SET_VRING_NUM:
+ case VHOST_SET_VRING_ADDR:
+ case VHOST_SET_VRING_BASE:
+ case VHOST_GET_VRING_BASE:
+ case VHOST_SET_VRING_KICK:
+ case VHOST_SET_VRING_CALL:
+ queue_sel = *(unsigned int *)arg;
+ vhostfd = dev->vhostfds[queue_sel / 2];
+ *(unsigned int *)arg = queue_sel % 2;
+ PMD_DRV_LOG(DEBUG, "vhostfd=%d, index=%u",
+ vhostfd, *(unsigned int *)arg);
+ break;
+ default:
+ vhostfd = -1;
+ }
+ if (vhostfd == -1) {
+ for (i = 0; i < dev->max_queue_pairs; ++i) {
+ if (dev->vhostfds[i] < 0)
+ continue;
+
+ ret = ioctl(dev->vhostfds[i], req_kernel, arg);
+ if (ret < 0)
+ break;
+ }
+ } else {
+ ret = ioctl(vhostfd, req_kernel, arg);
+ }
+
+ if (!ret && req_kernel == VHOST_GET_FEATURES) {
+ /* with tap as the backend, all these features are supported
+ * but not claimed by vhost-net, so we add them back when
+ * reporting to upper layer.
+ */
+ *((uint64_t *)arg) |= VHOST_KERNEL_GUEST_OFFLOADS_MASK;
+ *((uint64_t *)arg) |= VHOST_KERNEL_HOST_OFFLOADS_MASK;
+
+ /* vhost_kernel will not declare this feature, but it does
+ * support multi-queue.
+ */
+ if (tap_supporte_mq())
+ *(uint64_t *)arg |= (1ull << VIRTIO_NET_F_MQ);
}
if (vm)
int hdr_size;
int vhostfd;
int tapfd;
+ int req_mq = (dev->max_queue_pairs > 1);
vhostfd = dev->vhostfds[pair_idx];
else
hdr_size = sizeof(struct virtio_net_hdr);
- tapfd = vhost_kernel_open_tap(&dev->ifname, hdr_size);
+ tapfd = vhost_kernel_open_tap(&dev->ifname, hdr_size, req_mq);
if (tapfd < 0) {
PMD_DRV_LOG(ERR, "fail to open tap for vhost kernel");
return -1;