net/virtio: add virtio-user memory tables ops
[dpdk.git] / drivers / net / virtio / virtio_user / vhost_kernel.c
index 5c81e8d..69f932b 100644 (file)
@@ -6,6 +6,7 @@
 #include <sys/stat.h>
 #include <fcntl.h>
 #include <unistd.h>
+#include <errno.h>
 
 #include <rte_memory.h>
 
@@ -37,6 +38,28 @@ struct vhost_memory_kernel {
 #define VHOST_SET_VRING_ERR _IOW(VHOST_VIRTIO, 0x22, struct vhost_vring_file)
 #define VHOST_NET_SET_BACKEND _IOW(VHOST_VIRTIO, 0x30, struct vhost_vring_file)
 
+/* With the features below, vhost kernel does not need to do checksum and TSO;
+ * this information is passed to virtio_user through the virtio net header.
+ */
+#define VHOST_KERNEL_GUEST_OFFLOADS_MASK       \
+       ((1ULL << VIRTIO_NET_F_GUEST_CSUM) |    \
+        (1ULL << VIRTIO_NET_F_GUEST_TSO4) |    \
+        (1ULL << VIRTIO_NET_F_GUEST_TSO6) |    \
+        (1ULL << VIRTIO_NET_F_GUEST_ECN)  |    \
+        (1ULL << VIRTIO_NET_F_GUEST_UFO))
+
+/* With the features below, when a flow goes from virtio_user to vhost kernel:
+ * (1) if the flow goes up through the kernel networking stack, it does not
+ * need to verify the checksum, which can save CPU cycles;
+ * (2) if the flow goes through a Linux bridge and out via an interface
+ * (kernel driver), checksum and TSO will be done by GSO in the kernel or
+ * even offloaded into a real physical device.
+ */
+#define VHOST_KERNEL_HOST_OFFLOADS_MASK                \
+       ((1ULL << VIRTIO_NET_F_HOST_TSO4) |     \
+        (1ULL << VIRTIO_NET_F_HOST_TSO6) |     \
+        (1ULL << VIRTIO_NET_F_CSUM))
+
 static uint64_t max_regions = 64;
 
 static void
@@ -55,19 +78,75 @@ get_vhost_kernel_max_regions(void)
        close(fd);
 }
 
-static uint64_t vhost_req_user_to_kernel[] = {
-       [VHOST_USER_SET_OWNER] = VHOST_SET_OWNER,
-       [VHOST_USER_RESET_OWNER] = VHOST_RESET_OWNER,
-       [VHOST_USER_SET_FEATURES] = VHOST_SET_FEATURES,
-       [VHOST_USER_GET_FEATURES] = VHOST_GET_FEATURES,
-       [VHOST_USER_SET_VRING_CALL] = VHOST_SET_VRING_CALL,
-       [VHOST_USER_SET_VRING_NUM] = VHOST_SET_VRING_NUM,
-       [VHOST_USER_SET_VRING_BASE] = VHOST_SET_VRING_BASE,
-       [VHOST_USER_GET_VRING_BASE] = VHOST_GET_VRING_BASE,
-       [VHOST_USER_SET_VRING_ADDR] = VHOST_SET_VRING_ADDR,
-       [VHOST_USER_SET_VRING_KICK] = VHOST_SET_VRING_KICK,
-       [VHOST_USER_SET_MEM_TABLE] = VHOST_SET_MEM_TABLE,
-};
+static int
+vhost_kernel_ioctl(int fd, uint64_t request, void *arg)
+{
+       int ret;
+
+       ret = ioctl(fd, request, arg);
+       if (ret) {
+               PMD_DRV_LOG(ERR, "Vhost-kernel ioctl %"PRIu64" failed (%s)",
+                               request, strerror(errno));
+               return -1;
+       }
+
+       return 0;
+}
+
+static int
+vhost_kernel_set_owner(struct virtio_user_dev *dev)
+{
+       return vhost_kernel_ioctl(dev->vhostfds[0], VHOST_SET_OWNER, NULL);
+}
+
+static int
+vhost_kernel_get_features(struct virtio_user_dev *dev, uint64_t *features)
+{
+       int ret;
+       unsigned int tap_features;
+
+       ret = vhost_kernel_ioctl(dev->vhostfds[0], VHOST_GET_FEATURES, features);
+       if (ret < 0) {
+               PMD_DRV_LOG(ERR, "Failed to get features");
+               return -1;
+       }
+
+       ret = tap_support_features(&tap_features);
+       if (ret < 0) {
+               PMD_DRV_LOG(ERR, "Failed to get TAP features");
+               return -1;
+       }
+
+       /* With tap as the backend, all these features are supported
+        * but not claimed by vhost-net, so we add them back when
+        * reporting to the upper layer.
+        */
+       if (tap_features & IFF_VNET_HDR) {
+               *features |= VHOST_KERNEL_GUEST_OFFLOADS_MASK;
+               *features |= VHOST_KERNEL_HOST_OFFLOADS_MASK;
+       }
+
+       /* vhost_kernel will not declare this feature, but it does
+        * support multi-queue.
+        */
+       if (tap_features & IFF_MULTI_QUEUE)
+               *features |= (1ull << VIRTIO_NET_F_MQ);
+
+       return 0;
+}
+
+static int
+vhost_kernel_set_features(struct virtio_user_dev *dev, uint64_t features)
+{
+       /* We don't need memory protection here */
+       features &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
+       /* VHOST kernel does not know about below flags */
+       features &= ~VHOST_KERNEL_GUEST_OFFLOADS_MASK;
+       features &= ~VHOST_KERNEL_HOST_OFFLOADS_MASK;
+       features &= ~(1ULL << VIRTIO_NET_F_MQ);
+
+       return vhost_kernel_ioctl(dev->vhostfds[0], VHOST_SET_FEATURES, &features);
+}
 
 static int
 add_memseg_list(const struct rte_memseg_list *msl, void *arg)
@@ -103,16 +182,17 @@ add_memseg_list(const struct rte_memseg_list *msl, void *arg)
  * have much more memory regions. Below function will treat each
  * contiguous memory space reserved by DPDK as one region.
  */
-static struct vhost_memory_kernel *
-prepare_vhost_memory_kernel(void)
+static int
+vhost_kernel_set_memory_table(struct virtio_user_dev *dev)
 {
        struct vhost_memory_kernel *vm;
+       int ret;
 
        vm = malloc(sizeof(struct vhost_memory_kernel) +
                        max_regions *
                        sizeof(struct vhost_memory_region));
        if (!vm)
-               return NULL;
+               goto err;
 
        vm->nregions = 0;
        vm->padding = 0;
@@ -121,94 +201,49 @@ prepare_vhost_memory_kernel(void)
         * The memory lock has already been taken by memory subsystem
         * or virtio_user_start_device().
         */
-       if (rte_memseg_list_walk_thread_unsafe(add_memseg_list, vm) < 0) {
-               free(vm);
-               return NULL;
-       }
-
-       return vm;
-}
-
-/* with below features, vhost kernel does not need to do the checksum and TSO,
- * these info will be passed to virtio_user through virtio net header.
- */
-#define VHOST_KERNEL_GUEST_OFFLOADS_MASK       \
-       ((1ULL << VIRTIO_NET_F_GUEST_CSUM) |    \
-        (1ULL << VIRTIO_NET_F_GUEST_TSO4) |    \
-        (1ULL << VIRTIO_NET_F_GUEST_TSO6) |    \
-        (1ULL << VIRTIO_NET_F_GUEST_ECN)  |    \
-        (1ULL << VIRTIO_NET_F_GUEST_UFO))
-
-/* with below features, when flows from virtio_user to vhost kernel
- * (1) if flows goes up through the kernel networking stack, it does not need
- * to verify checksum, which can save CPU cycles;
- * (2) if flows goes through a Linux bridge and outside from an interface
- * (kernel driver), checksum and TSO will be done by GSO in kernel or even
- * offloaded into real physical device.
- */
-#define VHOST_KERNEL_HOST_OFFLOADS_MASK                \
-       ((1ULL << VIRTIO_NET_F_HOST_TSO4) |     \
-        (1ULL << VIRTIO_NET_F_HOST_TSO6) |     \
-        (1ULL << VIRTIO_NET_F_CSUM))
-
-static unsigned int
-tap_support_features(void)
-{
-       int tapfd;
-       unsigned int tap_features;
+       ret = rte_memseg_list_walk_thread_unsafe(add_memseg_list, vm);
+       if (ret < 0)
+               goto err_free;
 
-       tapfd = open(PATH_NET_TUN, O_RDWR);
-       if (tapfd < 0) {
-               PMD_DRV_LOG(ERR, "fail to open %s: %s",
-                           PATH_NET_TUN, strerror(errno));
-               return -1;
-       }
+       ret = vhost_kernel_ioctl(dev->vhostfds[0], VHOST_SET_MEM_TABLE, vm);
+       if (ret < 0)
+               goto err_free;
 
-       if (ioctl(tapfd, TUNGETFEATURES, &tap_features) == -1) {
-               PMD_DRV_LOG(ERR, "TUNGETFEATURES failed: %s", strerror(errno));
-               close(tapfd);
-               return -1;
-       }
+       free(vm);
 
-       close(tapfd);
-       return tap_features;
+       return 0;
+err_free:
+       free(vm);
+err:
+       PMD_DRV_LOG(ERR, "Failed to set memory table");
+       return -1;
 }
 
+static uint64_t vhost_req_user_to_kernel[] = {
+       [VHOST_USER_RESET_OWNER] = VHOST_RESET_OWNER,
+       [VHOST_USER_SET_VRING_CALL] = VHOST_SET_VRING_CALL,
+       [VHOST_USER_SET_VRING_NUM] = VHOST_SET_VRING_NUM,
+       [VHOST_USER_SET_VRING_BASE] = VHOST_SET_VRING_BASE,
+       [VHOST_USER_GET_VRING_BASE] = VHOST_GET_VRING_BASE,
+       [VHOST_USER_SET_VRING_ADDR] = VHOST_SET_VRING_ADDR,
+       [VHOST_USER_SET_VRING_KICK] = VHOST_SET_VRING_KICK,
+};
+
 static int
-vhost_kernel_ioctl(struct virtio_user_dev *dev,
+vhost_kernel_send_request(struct virtio_user_dev *dev,
                   enum vhost_user_request req,
                   void *arg)
 {
        int ret = -1;
        unsigned int i;
        uint64_t req_kernel;
-       struct vhost_memory_kernel *vm = NULL;
        int vhostfd;
        unsigned int queue_sel;
-       unsigned int features;
 
        PMD_DRV_LOG(INFO, "%s", vhost_msg_strings[req]);
 
        req_kernel = vhost_req_user_to_kernel[req];
 
-       if (req_kernel == VHOST_SET_MEM_TABLE) {
-               vm = prepare_vhost_memory_kernel();
-               if (!vm)
-                       return -1;
-               arg = (void *)vm;
-       }
-
-       if (req_kernel == VHOST_SET_FEATURES) {
-               /* We don't need memory protection here */
-               *(uint64_t *)arg &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
-
-               /* VHOST kernel does not know about below flags */
-               *(uint64_t *)arg &= ~VHOST_KERNEL_GUEST_OFFLOADS_MASK;
-               *(uint64_t *)arg &= ~VHOST_KERNEL_HOST_OFFLOADS_MASK;
-
-               *(uint64_t *)arg &= ~(1ULL << VIRTIO_NET_F_MQ);
-       }
-
        switch (req_kernel) {
        case VHOST_SET_VRING_NUM:
        case VHOST_SET_VRING_ADDR:
@@ -238,27 +273,6 @@ vhost_kernel_ioctl(struct virtio_user_dev *dev,
                ret = ioctl(vhostfd, req_kernel, arg);
        }
 
-       if (!ret && req_kernel == VHOST_GET_FEATURES) {
-               features = tap_support_features();
-               /* with tap as the backend, all these features are supported
-                * but not claimed by vhost-net, so we add them back when
-                * reporting to upper layer.
-                */
-               if (features & IFF_VNET_HDR) {
-                       *((uint64_t *)arg) |= VHOST_KERNEL_GUEST_OFFLOADS_MASK;
-                       *((uint64_t *)arg) |= VHOST_KERNEL_HOST_OFFLOADS_MASK;
-               }
-
-               /* vhost_kernel will not declare this feature, but it does
-                * support multi-queue.
-                */
-               if (features & IFF_MULTI_QUEUE)
-                       *(uint64_t *)arg |= (1ull << VIRTIO_NET_F_MQ);
-       }
-
-       if (vm)
-               free(vm);
-
        if (ret < 0)
                PMD_DRV_LOG(ERR, "%s failed: %s",
                            vhost_msg_strings[req], strerror(errno));
@@ -330,16 +344,34 @@ vhost_kernel_enable_queue_pair(struct virtio_user_dev *dev,
 
        vhostfd = dev->vhostfds[pair_idx];
 
+       if (dev->qp_enabled[pair_idx] == enable)
+               return 0;
+
        if (!enable) {
-               if (dev->tapfds[pair_idx] >= 0) {
-                       close(dev->tapfds[pair_idx]);
-                       dev->tapfds[pair_idx] = -1;
+               tapfd = dev->tapfds[pair_idx];
+               if (vhost_kernel_set_backend(vhostfd, -1) < 0) {
+                       PMD_DRV_LOG(ERR, "fail to set backend for vhost kernel");
+                       return -1;
+               }
+               if (req_mq && vhost_kernel_tap_set_queue(tapfd, false) < 0) {
+                       PMD_DRV_LOG(ERR, "fail to disable tap for vhost kernel");
+                       return -1;
                }
-               return vhost_kernel_set_backend(vhostfd, -1);
-       } else if (dev->tapfds[pair_idx] >= 0) {
+               dev->qp_enabled[pair_idx] = false;
                return 0;
        }
 
+       if (dev->tapfds[pair_idx] >= 0) {
+               tapfd = dev->tapfds[pair_idx];
+               if (vhost_kernel_tap_set_offload(tapfd, dev->features) == -1)
+                       return -1;
+               if (req_mq && vhost_kernel_tap_set_queue(tapfd, true) < 0) {
+                       PMD_DRV_LOG(ERR, "fail to enable tap for vhost kernel");
+                       return -1;
+               }
+               goto set_backend;
+       }
+
        if ((dev->features & (1ULL << VIRTIO_NET_F_MRG_RXBUF)) ||
            (dev->features & (1ULL << VIRTIO_F_VERSION_1)))
                hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
@@ -353,18 +385,24 @@ vhost_kernel_enable_queue_pair(struct virtio_user_dev *dev,
                return -1;
        }
 
+       dev->tapfds[pair_idx] = tapfd;
+
+set_backend:
        if (vhost_kernel_set_backend(vhostfd, tapfd) < 0) {
                PMD_DRV_LOG(ERR, "fail to set backend for vhost kernel");
-               close(tapfd);
                return -1;
        }
 
-       dev->tapfds[pair_idx] = tapfd;
+       dev->qp_enabled[pair_idx] = true;
        return 0;
 }
 
 struct virtio_user_backend_ops virtio_ops_kernel = {
        .setup = vhost_kernel_setup,
-       .send_request = vhost_kernel_ioctl,
+       .set_owner = vhost_kernel_set_owner,
+       .get_features = vhost_kernel_get_features,
+       .set_features = vhost_kernel_set_features,
+       .set_memory_table = vhost_kernel_set_memory_table,
+       .send_request = vhost_kernel_send_request,
        .enable_qp = vhost_kernel_enable_queue_pair
 };