net/virtio-user: fix init when using existing tap
authorDavid Marchand <david.marchand@redhat.com>
Tue, 28 Sep 2021 08:51:14 +0000 (10:51 +0200)
committerMaxime Coquelin <maxime.coquelin@redhat.com>
Tue, 28 Sep 2021 19:23:00 +0000 (21:23 +0200)
When attaching to an existing mono queue tap, the virtio-user was not
reporting that the virtio device was not properly initialised which
prevented from starting the port later.

$ ip tuntap add test mode tap
$ dpdk-testpmd --vdev \
  net_virtio_user0,iface=test,path=/dev/vhost-net,queues=2 -- -i

...
virtio_user_dev_init_mac(): (/dev/vhost-net) No valid MAC in devargs or
device, use random
vhost_kernel_open_tap(): TUNSETIFF failed: Invalid argument
vhost_kernel_enable_queue_pair(): fail to open tap for vhost kernel
virtio_user_start_device(): (/dev/vhost-net) Failed to start device
...
Configuring Port 0 (socket 0)
vhost_kernel_open_tap(): TUNSETIFF failed: Invalid argument
vhost_kernel_enable_queue_pair(): fail to open tap for vhost kernel
virtio_set_multiple_queues(): Multiqueue configured but send command
failed, this is too late now...
Fail to start port 0: Invalid argument
Please stop the ports first
Done

The virtio-user with vhost-kernel backend was going through a lot
of complications to initialise tap fds only when using them.

For each qp enabled for the first time, a tapfd was created via
TUNSETIFF with unneeded additional steps (see below) and then mapped to
the right qp in the vhost-net backend.
Unneeded steps (as long as it has been done once for the port):
- tap features were queried while this is a constant on a running
  system,
- the device name in DPDK was updated,
- the mac address of the tap was set,

On subsequent qps state change, the vhost-net backend fd mapping was
updated and the associated queue/tapfd were disabled/enabled via
TUNSETQUEUE.

Now, this patch simplifies the whole logic by keeping all tapfds opened
and in enabled state (from the tap point of view) at all time.

Unused ioctl defines are removed.

Tap features are validated earlier to fail initialisation asap.
Tap name discovery and mac address configuration are moved when
configuring qp 0.

To support attaching to mono queue tap, the virtio-user driver now tries
to attach in multi queue first, then fallbacks to mono queue.

Finally (but this is more for consistency), VIRTIO_NET_F_MQ feature is
exposed only if the underlying tap supports multi queue.

Signed-off-by: David Marchand <david.marchand@redhat.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
drivers/net/virtio/virtio_user/vhost_kernel.c
drivers/net/virtio/virtio_user/vhost_kernel_tap.c
drivers/net/virtio/virtio_user/vhost_kernel_tap.h

index d65f89e..202a8cd 100644 (file)
@@ -120,9 +120,9 @@ vhost_kernel_set_owner(struct virtio_user_dev *dev)
 static int
 vhost_kernel_get_features(struct virtio_user_dev *dev, uint64_t *features)
 {
-       int ret;
-       unsigned int tap_features;
        struct vhost_kernel_data *data = dev->backend_data;
+       unsigned int tap_flags;
+       int ret;
 
        ret = vhost_kernel_ioctl(data->vhostfds[0], VHOST_GET_FEATURES, features);
        if (ret < 0) {
@@ -130,7 +130,7 @@ vhost_kernel_get_features(struct virtio_user_dev *dev, uint64_t *features)
                return -1;
        }
 
-       ret = tap_support_features(&tap_features);
+       ret = tap_get_flags(data->tapfds[0], &tap_flags);
        if (ret < 0) {
                PMD_DRV_LOG(ERR, "Failed to get TAP features");
                return -1;
@@ -140,7 +140,7 @@ vhost_kernel_get_features(struct virtio_user_dev *dev, uint64_t *features)
         * but not claimed by vhost-net, so we add them back when
         * reporting to upper layer.
         */
-       if (tap_features & IFF_VNET_HDR) {
+       if (tap_flags & IFF_VNET_HDR) {
                *features |= VHOST_KERNEL_GUEST_OFFLOADS_MASK;
                *features |= VHOST_KERNEL_HOST_OFFLOADS_MASK;
        }
@@ -148,7 +148,7 @@ vhost_kernel_get_features(struct virtio_user_dev *dev, uint64_t *features)
        /* vhost_kernel will not declare this feature, but it does
         * support multi-queue.
         */
-       if (tap_features & IFF_MULTI_QUEUE)
+       if (tap_flags & IFF_MULTI_QUEUE)
                *features |= (1ull << VIRTIO_NET_F_MQ);
 
        return 0;
@@ -380,9 +380,20 @@ vhost_kernel_set_status(struct virtio_user_dev *dev __rte_unused, uint8_t status
 static int
 vhost_kernel_setup(struct virtio_user_dev *dev)
 {
-       int vhostfd;
-       uint32_t q, i;
        struct vhost_kernel_data *data;
+       unsigned int tap_features;
+       unsigned int tap_flags;
+       const char *ifname;
+       uint32_t q, i;
+       int vhostfd;
+
+       if (tap_support_features(&tap_features) < 0)
+               return -1;
+
+       if ((tap_features & IFF_VNET_HDR) == 0) {
+               PMD_INIT_LOG(ERR, "TAP does not support IFF_VNET_HDR");
+               return -1;
+       }
 
        data = malloc(sizeof(*data));
        if (!data) {
@@ -414,18 +425,43 @@ vhost_kernel_setup(struct virtio_user_dev *dev)
                        PMD_DRV_LOG(ERR, "fail to open %s, %s", dev->path, strerror(errno));
                        goto err_tapfds;
                }
-
                data->vhostfds[i] = vhostfd;
        }
 
+       ifname = dev->ifname != NULL ? dev->ifname : "tap%d";
+       data->tapfds[0] = tap_open(ifname, (tap_features & IFF_MULTI_QUEUE) != 0);
+       if (data->tapfds[0] < 0)
+               goto err_tapfds;
+       if (dev->ifname == NULL && tap_get_name(data->tapfds[0], &dev->ifname) < 0) {
+               PMD_DRV_LOG(ERR, "fail to get tap name (%d)", data->tapfds[0]);
+               goto err_tapfds;
+       }
+       if (tap_get_flags(data->tapfds[0], &tap_flags) < 0) {
+               PMD_DRV_LOG(ERR, "fail to get tap flags for tap %s", dev->ifname);
+               goto err_tapfds;
+       }
+       if ((tap_flags & IFF_MULTI_QUEUE) == 0 && dev->max_queue_pairs > 1) {
+               PMD_DRV_LOG(ERR, "tap %s does not support multi queue", dev->ifname);
+               goto err_tapfds;
+       }
+
+       for (i = 1; i < dev->max_queue_pairs; i++) {
+               data->tapfds[i] = tap_open(dev->ifname, true);
+               if (data->tapfds[i] < 0)
+                       goto err_tapfds;
+       }
+
        dev->backend_data = data;
 
        return 0;
 
 err_tapfds:
-       for (i = 0; i < dev->max_queue_pairs; i++)
+       for (i = 0; i < dev->max_queue_pairs; i++) {
                if (data->vhostfds[i] >= 0)
                        close(data->vhostfds[i]);
+               if (data->tapfds[i] >= 0)
+                       close(data->tapfds[i]);
+       }
 
        free(data->tapfds);
 err_vhostfds:
@@ -488,58 +524,42 @@ vhost_kernel_enable_queue_pair(struct virtio_user_dev *dev,
                               uint16_t pair_idx,
                               int enable)
 {
+       struct vhost_kernel_data *data = dev->backend_data;
        int hdr_size;
        int vhostfd;
        int tapfd;
-       int req_mq = (dev->max_queue_pairs > 1);
-       struct vhost_kernel_data *data = dev->backend_data;
-
-       vhostfd = data->vhostfds[pair_idx];
 
        if (dev->qp_enabled[pair_idx] == enable)
                return 0;
 
+       vhostfd = data->vhostfds[pair_idx];
+       tapfd = data->tapfds[pair_idx];
+
        if (!enable) {
-               tapfd = data->tapfds[pair_idx];
                if (vhost_kernel_set_backend(vhostfd, -1) < 0) {
                        PMD_DRV_LOG(ERR, "fail to set backend for vhost kernel");
                        return -1;
                }
-               if (req_mq && vhost_kernel_tap_set_queue(tapfd, false) < 0) {
-                       PMD_DRV_LOG(ERR, "fail to disable tap for vhost kernel");
-                       return -1;
-               }
                dev->qp_enabled[pair_idx] = false;
                return 0;
        }
 
-       if (data->tapfds[pair_idx] >= 0) {
-               tapfd = data->tapfds[pair_idx];
-               if (vhost_kernel_tap_set_offload(tapfd, dev->features) == -1)
-                       return -1;
-               if (req_mq && vhost_kernel_tap_set_queue(tapfd, true) < 0) {
-                       PMD_DRV_LOG(ERR, "fail to enable tap for vhost kernel");
-                       return -1;
-               }
-               goto set_backend;
-       }
-
        if ((dev->features & (1ULL << VIRTIO_NET_F_MRG_RXBUF)) ||
            (dev->features & (1ULL << VIRTIO_F_VERSION_1)))
                hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
        else
                hdr_size = sizeof(struct virtio_net_hdr);
 
-       tapfd = vhost_kernel_open_tap(&dev->ifname, hdr_size, req_mq,
-                        (char *)dev->mac_addr, dev->features);
-       if (tapfd < 0) {
-               PMD_DRV_LOG(ERR, "fail to open tap for vhost kernel");
+       /* Set mac on tap only once when starting */
+       if (!dev->started && pair_idx == 0 &&
+                       tap_set_mac(data->tapfds[pair_idx], dev->mac_addr) < 0)
                return -1;
-       }
 
-       data->tapfds[pair_idx] = tapfd;
+       if (vhost_kernel_tap_setup(tapfd, hdr_size, dev->features) < 0) {
+               PMD_DRV_LOG(ERR, "fail to setup tap for vhost kernel");
+               return -1;
+       }
 
-set_backend:
        if (vhost_kernel_set_backend(vhostfd, tapfd) < 0) {
                PMD_DRV_LOG(ERR, "fail to set backend for vhost kernel");
                return -1;
index 7ef99c6..2aac402 100644 (file)
@@ -42,6 +42,91 @@ tap_support_features(unsigned int *tap_features)
 }
 
 int
+tap_open(const char *ifname, bool multi_queue)
+{
+       struct ifreq ifr;
+       int tapfd;
+
+       tapfd = open(PATH_NET_TUN, O_RDWR);
+       if (tapfd < 0) {
+               PMD_DRV_LOG(ERR, "fail to open %s: %s", PATH_NET_TUN, strerror(errno));
+               return -1;
+       }
+       if (fcntl(tapfd, F_SETFL, O_NONBLOCK) < 0) {
+               PMD_DRV_LOG(ERR, "fcntl tapfd failed: %s", strerror(errno));
+               close(tapfd);
+               return -1;
+       }
+
+retry_mono_q:
+       memset(&ifr, 0, sizeof(ifr));
+       strncpy(ifr.ifr_name, ifname, IFNAMSIZ - 1);
+       ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
+       if (multi_queue)
+               ifr.ifr_flags |= IFF_MULTI_QUEUE;
+       if (ioctl(tapfd, TUNSETIFF, (void *)&ifr) == -1) {
+               if (multi_queue) {
+                       PMD_DRV_LOG(DEBUG,
+                               "TUNSETIFF failed (will retry without IFF_MULTI_QUEUE): %s",
+                               strerror(errno));
+                       multi_queue = false;
+                       goto retry_mono_q;
+               }
+
+               PMD_DRV_LOG(ERR, "TUNSETIFF failed: %s", strerror(errno));
+               close(tapfd);
+               tapfd = -1;
+       }
+       return tapfd;
+}
+
+int
+tap_get_name(int tapfd, char **name)
+{
+       struct ifreq ifr;
+       int ret;
+
+       memset(&ifr, 0, sizeof(ifr));
+       if (ioctl(tapfd, TUNGETIFF, (void *)&ifr) == -1) {
+               PMD_DRV_LOG(ERR, "TUNGETIFF failed: %s", strerror(errno));
+               return -1;
+       }
+       ret = asprintf(name, "%s", ifr.ifr_name);
+       if (ret != -1)
+               ret = 0;
+       return ret;
+}
+
+int
+tap_get_flags(int tapfd, unsigned int *tap_flags)
+{
+       struct ifreq ifr;
+
+       memset(&ifr, 0, sizeof(ifr));
+       if (ioctl(tapfd, TUNGETIFF, (void *)&ifr) == -1) {
+               PMD_DRV_LOG(ERR, "TUNGETIFF failed: %s", strerror(errno));
+               return -1;
+       }
+       *tap_flags = ifr.ifr_flags;
+       return 0;
+}
+
+int
+tap_set_mac(int tapfd, uint8_t *mac)
+{
+       struct ifreq ifr;
+
+       memset(&ifr, 0, sizeof(ifr));
+       ifr.ifr_hwaddr.sa_family = ARPHRD_ETHER;
+       memcpy(ifr.ifr_hwaddr.sa_data, mac, RTE_ETHER_ADDR_LEN);
+       if (ioctl(tapfd, SIOCSIFHWADDR, (void *)&ifr) == -1) {
+               PMD_DRV_LOG(ERR, "SIOCSIFHWADDR failed: %s", strerror(errno));
+               return -1;
+       }
+       return 0;
+}
+
+static int
 vhost_kernel_tap_set_offload(int fd, uint64_t features)
 {
        unsigned int offload = 0;
@@ -79,24 +164,9 @@ vhost_kernel_tap_set_offload(int fd, uint64_t features)
 }
 
 int
-vhost_kernel_tap_set_queue(int fd, bool attach)
-{
-       struct ifreq ifr = {
-               .ifr_flags = attach ? IFF_ATTACH_QUEUE : IFF_DETACH_QUEUE,
-       };
-
-       return ioctl(fd, TUNSETQUEUE, &ifr);
-}
-
-int
-vhost_kernel_open_tap(char **p_ifname, int hdr_size, int req_mq,
-                        const char *mac, uint64_t features)
+vhost_kernel_tap_setup(int tapfd, int hdr_size, uint64_t features)
 {
-       unsigned int tap_features;
-       char *tap_name = NULL;
        int sndbuf = INT_MAX;
-       struct ifreq ifr;
-       int tapfd;
        int ret;
 
        /* TODO:
@@ -104,86 +174,18 @@ vhost_kernel_open_tap(char **p_ifname, int hdr_size, int req_mq,
         * 2. get number of memory regions from vhost module parameter
         * max_mem_regions, supported in newer version linux kernel
         */
-       tapfd = open(PATH_NET_TUN, O_RDWR);
-       if (tapfd < 0) {
-               PMD_DRV_LOG(ERR, "fail to open %s: %s",
-                           PATH_NET_TUN, strerror(errno));
-               return -1;
-       }
-
-       /* Construct ifr */
-       memset(&ifr, 0, sizeof(ifr));
-       ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
-
-       if (ioctl(tapfd, TUNGETFEATURES, &tap_features) == -1) {
-               PMD_DRV_LOG(ERR, "TUNGETFEATURES failed: %s", strerror(errno));
-               goto error;
-       }
-       if (tap_features & IFF_ONE_QUEUE)
-               ifr.ifr_flags |= IFF_ONE_QUEUE;
-
-       /* Let tap instead of vhost-net handle vnet header, as the latter does
-        * not support offloading. And in this case, we should not set feature
-        * bit VHOST_NET_F_VIRTIO_NET_HDR.
-        */
-       if (tap_features & IFF_VNET_HDR) {
-               ifr.ifr_flags |= IFF_VNET_HDR;
-       } else {
-               PMD_DRV_LOG(ERR, "TAP does not support IFF_VNET_HDR");
-               goto error;
-       }
-
-       if (req_mq)
-               ifr.ifr_flags |= IFF_MULTI_QUEUE;
-
-       if (*p_ifname)
-               strncpy(ifr.ifr_name, *p_ifname, IFNAMSIZ - 1);
-       else
-               strncpy(ifr.ifr_name, "tap%d", IFNAMSIZ - 1);
-       if (ioctl(tapfd, TUNSETIFF, (void *)&ifr) == -1) {
-               PMD_DRV_LOG(ERR, "TUNSETIFF failed: %s", strerror(errno));
-               goto error;
-       }
-
-       tap_name = strdup(ifr.ifr_name);
-       if (!tap_name) {
-               PMD_DRV_LOG(ERR, "strdup ifname failed: %s", strerror(errno));
-               goto error;
-       }
-
-       if (fcntl(tapfd, F_SETFL, O_NONBLOCK) < 0) {
-               PMD_DRV_LOG(ERR, "fcntl tapfd failed: %s", strerror(errno));
-               goto error;
-       }
-
        if (ioctl(tapfd, TUNSETVNETHDRSZ, &hdr_size) < 0) {
                PMD_DRV_LOG(ERR, "TUNSETVNETHDRSZ failed: %s", strerror(errno));
-               goto error;
+               return -1;
        }
 
        if (ioctl(tapfd, TUNSETSNDBUF, &sndbuf) < 0) {
                PMD_DRV_LOG(ERR, "TUNSETSNDBUF failed: %s", strerror(errno));
-               goto error;
+               return -1;
        }
 
        ret = vhost_kernel_tap_set_offload(tapfd, features);
-       if (ret < 0 && ret != -ENOTSUP)
-               goto error;
-
-       memset(&ifr, 0, sizeof(ifr));
-       ifr.ifr_hwaddr.sa_family = ARPHRD_ETHER;
-       memcpy(ifr.ifr_hwaddr.sa_data, mac, RTE_ETHER_ADDR_LEN);
-       if (ioctl(tapfd, SIOCSIFHWADDR, (void *)&ifr) == -1) {
-               PMD_DRV_LOG(ERR, "SIOCSIFHWADDR failed: %s", strerror(errno));
-               goto error;
-       }
-
-       free(*p_ifname);
-       *p_ifname = tap_name;
-
-       return tapfd;
-error:
-       free(tap_name);
-       close(tapfd);
-       return -1;
+       if (ret == -ENOTSUP)
+               ret = 0;
+       return ret;
 }
index ed03fce..726433c 100644 (file)
 #define TUNSETSNDBUF   _IOW('T', 212, int)
 #define TUNGETVNETHDRSZ _IOR('T', 215, int)
 #define TUNSETVNETHDRSZ _IOW('T', 216, int)
-#define TUNSETQUEUE  _IOW('T', 217, int)
-#define TUNSETVNETLE _IOW('T', 220, int)
-#define TUNSETVNETBE _IOW('T', 222, int)
 
 /* TUNSETIFF ifr flags */
 #define IFF_TAP          0x0002
 #define IFF_NO_PI        0x1000
-#define IFF_ONE_QUEUE    0x2000
 #define IFF_VNET_HDR     0x4000
 #define IFF_MULTI_QUEUE  0x0100
-#define IFF_ATTACH_QUEUE 0x0200
-#define IFF_DETACH_QUEUE 0x0400
 
 /* Features for GSO (TUNSETOFFLOAD). */
 #define TUN_F_CSUM     0x01    /* You can hand me unchecksummed packets. */
 /* Constants */
 #define PATH_NET_TUN   "/dev/net/tun"
 
-int vhost_kernel_open_tap(char **p_ifname, int hdr_size, int req_mq,
-                        const char *mac, uint64_t features);
-int vhost_kernel_tap_set_offload(int fd, uint64_t features);
-int vhost_kernel_tap_set_queue(int fd, bool attach);
+int vhost_kernel_tap_setup(int tapfd, int hdr_size, uint64_t features);
+
 int tap_support_features(unsigned int *tap_features);
+int tap_open(const char *ifname, bool multi_queue);
+int tap_get_name(int tapfd, char **ifname);
+int tap_get_flags(int tapfd, unsigned int *tap_flags);
+int tap_set_mac(int tapfd, uint8_t *mac);
 
 #endif