From 89aac60e0be9ed95a87b16e3595f102f9faaffb4 Mon Sep 17 00:00:00 2001 From: David Marchand Date: Wed, 10 Jul 2019 14:33:40 +0200 Subject: [PATCH] vfio: fix interrupts race condition Populating the eventfd in rte_intr_enable in each request to vfio triggers a reconfiguration of the interrupt handler on the kernel side. The problem is that rte_intr_enable is often used to re-enable masked interrupts from drivers interrupt handlers. This reconfiguration leaves a window during which a device could send an interrupt and then the kernel logs this (unsolicited from the kernel point of view) interrupt: [158764.159833] do_IRQ: 9.34 No irq handler for vector VFIO api makes it possible to set the fd at setup time. Make use of this and then we only need to ask for masking/unmasking legacy interrupts and we have nothing to do for MSI/MSIX. "rxtx" interrupts are left untouched but are most likely subject to the same issue. Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=1654824 Fixes: 5c782b3928b8 ("vfio: interrupts") Cc: stable@dpdk.org Signed-off-by: David Marchand Tested-by: Shahed Shaikh --- drivers/bus/pci/linux/pci_vfio.c | 78 ++++----- lib/librte_eal/linux/eal/eal_interrupts.c | 197 +++++----------------- 2 files changed, 86 insertions(+), 189 deletions(-) diff --git a/drivers/bus/pci/linux/pci_vfio.c b/drivers/bus/pci/linux/pci_vfio.c index 1ceb1c07b4..ee31239655 100644 --- a/drivers/bus/pci/linux/pci_vfio.c +++ b/drivers/bus/pci/linux/pci_vfio.c @@ -187,8 +187,11 @@ pci_vfio_set_bus_master(int dev_fd, bool op) static int pci_vfio_setup_interrupts(struct rte_pci_device *dev, int vfio_dev_fd) { - int i, ret, intr_idx; + char irq_set_buf[sizeof(struct vfio_irq_set) + sizeof(int)]; + struct vfio_irq_set *irq_set; enum rte_intr_mode intr_mode; + int i, ret, intr_idx; + int fd; /* default to invalid index */ intr_idx = VFIO_PCI_NUM_IRQS; @@ -220,7 +223,6 @@ pci_vfio_setup_interrupts(struct rte_pci_device *dev, int vfio_dev_fd) /* start from MSI-X interrupt type */ for (i = VFIO_PCI_MSIX_IRQ_INDEX; i >= 0; i--) { struct vfio_irq_info irq = { .argsz = sizeof(irq) }; - int fd = -1; /* skip interrupt modes we don't want */ if (intr_mode != RTE_INTR_MODE_NONE && @@ -236,51 +238,51 @@ pci_vfio_setup_interrupts(struct rte_pci_device *dev, int vfio_dev_fd) return -1; } + /* found a usable interrupt mode */ + if ((irq.flags & VFIO_IRQ_INFO_EVENTFD) != 0) + break; + /* if this vector cannot be used with eventfd, fail if we explicitly * specified interrupt type, otherwise continue */ - if ((irq.flags & VFIO_IRQ_INFO_EVENTFD) == 0) { - if (intr_mode != RTE_INTR_MODE_NONE) { - RTE_LOG(ERR, EAL, - " interrupt vector does not support eventfd!\n"); - return -1; - } else - continue; - } - - /* set up an eventfd for interrupts */ - fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); - if (fd < 0) { - RTE_LOG(ERR, EAL, " cannot set up eventfd, " - "error %i (%s)\n", errno, strerror(errno)); + if (intr_mode != RTE_INTR_MODE_NONE) { + RTE_LOG(ERR, EAL, " interrupt vector does not support eventfd!\n"); return -1; } + } - dev->intr_handle.fd = fd; - dev->intr_handle.vfio_dev_fd = vfio_dev_fd; + if (i < 0) + return -1; - switch (i) { - case VFIO_PCI_MSIX_IRQ_INDEX: - intr_mode = RTE_INTR_MODE_MSIX; - dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_MSIX; - break; - case VFIO_PCI_MSI_IRQ_INDEX: - intr_mode = RTE_INTR_MODE_MSI; - dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_MSI; - break; - case VFIO_PCI_INTX_IRQ_INDEX: - intr_mode = RTE_INTR_MODE_LEGACY; - dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_LEGACY; - break; - default: - RTE_LOG(ERR, EAL, " unknown interrupt type!\n"); - return -1; - } + fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); + if (fd < 0) { + RTE_LOG(ERR, EAL, " cannot set up eventfd, error %i (%s)\n", + errno, strerror(errno)); + return -1; + } - return 0; + irq_set = (struct vfio_irq_set *)irq_set_buf; + irq_set->argsz = sizeof(irq_set_buf); + irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD|VFIO_IRQ_SET_ACTION_TRIGGER; + irq_set->index = i; + irq_set->start = 0; + irq_set->count = 1; + memcpy(&irq_set->data, &fd, sizeof(int)); + if (ioctl(vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set) < 0) { + RTE_LOG(ERR, EAL, " error configuring interrupt\n"); + close(fd); + return -1; } - /* if we're here, we haven't found a suitable interrupt vector */ - return -1; + dev->intr_handle.fd = fd; + dev->intr_handle.vfio_dev_fd = vfio_dev_fd; + if (i == VFIO_PCI_MSIX_IRQ_INDEX) + dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_MSIX; + else if (i == VFIO_PCI_MSI_IRQ_INDEX) + dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_MSI; + else if (i == VFIO_PCI_INTX_IRQ_INDEX) + dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_LEGACY; + + return 0; } #ifdef HAVE_VFIO_DEV_REQ_INTERFACE diff --git a/lib/librte_eal/linux/eal/eal_interrupts.c b/lib/librte_eal/linux/eal/eal_interrupts.c index 79ad5e8d74..27976b37ef 100644 --- a/lib/librte_eal/linux/eal/eal_interrupts.c +++ b/lib/librte_eal/linux/eal/eal_interrupts.c @@ -109,42 +109,19 @@ static pthread_t intr_thread; /* enable legacy (INTx) interrupts */ static int -vfio_enable_intx(const struct rte_intr_handle *intr_handle) { - struct vfio_irq_set *irq_set; - char irq_set_buf[IRQ_SET_BUF_LEN]; - int len, ret; - int *fd_ptr; - - len = sizeof(irq_set_buf); - - /* enable INTx */ - irq_set = (struct vfio_irq_set *) irq_set_buf; - irq_set->argsz = len; - irq_set->count = 1; - irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER; - irq_set->index = VFIO_PCI_INTX_IRQ_INDEX; - irq_set->start = 0; - fd_ptr = (int *) &irq_set->data; - *fd_ptr = intr_handle->fd; - - ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); - - if (ret) { - RTE_LOG(ERR, EAL, "Error enabling INTx interrupts for fd %d\n", - intr_handle->fd); - return -1; - } +vfio_enable_intx(const struct rte_intr_handle *intr_handle) +{ + struct vfio_irq_set irq_set; + int ret; - /* unmask INTx after enabling */ - memset(irq_set, 0, len); - len = sizeof(struct vfio_irq_set); - irq_set->argsz = len; - irq_set->count = 1; - irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK; - irq_set->index = VFIO_PCI_INTX_IRQ_INDEX; - irq_set->start = 0; + /* unmask INTx */ + irq_set.argsz = sizeof(irq_set); + irq_set.flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK; + irq_set.index = VFIO_PCI_INTX_IRQ_INDEX; + irq_set.start = 0; + irq_set.count = 1; - ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, &irq_set); if (ret) { RTE_LOG(ERR, EAL, "Error unmasking INTx interrupts for fd %d\n", @@ -156,128 +133,51 @@ vfio_enable_intx(const struct rte_intr_handle *intr_handle) { /* disable legacy (INTx) interrupts */ static int -vfio_disable_intx(const struct rte_intr_handle *intr_handle) { - struct vfio_irq_set *irq_set; - char irq_set_buf[IRQ_SET_BUF_LEN]; - int len, ret; - - len = sizeof(struct vfio_irq_set); +vfio_disable_intx(const struct rte_intr_handle *intr_handle) +{ + struct vfio_irq_set irq_set; + int ret; - /* mask interrupts before disabling */ - irq_set = (struct vfio_irq_set *) irq_set_buf; - irq_set->argsz = len; - irq_set->count = 1; - irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_MASK; - irq_set->index = VFIO_PCI_INTX_IRQ_INDEX; - irq_set->start = 0; + /* mask interrupts */ + irq_set.argsz = sizeof(irq_set); + irq_set.flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_MASK; + irq_set.index = VFIO_PCI_INTX_IRQ_INDEX; + irq_set.start = 0; + irq_set.count = 1; - ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, &irq_set); if (ret) { RTE_LOG(ERR, EAL, "Error masking INTx interrupts for fd %d\n", intr_handle->fd); return -1; } - - /* disable INTx*/ - memset(irq_set, 0, len); - irq_set->argsz = len; - irq_set->count = 0; - irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER; - irq_set->index = VFIO_PCI_INTX_IRQ_INDEX; - irq_set->start = 0; - - ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); - - if (ret) { - RTE_LOG(ERR, EAL, - "Error disabling INTx interrupts for fd %d\n", intr_handle->fd); - return -1; - } - return 0; -} - -/* enable MSI interrupts */ -static int -vfio_enable_msi(const struct rte_intr_handle *intr_handle) { - int len, ret; - char irq_set_buf[IRQ_SET_BUF_LEN]; - struct vfio_irq_set *irq_set; - int *fd_ptr; - - len = sizeof(irq_set_buf); - - irq_set = (struct vfio_irq_set *) irq_set_buf; - irq_set->argsz = len; - irq_set->count = 1; - irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER; - irq_set->index = VFIO_PCI_MSI_IRQ_INDEX; - irq_set->start = 0; - fd_ptr = (int *) &irq_set->data; - *fd_ptr = intr_handle->fd; - - ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); - - if (ret) { - RTE_LOG(ERR, EAL, "Error enabling MSI interrupts for fd %d\n", - intr_handle->fd); - return -1; - } return 0; } -/* disable MSI interrupts */ -static int -vfio_disable_msi(const struct rte_intr_handle *intr_handle) { - struct vfio_irq_set *irq_set; - char irq_set_buf[IRQ_SET_BUF_LEN]; - int len, ret; - - len = sizeof(struct vfio_irq_set); - - irq_set = (struct vfio_irq_set *) irq_set_buf; - irq_set->argsz = len; - irq_set->count = 0; - irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER; - irq_set->index = VFIO_PCI_MSI_IRQ_INDEX; - irq_set->start = 0; - - ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); - - if (ret) - RTE_LOG(ERR, EAL, - "Error disabling MSI interrupts for fd %d\n", intr_handle->fd); - - return ret; -} - /* enable MSI-X interrupts */ static int -vfio_enable_msix(const struct rte_intr_handle *intr_handle) { - int len, ret; +vfio_enable_msix(const struct rte_intr_handle *intr_handle) +{ char irq_set_buf[MSIX_IRQ_SET_BUF_LEN]; struct vfio_irq_set *irq_set; - int *fd_ptr; + int len, ret; + + if (intr_handle->nb_efd == 0) + return 0; len = sizeof(irq_set_buf); irq_set = (struct vfio_irq_set *) irq_set_buf; irq_set->argsz = len; - /* 0 < irq_set->count < RTE_MAX_RXTX_INTR_VEC_ID + 1 */ - irq_set->count = intr_handle->max_intr ? - (intr_handle->max_intr > RTE_MAX_RXTX_INTR_VEC_ID + 1 ? - RTE_MAX_RXTX_INTR_VEC_ID + 1 : intr_handle->max_intr) : 1; irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER; irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX; - irq_set->start = 0; - fd_ptr = (int *) &irq_set->data; - /* INTR vector offset 0 reserve for non-efds mapping */ - fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] = intr_handle->fd; - memcpy(&fd_ptr[RTE_INTR_VEC_RXTX_OFFSET], intr_handle->efds, - sizeof(*intr_handle->efds) * intr_handle->nb_efd); + irq_set->start = RTE_INTR_VEC_RXTX_OFFSET; + irq_set->count = intr_handle->nb_efd; + memcpy(&irq_set->data, intr_handle->efds, + sizeof(*intr_handle->efds) * intr_handle->nb_efd); ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); - if (ret) { RTE_LOG(ERR, EAL, "Error enabling MSI-X interrupts for fd %d\n", intr_handle->fd); @@ -289,22 +189,21 @@ vfio_enable_msix(const struct rte_intr_handle *intr_handle) { /* disable MSI-X interrupts */ static int -vfio_disable_msix(const struct rte_intr_handle *intr_handle) { - struct vfio_irq_set *irq_set; - char irq_set_buf[MSIX_IRQ_SET_BUF_LEN]; - int len, ret; - - len = sizeof(struct vfio_irq_set); +vfio_disable_msix(const struct rte_intr_handle *intr_handle) +{ + struct vfio_irq_set irq_set; + int ret; - irq_set = (struct vfio_irq_set *) irq_set_buf; - irq_set->argsz = len; - irq_set->count = 0; - irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER; - irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX; - irq_set->start = 0; + if (intr_handle->nb_efd == 0) + return 0; - ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); + irq_set.argsz = sizeof(irq_set); + irq_set.flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER; + irq_set.index = VFIO_PCI_MSIX_IRQ_INDEX; + irq_set.start = RTE_INTR_VEC_RXTX_OFFSET; + irq_set.count = intr_handle->nb_efd; + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, &irq_set); if (ret) RTE_LOG(ERR, EAL, "Error disabling MSI-X interrupts for fd %d\n", intr_handle->fd); @@ -665,9 +564,7 @@ rte_intr_enable(const struct rte_intr_handle *intr_handle) return -1; break; case RTE_INTR_HANDLE_VFIO_MSI: - if (vfio_enable_msi(intr_handle)) - return -1; - break; + return 0; case RTE_INTR_HANDLE_VFIO_LEGACY: if (vfio_enable_intx(intr_handle)) return -1; @@ -721,9 +618,7 @@ rte_intr_disable(const struct rte_intr_handle *intr_handle) return -1; break; case RTE_INTR_HANDLE_VFIO_MSI: - if (vfio_disable_msi(intr_handle)) - return -1; - break; + return 0; case RTE_INTR_HANDLE_VFIO_LEGACY: if (vfio_disable_intx(intr_handle)) return -1; -- 2.20.1