From: Nithin Dabilpuram Date: Tue, 23 Jul 2019 08:04:17 +0000 (+0200) Subject: vfio: revert interrupt eventfd setup at probe X-Git-Url: http://git.droids-corp.org/?a=commitdiff_plain;h=33543fb3b655f0b093bd9fa88f5fafddd18190ba;p=dpdk.git vfio: revert interrupt eventfd setup at probe This reverts commit 89aac60e0be9ed95a87b16e3595f102f9faaffb4. "vfio: fix interrupts race condition" The above mentioned commit moves the interrupt's eventfd setup to probe time but only enables one interrupt for all types of interrupt handles i.e VFIO_MSI, VFIO_LEGACY, VFIO_MSIX, UIO. It works fine with default case but breaks below cases specifically for MSIX based interrupt handles. * Applications like l3fwd-power that request rxq interrupts while ethdev setup. * Drivers that need > 1 MSIx interrupts to be configured for functionality to work. VFIO PCI for MSIx expects all the possible vectors to be setup up when using VFIO_IRQ_SET_ACTION_TRIGGER so that they can be allocated from kernel pci subsystem. Only way to increase the number of vectors later is first free all by using VFIO_IRQ_SET_DATA_NONE with action trigger and then enable new vector count. Above commit changes the behavior of rte_intr_[enable|disable] to only mask and unmask unlike earlier behavior and thereby breaking above two scenarios. Fixes: 89aac60e0be9 ("vfio: fix interrupts race condition") Cc: stable@dpdk.org Signed-off-by: Nithin Dabilpuram Signed-off-by: Jerin Jacob Tested-by: Stephen Hemminger Tested-by: Shahed Shaikh Tested-by: Lei Yao Acked-by: David Marchand --- diff --git a/drivers/bus/pci/linux/pci_vfio.c b/drivers/bus/pci/linux/pci_vfio.c index ee31239655..1ceb1c07b4 100644 --- a/drivers/bus/pci/linux/pci_vfio.c +++ b/drivers/bus/pci/linux/pci_vfio.c @@ -187,11 +187,8 @@ pci_vfio_set_bus_master(int dev_fd, bool op) static int pci_vfio_setup_interrupts(struct rte_pci_device *dev, int vfio_dev_fd) { - char irq_set_buf[sizeof(struct vfio_irq_set) + sizeof(int)]; - struct vfio_irq_set *irq_set; - enum rte_intr_mode intr_mode; int i, ret, intr_idx; - int fd; + enum rte_intr_mode intr_mode; /* default to invalid index */ intr_idx = VFIO_PCI_NUM_IRQS; @@ -223,6 +220,7 @@ pci_vfio_setup_interrupts(struct rte_pci_device *dev, int vfio_dev_fd) /* start from MSI-X interrupt type */ for (i = VFIO_PCI_MSIX_IRQ_INDEX; i >= 0; i--) { struct vfio_irq_info irq = { .argsz = sizeof(irq) }; + int fd = -1; /* skip interrupt modes we don't want */ if (intr_mode != RTE_INTR_MODE_NONE && @@ -238,51 +236,51 @@ pci_vfio_setup_interrupts(struct rte_pci_device *dev, int vfio_dev_fd) return -1; } - /* found a usable interrupt mode */ - if ((irq.flags & VFIO_IRQ_INFO_EVENTFD) != 0) - break; - /* if this vector cannot be used with eventfd, fail if we explicitly * specified interrupt type, otherwise continue */ - if (intr_mode != RTE_INTR_MODE_NONE) { - RTE_LOG(ERR, EAL, " interrupt vector does not support eventfd!\n"); + if ((irq.flags & VFIO_IRQ_INFO_EVENTFD) == 0) { + if (intr_mode != RTE_INTR_MODE_NONE) { + RTE_LOG(ERR, EAL, + " interrupt vector does not support eventfd!\n"); + return -1; + } else + continue; + } + + /* set up an eventfd for interrupts */ + fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); + if (fd < 0) { + RTE_LOG(ERR, EAL, " cannot set up eventfd, " + "error %i (%s)\n", errno, strerror(errno)); return -1; } - } - if (i < 0) - return -1; + dev->intr_handle.fd = fd; + dev->intr_handle.vfio_dev_fd = vfio_dev_fd; - fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); - if (fd < 0) { - RTE_LOG(ERR, EAL, " cannot set up eventfd, error %i (%s)\n", - errno, strerror(errno)); - return -1; - } + switch (i) { + case VFIO_PCI_MSIX_IRQ_INDEX: + intr_mode = RTE_INTR_MODE_MSIX; + dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_MSIX; + break; + case VFIO_PCI_MSI_IRQ_INDEX: + intr_mode = RTE_INTR_MODE_MSI; + dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_MSI; + break; + case VFIO_PCI_INTX_IRQ_INDEX: + intr_mode = RTE_INTR_MODE_LEGACY; + dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_LEGACY; + break; + default: + RTE_LOG(ERR, EAL, " unknown interrupt type!\n"); + return -1; + } - irq_set = (struct vfio_irq_set *)irq_set_buf; - irq_set->argsz = sizeof(irq_set_buf); - irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD|VFIO_IRQ_SET_ACTION_TRIGGER; - irq_set->index = i; - irq_set->start = 0; - irq_set->count = 1; - memcpy(&irq_set->data, &fd, sizeof(int)); - if (ioctl(vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set) < 0) { - RTE_LOG(ERR, EAL, " error configuring interrupt\n"); - close(fd); - return -1; + return 0; } - dev->intr_handle.fd = fd; - dev->intr_handle.vfio_dev_fd = vfio_dev_fd; - if (i == VFIO_PCI_MSIX_IRQ_INDEX) - dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_MSIX; - else if (i == VFIO_PCI_MSI_IRQ_INDEX) - dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_MSI; - else if (i == VFIO_PCI_INTX_IRQ_INDEX) - dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_LEGACY; - - return 0; + /* if we're here, we haven't found a suitable interrupt vector */ + return -1; } #ifdef HAVE_VFIO_DEV_REQ_INTERFACE diff --git a/lib/librte_eal/linux/eal/eal_interrupts.c b/lib/librte_eal/linux/eal/eal_interrupts.c index 27976b37ef..79ad5e8d74 100644 --- a/lib/librte_eal/linux/eal/eal_interrupts.c +++ b/lib/librte_eal/linux/eal/eal_interrupts.c @@ -109,19 +109,42 @@ static pthread_t intr_thread; /* enable legacy (INTx) interrupts */ static int -vfio_enable_intx(const struct rte_intr_handle *intr_handle) -{ - struct vfio_irq_set irq_set; - int ret; +vfio_enable_intx(const struct rte_intr_handle *intr_handle) { + struct vfio_irq_set *irq_set; + char irq_set_buf[IRQ_SET_BUF_LEN]; + int len, ret; + int *fd_ptr; + + len = sizeof(irq_set_buf); + + /* enable INTx */ + irq_set = (struct vfio_irq_set *) irq_set_buf; + irq_set->argsz = len; + irq_set->count = 1; + irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER; + irq_set->index = VFIO_PCI_INTX_IRQ_INDEX; + irq_set->start = 0; + fd_ptr = (int *) &irq_set->data; + *fd_ptr = intr_handle->fd; - /* unmask INTx */ - irq_set.argsz = sizeof(irq_set); - irq_set.flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK; - irq_set.index = VFIO_PCI_INTX_IRQ_INDEX; - irq_set.start = 0; - irq_set.count = 1; + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); + + if (ret) { + RTE_LOG(ERR, EAL, "Error enabling INTx interrupts for fd %d\n", + intr_handle->fd); + return -1; + } + + /* unmask INTx after enabling */ + memset(irq_set, 0, len); + len = sizeof(struct vfio_irq_set); + irq_set->argsz = len; + irq_set->count = 1; + irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK; + irq_set->index = VFIO_PCI_INTX_IRQ_INDEX; + irq_set->start = 0; - ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, &irq_set); + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); if (ret) { RTE_LOG(ERR, EAL, "Error unmasking INTx interrupts for fd %d\n", @@ -133,51 +156,128 @@ vfio_enable_intx(const struct rte_intr_handle *intr_handle) /* disable legacy (INTx) interrupts */ static int -vfio_disable_intx(const struct rte_intr_handle *intr_handle) -{ - struct vfio_irq_set irq_set; - int ret; +vfio_disable_intx(const struct rte_intr_handle *intr_handle) { + struct vfio_irq_set *irq_set; + char irq_set_buf[IRQ_SET_BUF_LEN]; + int len, ret; + + len = sizeof(struct vfio_irq_set); - /* mask interrupts */ - irq_set.argsz = sizeof(irq_set); - irq_set.flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_MASK; - irq_set.index = VFIO_PCI_INTX_IRQ_INDEX; - irq_set.start = 0; - irq_set.count = 1; + /* mask interrupts before disabling */ + irq_set = (struct vfio_irq_set *) irq_set_buf; + irq_set->argsz = len; + irq_set->count = 1; + irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_MASK; + irq_set->index = VFIO_PCI_INTX_IRQ_INDEX; + irq_set->start = 0; - ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, &irq_set); + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); if (ret) { RTE_LOG(ERR, EAL, "Error masking INTx interrupts for fd %d\n", intr_handle->fd); return -1; } + + /* disable INTx*/ + memset(irq_set, 0, len); + irq_set->argsz = len; + irq_set->count = 0; + irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER; + irq_set->index = VFIO_PCI_INTX_IRQ_INDEX; + irq_set->start = 0; + + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); + + if (ret) { + RTE_LOG(ERR, EAL, + "Error disabling INTx interrupts for fd %d\n", intr_handle->fd); + return -1; + } return 0; } -/* enable MSI-X interrupts */ +/* enable MSI interrupts */ static int -vfio_enable_msix(const struct rte_intr_handle *intr_handle) -{ - char irq_set_buf[MSIX_IRQ_SET_BUF_LEN]; +vfio_enable_msi(const struct rte_intr_handle *intr_handle) { + int len, ret; + char irq_set_buf[IRQ_SET_BUF_LEN]; + struct vfio_irq_set *irq_set; + int *fd_ptr; + + len = sizeof(irq_set_buf); + + irq_set = (struct vfio_irq_set *) irq_set_buf; + irq_set->argsz = len; + irq_set->count = 1; + irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER; + irq_set->index = VFIO_PCI_MSI_IRQ_INDEX; + irq_set->start = 0; + fd_ptr = (int *) &irq_set->data; + *fd_ptr = intr_handle->fd; + + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); + + if (ret) { + RTE_LOG(ERR, EAL, "Error enabling MSI interrupts for fd %d\n", + intr_handle->fd); + return -1; + } + return 0; +} + +/* disable MSI interrupts */ +static int +vfio_disable_msi(const struct rte_intr_handle *intr_handle) { struct vfio_irq_set *irq_set; + char irq_set_buf[IRQ_SET_BUF_LEN]; int len, ret; - if (intr_handle->nb_efd == 0) - return 0; + len = sizeof(struct vfio_irq_set); + + irq_set = (struct vfio_irq_set *) irq_set_buf; + irq_set->argsz = len; + irq_set->count = 0; + irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER; + irq_set->index = VFIO_PCI_MSI_IRQ_INDEX; + irq_set->start = 0; + + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); + + if (ret) + RTE_LOG(ERR, EAL, + "Error disabling MSI interrupts for fd %d\n", intr_handle->fd); + + return ret; +} + +/* enable MSI-X interrupts */ +static int +vfio_enable_msix(const struct rte_intr_handle *intr_handle) { + int len, ret; + char irq_set_buf[MSIX_IRQ_SET_BUF_LEN]; + struct vfio_irq_set *irq_set; + int *fd_ptr; len = sizeof(irq_set_buf); irq_set = (struct vfio_irq_set *) irq_set_buf; irq_set->argsz = len; + /* 0 < irq_set->count < RTE_MAX_RXTX_INTR_VEC_ID + 1 */ + irq_set->count = intr_handle->max_intr ? + (intr_handle->max_intr > RTE_MAX_RXTX_INTR_VEC_ID + 1 ? + RTE_MAX_RXTX_INTR_VEC_ID + 1 : intr_handle->max_intr) : 1; irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER; irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX; - irq_set->start = RTE_INTR_VEC_RXTX_OFFSET; - irq_set->count = intr_handle->nb_efd; - memcpy(&irq_set->data, intr_handle->efds, - sizeof(*intr_handle->efds) * intr_handle->nb_efd); + irq_set->start = 0; + fd_ptr = (int *) &irq_set->data; + /* INTR vector offset 0 reserve for non-efds mapping */ + fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] = intr_handle->fd; + memcpy(&fd_ptr[RTE_INTR_VEC_RXTX_OFFSET], intr_handle->efds, + sizeof(*intr_handle->efds) * intr_handle->nb_efd); ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); + if (ret) { RTE_LOG(ERR, EAL, "Error enabling MSI-X interrupts for fd %d\n", intr_handle->fd); @@ -189,21 +289,22 @@ vfio_enable_msix(const struct rte_intr_handle *intr_handle) /* disable MSI-X interrupts */ static int -vfio_disable_msix(const struct rte_intr_handle *intr_handle) -{ - struct vfio_irq_set irq_set; - int ret; +vfio_disable_msix(const struct rte_intr_handle *intr_handle) { + struct vfio_irq_set *irq_set; + char irq_set_buf[MSIX_IRQ_SET_BUF_LEN]; + int len, ret; - if (intr_handle->nb_efd == 0) - return 0; + len = sizeof(struct vfio_irq_set); + + irq_set = (struct vfio_irq_set *) irq_set_buf; + irq_set->argsz = len; + irq_set->count = 0; + irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER; + irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX; + irq_set->start = 0; - irq_set.argsz = sizeof(irq_set); - irq_set.flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER; - irq_set.index = VFIO_PCI_MSIX_IRQ_INDEX; - irq_set.start = RTE_INTR_VEC_RXTX_OFFSET; - irq_set.count = intr_handle->nb_efd; + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); - ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, &irq_set); if (ret) RTE_LOG(ERR, EAL, "Error disabling MSI-X interrupts for fd %d\n", intr_handle->fd); @@ -564,7 +665,9 @@ rte_intr_enable(const struct rte_intr_handle *intr_handle) return -1; break; case RTE_INTR_HANDLE_VFIO_MSI: - return 0; + if (vfio_enable_msi(intr_handle)) + return -1; + break; case RTE_INTR_HANDLE_VFIO_LEGACY: if (vfio_enable_intx(intr_handle)) return -1; @@ -618,7 +721,9 @@ rte_intr_disable(const struct rte_intr_handle *intr_handle) return -1; break; case RTE_INTR_HANDLE_VFIO_MSI: - return 0; + if (vfio_disable_msi(intr_handle)) + return -1; + break; case RTE_INTR_HANDLE_VFIO_LEGACY: if (vfio_disable_intx(intr_handle)) return -1;