From 5f6ff30dc5075c49069d684bab229aef7ff0fdc3 Mon Sep 17 00:00:00 2001 From: Jingjing Wu Date: Tue, 10 Oct 2017 06:09:20 +0800 Subject: [PATCH] igb_uio: fix interrupt enablement after FLR in VM If pass-through a VF by vfio-pci to a Qemu VM, after FLR in VM, the interrupt setting is not recoverd correctly to host as below: in VM guest: Capabilities: [70] MSI-X: Enable+ Count=5 Masked- in Host: Capabilities: [70] MSI-X: Enable+ Count=5 Masked- That was because in pci_reset_function, it first reads the PCI configure and set FLR reset, and then writes PCI configure as restoration. But not all the writing are successful to Host. Because vfio-pci driver doesn't allow directly write PCI MSI-X Cap. To fix this issue, we need to move the interrupt enablement from igb_uio probe to open device file. While it is also the similar as the behaviour in vfio_pci kernel module code. Fixes: b58eedfc7dd5 ("igb_uio: issue FLR during open and release of device file") Cc: stable@dpdk.org Signed-off-by: Jingjing Wu Signed-off-by: Jianfeng Tan Tested-by: Shijith Thotton Acked-by: Ferruh Yigit --- lib/librte_eal/linuxapp/igb_uio/igb_uio.c | 221 ++++++++++++---------- 1 file changed, 119 insertions(+), 102 deletions(-) diff --git a/lib/librte_eal/linuxapp/igb_uio/igb_uio.c b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c index b67d7750cf..3884448ad4 100644 --- a/lib/librte_eal/linuxapp/igb_uio/igb_uio.c +++ b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c @@ -49,7 +49,6 @@ struct rte_uio_pci_dev { static char *intr_mode; static enum rte_intr_mode igbuio_intr_mode_preferred = RTE_INTR_MODE_MSIX; - /* sriov sysfs */ static ssize_t show_max_vfs(struct device *dev, struct device_attribute *attr, @@ -208,8 +207,10 @@ igbuio_pci_irqcontrol(struct uio_info *info, s32 irq_state) * If yes, disable it here and will be enable later. */ static irqreturn_t -igbuio_pci_irqhandler(int irq, struct uio_info *info) +igbuio_pci_irqhandler(int irq, void *dev_id) { + struct uio_device *idev = (struct uio_device *)dev_id; + struct uio_info *info = idev->info; struct rte_uio_pci_dev *udev = info->priv; /* Legacy mode need to mask in hardware */ @@ -217,10 +218,115 @@ igbuio_pci_irqhandler(int irq, struct uio_info *info) !pci_check_and_mask_intx(udev->pdev)) return IRQ_NONE; + uio_event_notify(info); + /* Message signal mode, no share IRQ and automasked */ return IRQ_HANDLED; } +static int +igbuio_pci_enable_interrupts(struct rte_uio_pci_dev *udev) +{ + int err = 0; +#ifndef HAVE_ALLOC_IRQ_VECTORS + struct msix_entry msix_entry; +#endif + + switch (igbuio_intr_mode_preferred) { + case RTE_INTR_MODE_MSIX: + /* Only 1 msi-x vector needed */ +#ifndef HAVE_ALLOC_IRQ_VECTORS + msix_entry.entry = 0; + if (pci_enable_msix(udev->pdev, &msix_entry, 1) == 0) { + dev_dbg(&udev->pdev->dev, "using MSI-X"); + udev->info.irq_flags = IRQF_NO_THREAD; + udev->info.irq = msix_entry.vector; + udev->mode = RTE_INTR_MODE_MSIX; + break; + } +#else + if (pci_alloc_irq_vectors(udev->pdev, 1, 1, PCI_IRQ_MSIX) == 1) { + dev_dbg(&udev->pdev->dev, "using MSI-X"); + udev->info.irq_flags = IRQF_NO_THREAD; + udev->info.irq = pci_irq_vector(udev->pdev, 0); + udev->mode = RTE_INTR_MODE_MSIX; + break; + } +#endif + + /* fall back to MSI */ + case RTE_INTR_MODE_MSI: +#ifndef HAVE_ALLOC_IRQ_VECTORS + if (pci_enable_msi(udev->pdev) == 0) { + dev_dbg(&udev->pdev->dev, "using MSI"); + udev->info.irq_flags = IRQF_NO_THREAD; + udev->info.irq = udev->pdev->irq; + udev->mode = RTE_INTR_MODE_MSI; + break; + } +#else + if (pci_alloc_irq_vectors(udev->pdev, 1, 1, PCI_IRQ_MSI) == 1) { + dev_dbg(&udev->pdev->dev, "using MSI"); + udev->info.irq_flags = IRQF_NO_THREAD; + udev->info.irq = pci_irq_vector(udev->pdev, 0); + udev->mode = RTE_INTR_MODE_MSI; + break; + } +#endif + /* fall back to INTX */ + case RTE_INTR_MODE_LEGACY: + if (pci_intx_mask_supported(udev->pdev)) { + dev_dbg(&udev->pdev->dev, "using INTX"); + udev->info.irq_flags = IRQF_SHARED | IRQF_NO_THREAD; + udev->info.irq = udev->pdev->irq; + udev->mode = RTE_INTR_MODE_LEGACY; + break; + } + dev_notice(&udev->pdev->dev, "PCI INTX mask not supported\n"); + /* fall back to no IRQ */ + case RTE_INTR_MODE_NONE: + udev->mode = RTE_INTR_MODE_NONE; + udev->info.irq = UIO_IRQ_NONE; + break; + + default: + dev_err(&udev->pdev->dev, "invalid IRQ mode %u", + igbuio_intr_mode_preferred); + udev->info.irq = UIO_IRQ_NONE; + err = -EINVAL; + } + + if (udev->info.irq != UIO_IRQ_NONE) + err = request_irq(udev->info.irq, igbuio_pci_irqhandler, + udev->info.irq_flags, udev->info.name, + udev->info.uio_dev); + dev_info(&udev->pdev->dev, "uio device registered with irq %lx\n", + udev->info.irq); + + return err; +} + +static void +igbuio_pci_disable_interrupts(struct rte_uio_pci_dev *udev) +{ + if (udev->info.irq) { + free_irq(udev->info.irq, udev->info.uio_dev); + udev->info.irq = 0; + } + +#ifndef HAVE_ALLOC_IRQ_VECTORS + if (udev->mode == RTE_INTR_MODE_MSIX) + pci_disable_msix(udev->pdev); + if (udev->mode == RTE_INTR_MODE_MSI) + pci_disable_msi(udev->pdev); +#else + if (udev->mode == RTE_INTR_MODE_MSIX || + udev->mode == RTE_INTR_MODE_MSI) + pci_free_irq_vectors(udev->pdev); +#endif +} + + /** * This gets called while opening uio device file. */ @@ -229,12 +335,19 @@ igbuio_pci_open(struct uio_info *info, struct inode *inode) { struct rte_uio_pci_dev *udev = info->priv; struct pci_dev *dev = udev->pdev; + int err; pci_reset_function(dev); /* set bus master, which was cleared by the reset function */ pci_set_master(dev); + /* enable interrupts */ + err = igbuio_pci_enable_interrupts(udev); + if (err) { + dev_err(&dev->dev, "Enable interrupt fails\n"); + return err; + } return 0; } @@ -244,6 +357,9 @@ igbuio_pci_release(struct uio_info *info, struct inode *inode) struct rte_uio_pci_dev *udev = info->priv; struct pci_dev *dev = udev->pdev; + /* disable interrupts */ + igbuio_pci_disable_interrupts(udev); + /* stop the device from further DMA */ pci_clear_master(dev); @@ -313,94 +429,6 @@ igbuio_pci_release_iomem(struct uio_info *info) } } -static int -igbuio_pci_enable_interrupts(struct rte_uio_pci_dev *udev) -{ - int err = 0; -#ifndef HAVE_ALLOC_IRQ_VECTORS - struct msix_entry msix_entry; -#endif - - switch (igbuio_intr_mode_preferred) { - case RTE_INTR_MODE_MSIX: - /* Only 1 msi-x vector needed */ -#ifndef HAVE_ALLOC_IRQ_VECTORS - msix_entry.entry = 0; - if (pci_enable_msix(udev->pdev, &msix_entry, 1) == 0) { - dev_dbg(&udev->pdev->dev, "using MSI-X"); - udev->info.irq_flags = IRQF_NO_THREAD; - udev->info.irq = msix_entry.vector; - udev->mode = RTE_INTR_MODE_MSIX; - break; - } -#else - if (pci_alloc_irq_vectors(udev->pdev, 1, 1, PCI_IRQ_MSIX) == 1) { - dev_dbg(&udev->pdev->dev, "using MSI-X"); - udev->info.irq_flags = IRQF_NO_THREAD; - udev->info.irq = pci_irq_vector(udev->pdev, 0); - udev->mode = RTE_INTR_MODE_MSIX; - break; - } -#endif - /* fall back to MSI */ - case RTE_INTR_MODE_MSI: -#ifndef HAVE_ALLOC_IRQ_VECTORS - if (pci_enable_msi(udev->pdev) == 0) { - dev_dbg(&udev->pdev->dev, "using MSI"); - udev->info.irq_flags = IRQF_NO_THREAD; - udev->info.irq = udev->pdev->irq; - udev->mode = RTE_INTR_MODE_MSI; - break; - } -#else - if (pci_alloc_irq_vectors(udev->pdev, 1, 1, PCI_IRQ_MSI) == 1) { - dev_dbg(&udev->pdev->dev, "using MSI"); - udev->info.irq_flags = IRQF_NO_THREAD; - udev->info.irq = pci_irq_vector(udev->pdev, 0); - udev->mode = RTE_INTR_MODE_MSI; - break; - } -#endif - /* fall back to INTX */ - case RTE_INTR_MODE_LEGACY: - if (pci_intx_mask_supported(udev->pdev)) { - dev_dbg(&udev->pdev->dev, "using INTX"); - udev->info.irq_flags = IRQF_SHARED | IRQF_NO_THREAD; - udev->info.irq = udev->pdev->irq; - udev->mode = RTE_INTR_MODE_LEGACY; - break; - } - dev_notice(&udev->pdev->dev, "PCI INTX mask not supported\n"); - /* fall back to no IRQ */ - case RTE_INTR_MODE_NONE: - udev->mode = RTE_INTR_MODE_NONE; - udev->info.irq = UIO_IRQ_NONE; - break; - - default: - dev_err(&udev->pdev->dev, "invalid IRQ mode %u", - igbuio_intr_mode_preferred); - err = -EINVAL; - } - - return err; -} - -static void -igbuio_pci_disable_interrupts(struct rte_uio_pci_dev *udev) -{ -#ifndef HAVE_ALLOC_IRQ_VECTORS - if (udev->mode == RTE_INTR_MODE_MSIX) - pci_disable_msix(udev->pdev); - if (udev->mode == RTE_INTR_MODE_MSI) - pci_disable_msi(udev->pdev); -#else - if (udev->mode == RTE_INTR_MODE_MSIX || - udev->mode == RTE_INTR_MODE_MSI) - pci_free_irq_vectors(udev->pdev); -#endif -} - static int igbuio_setup_bars(struct pci_dev *dev, struct uio_info *info) { @@ -491,20 +519,15 @@ igbuio_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) /* fill uio infos */ udev->info.name = "igb_uio"; udev->info.version = "0.1"; - udev->info.handler = igbuio_pci_irqhandler; udev->info.irqcontrol = igbuio_pci_irqcontrol; udev->info.open = igbuio_pci_open; udev->info.release = igbuio_pci_release; udev->info.priv = udev; udev->pdev = dev; - err = igbuio_pci_enable_interrupts(udev); - if (err != 0) - goto fail_release_iomem; - err = sysfs_create_group(&dev->dev.kobj, &dev_attr_grp); if (err != 0) - goto fail_disable_interrupts; + goto fail_release_iomem; /* register uio driver */ err = uio_register_device(&dev->dev, &udev->info); @@ -513,9 +536,6 @@ igbuio_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) pci_set_drvdata(dev, udev); - dev_info(&dev->dev, "uio device registered with irq %lx\n", - udev->info.irq); - /* * Doing a harmless dma mapping for attaching the device to * the iommu identity mapping if kernel boots with iommu=pt. @@ -541,8 +561,6 @@ igbuio_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) fail_remove_group: sysfs_remove_group(&dev->dev.kobj, &dev_attr_grp); -fail_disable_interrupts: - igbuio_pci_disable_interrupts(udev); fail_release_iomem: igbuio_pci_release_iomem(&udev->info); pci_disable_device(dev); @@ -559,7 +577,6 @@ igbuio_pci_remove(struct pci_dev *dev) sysfs_remove_group(&dev->dev.kobj, &dev_attr_grp); uio_unregister_device(&udev->info); - igbuio_pci_disable_interrupts(udev); igbuio_pci_release_iomem(&udev->info); pci_disable_device(dev); pci_set_drvdata(dev, NULL); -- 2.20.1