summaryrefslogtreecommitdiffstats
path: root/arch/powerpc/kernel/eeh.c
diff options
context:
space:
mode:
authorGavin Shan <shangw@linux.vnet.ibm.com>2013-07-24 10:24:58 +0800
committerBenjamin Herrenschmidt <benh@kernel.crashing.org>2013-07-24 14:18:48 +1000
commitf5c57710dd62dd06f176934a8b4b8accbf00f9f8 (patch)
tree6d6eb17f320ac2d57d5a801f9e0773a290040f3b /arch/powerpc/kernel/eeh.c
parentab444ec97e8bd65fff9d489b9a409fc03979268b (diff)
powerpc/eeh: Use partial hotplug for EEH unaware drivers
When EEH error happens to one specific PE, some devices with drivers supporting EEH won't except hotplug on the device. However, there might have other deivces without driver, or with driver without EEH support. For the case, we need do partial hotplug in order to make sure that the PE becomes absolutely quite during reset. Otherise, the PE reset might fail and leads to failure of error recovery. The current code doesn't handle that 'mixed' case properly, it either uses the error callbacks to the drivers, or tries hotplug, but doesn't handle a PE (EEH domain) composed of a combination of the two. The patch intends to support so-called "partial" hotplug for EEH: Before we do reset, we stop and remove those PCI devices without EEH sensitive driver. The corresponding EEH devices are not detached from its PE, but with special flag. After the reset is done, those EEH devices with the special flag will be scanned one by one. Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Diffstat (limited to 'arch/powerpc/kernel/eeh.c')
-rw-r--r--arch/powerpc/kernel/eeh.c30
1 files changed, 27 insertions, 3 deletions
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 56bd4584f61..a5783f1a7a9 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -900,7 +900,21 @@ void eeh_add_device_late(struct pci_dev *dev)
pr_debug("EEH: Already referenced !\n");
return;
}
- WARN_ON(edev->pdev);
+
+ /*
+ * The EEH cache might not be removed correctly because of
+ * unbalanced kref to the device during unplug time, which
+ * relies on pcibios_release_device(). So we have to remove
+ * that here explicitly.
+ */
+ if (edev->pdev) {
+ eeh_rmv_from_parent_pe(edev);
+ eeh_addr_cache_rmv_dev(edev->pdev);
+ eeh_sysfs_remove_device(edev->pdev);
+
+ edev->pdev = NULL;
+ dev->dev.archdata.edev = NULL;
+ }
edev->pdev = dev;
dev->dev.archdata.edev = edev;
@@ -982,14 +996,24 @@ void eeh_remove_device(struct pci_dev *dev)
/* Unregister the device with the EEH/PCI address search system */
pr_debug("EEH: Removing device %s\n", pci_name(dev));
- if (!edev || !edev->pdev) {
+ if (!edev || !edev->pdev || !edev->pe) {
pr_debug("EEH: Not referenced !\n");
return;
}
+
+ /*
+ * During the hotplug for EEH error recovery, we need the EEH
+ * device attached to the parent PE in order for BAR restore
+ * a bit later. So we keep it for BAR restore and remove it
+ * from the parent PE during the BAR resotre.
+ */
edev->pdev = NULL;
dev->dev.archdata.edev = NULL;
+ if (!(edev->pe->state & EEH_PE_KEEP))
+ eeh_rmv_from_parent_pe(edev);
+ else
+ edev->mode |= EEH_DEV_DISCONNECTED;
- eeh_rmv_from_parent_pe(edev);
eeh_addr_cache_rmv_dev(dev);
eeh_sysfs_remove_device(dev);
}