summaryrefslogtreecommitdiffstats
path: root/arch/powerpc/kernel/eeh_driver.c
diff options
context:
space:
mode:
authorGavin Shan <shangw@linux.vnet.ibm.com>2013-07-24 10:24:58 +0800
committerBenjamin Herrenschmidt <benh@kernel.crashing.org>2013-07-24 14:18:48 +1000
commitf5c57710dd62dd06f176934a8b4b8accbf00f9f8 (patch)
tree6d6eb17f320ac2d57d5a801f9e0773a290040f3b /arch/powerpc/kernel/eeh_driver.c
parentab444ec97e8bd65fff9d489b9a409fc03979268b (diff)
powerpc/eeh: Use partial hotplug for EEH unaware drivers
When EEH error happens to one specific PE, some devices with drivers supporting EEH won't except hotplug on the device. However, there might have other deivces without driver, or with driver without EEH support. For the case, we need do partial hotplug in order to make sure that the PE becomes absolutely quite during reset. Otherise, the PE reset might fail and leads to failure of error recovery. The current code doesn't handle that 'mixed' case properly, it either uses the error callbacks to the drivers, or tries hotplug, but doesn't handle a PE (EEH domain) composed of a combination of the two. The patch intends to support so-called "partial" hotplug for EEH: Before we do reset, we stop and remove those PCI devices without EEH sensitive driver. The corresponding EEH devices are not detached from its PE, but with special flag. After the reset is done, those EEH devices with the special flag will be scanned one by one. Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Diffstat (limited to 'arch/powerpc/kernel/eeh_driver.c')
-rw-r--r--arch/powerpc/kernel/eeh_driver.c74
1 files changed, 69 insertions, 5 deletions
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 9ef3bbb8580..9fda75d1f5a 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -338,6 +338,54 @@ static void *eeh_report_failure(void *data, void *userdata)
return NULL;
}
+static void *eeh_rmv_device(void *data, void *userdata)
+{
+ struct pci_driver *driver;
+ struct eeh_dev *edev = (struct eeh_dev *)data;
+ struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
+ int *removed = (int *)userdata;
+
+ /*
+ * Actually, we should remove the PCI bridges as well.
+ * However, that's lots of complexity to do that,
+ * particularly some of devices under the bridge might
+ * support EEH. So we just care about PCI devices for
+ * simplicity here.
+ */
+ if (!dev || (dev->hdr_type & PCI_HEADER_TYPE_BRIDGE))
+ return NULL;
+ driver = eeh_pcid_get(dev);
+ if (driver && driver->err_handler)
+ return NULL;
+
+ /* Remove it from PCI subsystem */
+ pr_debug("EEH: Removing %s without EEH sensitive driver\n",
+ pci_name(dev));
+ edev->bus = dev->bus;
+ edev->mode |= EEH_DEV_DISCONNECTED;
+ (*removed)++;
+
+ pci_stop_and_remove_bus_device(dev);
+
+ return NULL;
+}
+
+static void *eeh_pe_detach_dev(void *data, void *userdata)
+{
+ struct eeh_pe *pe = (struct eeh_pe *)data;
+ struct eeh_dev *edev, *tmp;
+
+ eeh_pe_for_each_dev(pe, edev, tmp) {
+ if (!(edev->mode & EEH_DEV_DISCONNECTED))
+ continue;
+
+ edev->mode &= ~(EEH_DEV_DISCONNECTED | EEH_DEV_IRQ_DISABLED);
+ eeh_rmv_from_parent_pe(edev);
+ }
+
+ return NULL;
+}
+
/**
* eeh_reset_device - Perform actual reset of a pci slot
* @pe: EEH PE
@@ -349,8 +397,9 @@ static void *eeh_report_failure(void *data, void *userdata)
*/
static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
{
+ struct pci_bus *frozen_bus = eeh_pe_bus_get(pe);
struct timeval tstamp;
- int cnt, rc;
+ int cnt, rc, removed = 0;
/* pcibios will clear the counter; save the value */
cnt = pe->freeze_count;
@@ -362,10 +411,11 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
* devices are expected to be attached soon when calling
* into pcibios_add_pci_devices().
*/
- if (bus) {
- eeh_pe_state_mark(pe, EEH_PE_KEEP);
+ eeh_pe_state_mark(pe, EEH_PE_KEEP);
+ if (bus)
pcibios_remove_pci_devices(bus);
- }
+ else if (frozen_bus)
+ eeh_pe_dev_traverse(pe, eeh_rmv_device, &removed);
/* Reset the pci controller. (Asserts RST#; resets config space).
* Reconfigure bridges and devices. Don't try to bring the system
@@ -386,10 +436,24 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
* potentially weird things happen.
*/
if (bus) {
+ pr_info("EEH: Sleep 5s ahead of complete hotplug\n");
ssleep(5);
+
+ /*
+ * The EEH device is still connected with its parent
+ * PE. We should disconnect it so the binding can be
+ * rebuilt when adding PCI devices.
+ */
+ eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL);
pcibios_add_pci_devices(bus);
- eeh_pe_state_clear(pe, EEH_PE_KEEP);
+ } else if (frozen_bus && removed) {
+ pr_info("EEH: Sleep 5s ahead of partial hotplug\n");
+ ssleep(5);
+
+ eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL);
+ pcibios_add_pci_devices(frozen_bus);
}
+ eeh_pe_state_clear(pe, EEH_PE_KEEP);
pe->tstamp = tstamp;
pe->freeze_count = cnt;