diff options
Diffstat (limited to 'arch/powerpc/kernel/eeh_driver.c')
-rw-r--r-- | arch/powerpc/kernel/eeh_driver.c | 128 |
1 file changed, 102 insertions, 26 deletions
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 420da61d4ce..6535936bdf2 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -180,6 +180,22 @@ static bool eeh_dev_removed(struct eeh_dev *edev) return false; } +static void *eeh_dev_save_state(void *data, void *userdata) +{ + struct eeh_dev *edev = data; + struct pci_dev *pdev; + + if (!edev) + return NULL; + + pdev = eeh_dev_to_pci_dev(edev); + if (!pdev) + return NULL; + + pci_save_state(pdev); + return NULL; +} + /** * eeh_report_error - Report pci error to each device driver * @data: eeh device @@ -303,6 +319,22 @@ static void *eeh_report_reset(void *data, void *userdata) return NULL; } +static void *eeh_dev_restore_state(void *data, void *userdata) +{ + struct eeh_dev *edev = data; + struct pci_dev *pdev; + + if (!edev) + return NULL; + + pdev = eeh_dev_to_pci_dev(edev); + if (!pdev) + return NULL; + + pci_restore_state(pdev); + return NULL; +} + /** * eeh_report_resume - Tell device to resume normal operations * @data: eeh device @@ -450,38 +482,82 @@ static void *eeh_pe_detach_dev(void *data, void *userdata) static void *__eeh_clear_pe_frozen_state(void *data, void *flag) { struct eeh_pe *pe = (struct eeh_pe *)data; - int i, rc; + bool *clear_sw_state = flag; + int i, rc = 1; - for (i = 0; i < 3; i++) { - rc = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO); - if (rc) - continue; - rc = eeh_pci_enable(pe, EEH_OPT_THAW_DMA); - if (!rc) - break; - } + for (i = 0; rc && i < 3; i++) + rc = eeh_unfreeze_pe(pe, clear_sw_state); - /* The PE has been isolated, clear it */ + /* Stop immediately on any errors */ if (rc) { - pr_warn("%s: Can't clear frozen PHB#%x-PE#%x (%d)\n", - __func__, pe->phb->global_number, pe->addr, rc); + pr_warn("%s: Failure %d unfreezing PHB#%x-PE#%x\n", + __func__, rc, pe->phb->global_number, pe->addr); return (void *)pe; } return NULL; } -static int eeh_clear_pe_frozen_state(struct eeh_pe *pe) +static int 
eeh_clear_pe_frozen_state(struct eeh_pe *pe, + bool clear_sw_state) { void *rc; - rc = eeh_pe_traverse(pe, __eeh_clear_pe_frozen_state, NULL); + rc = eeh_pe_traverse(pe, __eeh_clear_pe_frozen_state, &clear_sw_state); if (!rc) eeh_pe_state_clear(pe, EEH_PE_ISOLATED); return rc ? -EIO : 0; } +int eeh_pe_reset_and_recover(struct eeh_pe *pe) +{ + int result, ret; + + /* Bail if the PE is being recovered */ + if (pe->state & EEH_PE_RECOVERING) + return 0; + + /* Put the PE into recovery mode */ + eeh_pe_state_mark(pe, EEH_PE_RECOVERING); + + /* Save states */ + eeh_pe_dev_traverse(pe, eeh_dev_save_state, NULL); + + /* Report error */ + eeh_pe_dev_traverse(pe, eeh_report_error, &result); + + /* Issue reset */ + eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED); + ret = eeh_reset_pe(pe); + if (ret) { + eeh_pe_state_clear(pe, EEH_PE_RECOVERING | EEH_PE_CFG_BLOCKED); + return ret; + } + eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED); + + /* Unfreeze the PE */ + ret = eeh_clear_pe_frozen_state(pe, true); + if (ret) { + eeh_pe_state_clear(pe, EEH_PE_RECOVERING); + return ret; + } + + /* Notify completion of reset */ + eeh_pe_dev_traverse(pe, eeh_report_reset, &result); + + /* Restore device state */ + eeh_pe_dev_traverse(pe, eeh_dev_restore_state, NULL); + + /* Resume */ + eeh_pe_dev_traverse(pe, eeh_report_resume, NULL); + + /* Clear recovery mode */ + eeh_pe_state_clear(pe, EEH_PE_RECOVERING); + + return 0; +} + /** * eeh_reset_device - Perform actual reset of a pci slot * @pe: EEH PE @@ -525,10 +601,10 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus) * config accesses. So we prefer to block them. However, controlled * PCI config accesses initiated from EEH itself are allowed. 
*/ - eeh_pe_state_mark(pe, EEH_PE_RESET); + eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED); rc = eeh_reset_pe(pe); if (rc) { - eeh_pe_state_clear(pe, EEH_PE_RESET); + eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED); return rc; } @@ -537,10 +613,10 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus) /* Restore PE */ eeh_ops->configure_bridge(pe); eeh_pe_restore_bars(pe); - eeh_pe_state_clear(pe, EEH_PE_RESET); + eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED); /* Clear frozen state */ - rc = eeh_clear_pe_frozen_state(pe); + rc = eeh_clear_pe_frozen_state(pe, false); if (rc) return rc; @@ -599,7 +675,7 @@ static void eeh_handle_normal_event(struct eeh_pe *pe) pe->freeze_count++; if (pe->freeze_count > EEH_MAX_ALLOWED_FREEZES) goto excess_failures; - pr_warning("EEH: This PCI device has failed %d times in the last hour\n", + pr_warn("EEH: This PCI device has failed %d times in the last hour\n", pe->freeze_count); /* Walk the various device drivers attached to this slot through @@ -616,7 +692,7 @@ static void eeh_handle_normal_event(struct eeh_pe *pe) */ rc = eeh_ops->wait_state(pe, MAX_WAIT_FOR_RECOVERY*1000); if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) { - pr_warning("EEH: Permanent failure\n"); + pr_warn("EEH: Permanent failure\n"); goto hard_fail; } @@ -635,8 +711,8 @@ static void eeh_handle_normal_event(struct eeh_pe *pe) pr_info("EEH: Reset with hotplug activity\n"); rc = eeh_reset_device(pe, frozen_bus); if (rc) { - pr_warning("%s: Unable to reset, err=%d\n", - __func__, rc); + pr_warn("%s: Unable to reset, err=%d\n", + __func__, rc); goto hard_fail; } } @@ -678,7 +754,7 @@ static void eeh_handle_normal_event(struct eeh_pe *pe) /* If any device has a hard failure, then shut off everything. 
*/ if (result == PCI_ERS_RESULT_DISCONNECT) { - pr_warning("EEH: Device driver gave up\n"); + pr_warn("EEH: Device driver gave up\n"); goto hard_fail; } @@ -687,8 +763,8 @@ static void eeh_handle_normal_event(struct eeh_pe *pe) pr_info("EEH: Reset without hotplug activity\n"); rc = eeh_reset_device(pe, NULL); if (rc) { - pr_warning("%s: Cannot reset, err=%d\n", - __func__, rc); + pr_warn("%s: Cannot reset, err=%d\n", + __func__, rc); goto hard_fail; } @@ -701,7 +777,7 @@ static void eeh_handle_normal_event(struct eeh_pe *pe) /* All devices should claim they have recovered by now. */ if ((result != PCI_ERS_RESULT_RECOVERED) && (result != PCI_ERS_RESULT_NONE)) { - pr_warning("EEH: Not recovered\n"); + pr_warn("EEH: Not recovered\n"); goto hard_fail; } |