summaryrefslogtreecommitdiffstats
path: root/arch/powerpc/platforms/pseries
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/platforms/pseries')
-rw-r--r--arch/powerpc/platforms/pseries/Makefile5
-rw-r--r--arch/powerpc/platforms/pseries/eeh.c543
-rw-r--r--arch/powerpc/platforms/pseries/eeh_cache.c59
-rw-r--r--arch/powerpc/platforms/pseries/eeh_dev.c14
-rw-r--r--arch/powerpc/platforms/pseries/eeh_driver.c310
-rw-r--r--arch/powerpc/platforms/pseries/eeh_event.c59
-rw-r--r--arch/powerpc/platforms/pseries/eeh_pe.c652
-rw-r--r--arch/powerpc/platforms/pseries/eeh_pseries.c247
-rw-r--r--arch/powerpc/platforms/pseries/eeh_sysfs.c9
-rw-r--r--arch/powerpc/platforms/pseries/hotplug-memory.c12
-rw-r--r--arch/powerpc/platforms/pseries/iommu.c12
-rw-r--r--arch/powerpc/platforms/pseries/lpar.c77
-rw-r--r--arch/powerpc/platforms/pseries/msi.c26
-rw-r--r--arch/powerpc/platforms/pseries/pci.c2
-rw-r--r--arch/powerpc/platforms/pseries/pci_dlpar.c32
-rw-r--r--arch/powerpc/platforms/pseries/processor_idle.c62
-rw-r--r--arch/powerpc/platforms/pseries/setup.c22
17 files changed, 1307 insertions, 836 deletions
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile
index c222189f5bb..890622b87c8 100644
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -6,8 +6,9 @@ obj-y := lpar.o hvCall.o nvram.o reconfig.o \
firmware.o power.o dlpar.o mobility.o
obj-$(CONFIG_SMP) += smp.o
obj-$(CONFIG_SCANLOG) += scanlog.o
-obj-$(CONFIG_EEH) += eeh.o eeh_dev.o eeh_cache.o eeh_driver.o \
- eeh_event.o eeh_sysfs.o eeh_pseries.o
+obj-$(CONFIG_EEH) += eeh.o eeh_pe.o eeh_dev.o eeh_cache.o \
+ eeh_driver.o eeh_event.o eeh_sysfs.o \
+ eeh_pseries.o
obj-$(CONFIG_KEXEC) += kexec.o
obj-$(CONFIG_PCI) += pci.o pci_dlpar.o
obj-$(CONFIG_PSERIES_MSI) += msi.o
diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c
index ecd394cf34e..9a04322b173 100644
--- a/arch/powerpc/platforms/pseries/eeh.c
+++ b/arch/powerpc/platforms/pseries/eeh.c
@@ -92,6 +92,20 @@ struct eeh_ops *eeh_ops = NULL;
int eeh_subsystem_enabled;
EXPORT_SYMBOL(eeh_subsystem_enabled);
+/*
+ * EEH probe mode support. The intention is to support multiple
+ * platforms for EEH. Some platforms like pSeries do PCI emunation
+ * based on device tree. However, other platforms like powernv probe
+ * PCI devices from hardware. The flag is used to distinguish that.
+ * In addition, struct eeh_ops::probe would be invoked for particular
+ * OF node or PCI device so that the corresponding PE would be created
+ * there.
+ */
+int eeh_probe_mode;
+
+/* Global EEH mutex */
+DEFINE_MUTEX(eeh_mutex);
+
/* Lock to avoid races due to multiple reports of an error */
static DEFINE_RAW_SPINLOCK(confirm_error_lock);
@@ -204,22 +218,12 @@ static size_t eeh_gather_pci_data(struct eeh_dev *edev, char * buf, size_t len)
}
}
- /* Gather status on devices under the bridge */
- if (dev->class >> 16 == PCI_BASE_CLASS_BRIDGE) {
- struct device_node *child;
-
- for_each_child_of_node(dn, child) {
- if (of_node_to_eeh_dev(child))
- n += eeh_gather_pci_data(of_node_to_eeh_dev(child), buf+n, len-n);
- }
- }
-
return n;
}
/**
* eeh_slot_error_detail - Generate combined log including driver log and error log
- * @edev: device to report error log for
+ * @pe: EEH PE
* @severity: temporary or permanent error log
*
* This routine should be called to generate the combined log, which
@@ -227,17 +231,22 @@ static size_t eeh_gather_pci_data(struct eeh_dev *edev, char * buf, size_t len)
* out from the config space of the corresponding PCI device, while
* the error log is fetched through platform dependent function call.
*/
-void eeh_slot_error_detail(struct eeh_dev *edev, int severity)
+void eeh_slot_error_detail(struct eeh_pe *pe, int severity)
{
size_t loglen = 0;
- pci_regs_buf[0] = 0;
+ struct eeh_dev *edev;
- eeh_pci_enable(edev, EEH_OPT_THAW_MMIO);
- eeh_ops->configure_bridge(eeh_dev_to_of_node(edev));
- eeh_restore_bars(edev);
- loglen = eeh_gather_pci_data(edev, pci_regs_buf, EEH_PCI_REGS_LOG_LEN);
+ eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
+ eeh_ops->configure_bridge(pe);
+ eeh_pe_restore_bars(pe);
- eeh_ops->get_log(eeh_dev_to_of_node(edev), severity, pci_regs_buf, loglen);
+ pci_regs_buf[0] = 0;
+ eeh_pe_for_each_dev(pe, edev) {
+ loglen += eeh_gather_pci_data(edev, pci_regs_buf,
+ EEH_PCI_REGS_LOG_LEN);
+ }
+
+ eeh_ops->get_log(pe, severity, pci_regs_buf, loglen);
}
/**
@@ -261,126 +270,8 @@ static inline unsigned long eeh_token_to_phys(unsigned long token)
}
/**
- * eeh_find_device_pe - Retrieve the PE for the given device
- * @dn: device node
- *
- * Return the PE under which this device lies
- */
-struct device_node *eeh_find_device_pe(struct device_node *dn)
-{
- while (dn->parent && of_node_to_eeh_dev(dn->parent) &&
- (of_node_to_eeh_dev(dn->parent)->mode & EEH_MODE_SUPPORTED)) {
- dn = dn->parent;
- }
- return dn;
-}
-
-/**
- * __eeh_mark_slot - Mark all child devices as failed
- * @parent: parent device
- * @mode_flag: failure flag
- *
- * Mark all devices that are children of this device as failed.
- * Mark the device driver too, so that it can see the failure
- * immediately; this is critical, since some drivers poll
- * status registers in interrupts ... If a driver is polling,
- * and the slot is frozen, then the driver can deadlock in
- * an interrupt context, which is bad.
- */
-static void __eeh_mark_slot(struct device_node *parent, int mode_flag)
-{
- struct device_node *dn;
-
- for_each_child_of_node(parent, dn) {
- if (of_node_to_eeh_dev(dn)) {
- /* Mark the pci device driver too */
- struct pci_dev *dev = of_node_to_eeh_dev(dn)->pdev;
-
- of_node_to_eeh_dev(dn)->mode |= mode_flag;
-
- if (dev && dev->driver)
- dev->error_state = pci_channel_io_frozen;
-
- __eeh_mark_slot(dn, mode_flag);
- }
- }
-}
-
-/**
- * eeh_mark_slot - Mark the indicated device and its children as failed
- * @dn: parent device
- * @mode_flag: failure flag
- *
- * Mark the indicated device and its child devices as failed.
- * The device drivers are marked as failed as well.
- */
-void eeh_mark_slot(struct device_node *dn, int mode_flag)
-{
- struct pci_dev *dev;
- dn = eeh_find_device_pe(dn);
-
- /* Back up one, since config addrs might be shared */
- if (!pcibios_find_pci_bus(dn) && of_node_to_eeh_dev(dn->parent))
- dn = dn->parent;
-
- of_node_to_eeh_dev(dn)->mode |= mode_flag;
-
- /* Mark the pci device too */
- dev = of_node_to_eeh_dev(dn)->pdev;
- if (dev)
- dev->error_state = pci_channel_io_frozen;
-
- __eeh_mark_slot(dn, mode_flag);
-}
-
-/**
- * __eeh_clear_slot - Clear failure flag for the child devices
- * @parent: parent device
- * @mode_flag: flag to be cleared
- *
- * Clear failure flag for the child devices.
- */
-static void __eeh_clear_slot(struct device_node *parent, int mode_flag)
-{
- struct device_node *dn;
-
- for_each_child_of_node(parent, dn) {
- if (of_node_to_eeh_dev(dn)) {
- of_node_to_eeh_dev(dn)->mode &= ~mode_flag;
- of_node_to_eeh_dev(dn)->check_count = 0;
- __eeh_clear_slot(dn, mode_flag);
- }
- }
-}
-
-/**
- * eeh_clear_slot - Clear failure flag for the indicated device and its children
- * @dn: parent device
- * @mode_flag: flag to be cleared
- *
- * Clear failure flag for the indicated device and its children.
- */
-void eeh_clear_slot(struct device_node *dn, int mode_flag)
-{
- unsigned long flags;
- raw_spin_lock_irqsave(&confirm_error_lock, flags);
-
- dn = eeh_find_device_pe(dn);
-
- /* Back up one, since config addrs might be shared */
- if (!pcibios_find_pci_bus(dn) && of_node_to_eeh_dev(dn->parent))
- dn = dn->parent;
-
- of_node_to_eeh_dev(dn)->mode &= ~mode_flag;
- of_node_to_eeh_dev(dn)->check_count = 0;
- __eeh_clear_slot(dn, mode_flag);
- raw_spin_unlock_irqrestore(&confirm_error_lock, flags);
-}
-
-/**
- * eeh_dn_check_failure - Check if all 1's data is due to EEH slot freeze
- * @dn: device node
- * @dev: pci device, if known
+ * eeh_dev_check_failure - Check if all 1's data is due to EEH slot freeze
+ * @edev: eeh device
*
* Check for an EEH failure for the given device node. Call this
* routine if the result of a read was all 0xff's and you want to
@@ -392,11 +283,13 @@ void eeh_clear_slot(struct device_node *dn, int mode_flag)
*
* It is safe to call this routine in an interrupt context.
*/
-int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
+int eeh_dev_check_failure(struct eeh_dev *edev)
{
int ret;
unsigned long flags;
- struct eeh_dev *edev;
+ struct device_node *dn;
+ struct pci_dev *dev;
+ struct eeh_pe *pe;
int rc = 0;
const char *location;
@@ -405,23 +298,23 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
if (!eeh_subsystem_enabled)
return 0;
- if (!dn) {
+ if (!edev) {
eeh_stats.no_dn++;
return 0;
}
- dn = eeh_find_device_pe(dn);
- edev = of_node_to_eeh_dev(dn);
+ dn = eeh_dev_to_of_node(edev);
+ dev = eeh_dev_to_pci_dev(edev);
+ pe = edev->pe;
/* Access to IO BARs might get this far and still not want checking. */
- if (!(edev->mode & EEH_MODE_SUPPORTED) ||
- edev->mode & EEH_MODE_NOCHECK) {
+ if (!pe) {
eeh_stats.ignored_check++;
- pr_debug("EEH: Ignored check (%x) for %s %s\n",
- edev->mode, eeh_pci_name(dev), dn->full_name);
+ pr_debug("EEH: Ignored check for %s %s\n",
+ eeh_pci_name(dev), dn->full_name);
return 0;
}
- if (!edev->config_addr && !edev->pe_config_addr) {
+ if (!pe->addr && !pe->config_addr) {
eeh_stats.no_cfg_addr++;
return 0;
}
@@ -434,13 +327,13 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
*/
raw_spin_lock_irqsave(&confirm_error_lock, flags);
rc = 1;
- if (edev->mode & EEH_MODE_ISOLATED) {
- edev->check_count++;
- if (edev->check_count % EEH_MAX_FAILS == 0) {
+ if (pe->state & EEH_PE_ISOLATED) {
+ pe->check_count++;
+ if (pe->check_count % EEH_MAX_FAILS == 0) {
location = of_get_property(dn, "ibm,loc-code", NULL);
printk(KERN_ERR "EEH: %d reads ignored for recovering device at "
"location=%s driver=%s pci addr=%s\n",
- edev->check_count, location,
+ pe->check_count, location,
eeh_driver_name(dev), eeh_pci_name(dev));
printk(KERN_ERR "EEH: Might be infinite loop in %s driver\n",
eeh_driver_name(dev));
@@ -456,7 +349,7 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
* function zero of a multi-function device.
* In any case they must share a common PHB.
*/
- ret = eeh_ops->get_state(dn, NULL);
+ ret = eeh_ops->get_state(pe, NULL);
/* Note that config-io to empty slots may fail;
* they are empty when they don't have children.
@@ -469,7 +362,7 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
(ret & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) ==
(EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) {
eeh_stats.false_positives++;
- edev->false_positives ++;
+ pe->false_positives++;
rc = 0;
goto dn_unlock;
}
@@ -480,10 +373,10 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
* with other functions on this device, and functions under
* bridges.
*/
- eeh_mark_slot(dn, EEH_MODE_ISOLATED);
+ eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
raw_spin_unlock_irqrestore(&confirm_error_lock, flags);
- eeh_send_failure_event(edev);
+ eeh_send_failure_event(pe);
/* Most EEH events are due to device driver bugs. Having
* a stack trace will help the device-driver authors figure
@@ -497,7 +390,7 @@ dn_unlock:
return rc;
}
-EXPORT_SYMBOL_GPL(eeh_dn_check_failure);
+EXPORT_SYMBOL_GPL(eeh_dev_check_failure);
/**
* eeh_check_failure - Check if all 1's data is due to EEH slot freeze
@@ -514,21 +407,19 @@ EXPORT_SYMBOL_GPL(eeh_dn_check_failure);
unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned long val)
{
unsigned long addr;
- struct pci_dev *dev;
- struct device_node *dn;
+ struct eeh_dev *edev;
/* Finding the phys addr + pci device; this is pretty quick. */
addr = eeh_token_to_phys((unsigned long __force) token);
- dev = pci_addr_cache_get_device(addr);
- if (!dev) {
+ edev = eeh_addr_cache_get_dev(addr);
+ if (!edev) {
eeh_stats.no_device++;
return val;
}
- dn = pci_device_to_OF_node(dev);
- eeh_dn_check_failure(dn, dev);
+ eeh_dev_check_failure(edev);
- pci_dev_put(dev);
+ pci_dev_put(eeh_dev_to_pci_dev(edev));
return val;
}
@@ -537,23 +428,22 @@ EXPORT_SYMBOL(eeh_check_failure);
/**
* eeh_pci_enable - Enable MMIO or DMA transfers for this slot
- * @edev: pci device node
+ * @pe: EEH PE
*
* This routine should be called to reenable frozen MMIO or DMA
* so that it would work correctly again. It's useful while doing
* recovery or log collection on the indicated device.
*/
-int eeh_pci_enable(struct eeh_dev *edev, int function)
+int eeh_pci_enable(struct eeh_pe *pe, int function)
{
int rc;
- struct device_node *dn = eeh_dev_to_of_node(edev);
- rc = eeh_ops->set_option(dn, function);
+ rc = eeh_ops->set_option(pe, function);
if (rc)
- printk(KERN_WARNING "EEH: Unexpected state change %d, err=%d dn=%s\n",
- function, rc, dn->full_name);
+ pr_warning("%s: Unexpected state change %d on PHB#%d-PE#%x, err=%d\n",
+ __func__, function, pe->phb->global_number, pe->addr, rc);
- rc = eeh_ops->wait_state(dn, PCI_BUS_RESET_WAIT_MSEC);
+ rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
if (rc > 0 && (rc & EEH_STATE_MMIO_ENABLED) &&
(function == EEH_OPT_THAW_MMIO))
return 0;
@@ -571,17 +461,24 @@ int eeh_pci_enable(struct eeh_dev *edev, int function)
*/
int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state)
{
- struct device_node *dn = pci_device_to_OF_node(dev);
+ struct eeh_dev *edev = pci_dev_to_eeh_dev(dev);
+ struct eeh_pe *pe = edev->pe;
+
+ if (!pe) {
+ pr_err("%s: No PE found on PCI device %s\n",
+ __func__, pci_name(dev));
+ return -EINVAL;
+ }
switch (state) {
case pcie_deassert_reset:
- eeh_ops->reset(dn, EEH_RESET_DEACTIVATE);
+ eeh_ops->reset(pe, EEH_RESET_DEACTIVATE);
break;
case pcie_hot_reset:
- eeh_ops->reset(dn, EEH_RESET_HOT);
+ eeh_ops->reset(pe, EEH_RESET_HOT);
break;
case pcie_warm_reset:
- eeh_ops->reset(dn, EEH_RESET_FUNDAMENTAL);
+ eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL);
break;
default:
return -EINVAL;
@@ -591,66 +488,37 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat
}
/**
- * __eeh_set_pe_freset - Check the required reset for child devices
- * @parent: parent device
- * @freset: return value
- *
- * Each device might have its preferred reset type: fundamental or
- * hot reset. The routine is used to collect the information from
- * the child devices so that they could be reset accordingly.
- */
-void __eeh_set_pe_freset(struct device_node *parent, unsigned int *freset)
-{
- struct device_node *dn;
-
- for_each_child_of_node(parent, dn) {
- if (of_node_to_eeh_dev(dn)) {
- struct pci_dev *dev = of_node_to_eeh_dev(dn)->pdev;
-
- if (dev && dev->driver)
- *freset |= dev->needs_freset;
-
- __eeh_set_pe_freset(dn, freset);
- }
- }
-}
-
-/**
- * eeh_set_pe_freset - Check the required reset for the indicated device and its children
- * @dn: parent device
- * @freset: return value
+ * eeh_set_pe_freset - Check the required reset for the indicated device
+ * @data: EEH device
+ * @flag: return value
*
* Each device might have its preferred reset type: fundamental or
* hot reset. The routine is used to collected the information for
* the indicated device and its children so that the bunch of the
* devices could be reset properly.
*/
-void eeh_set_pe_freset(struct device_node *dn, unsigned int *freset)
+static void *eeh_set_dev_freset(void *data, void *flag)
{
struct pci_dev *dev;
- dn = eeh_find_device_pe(dn);
-
- /* Back up one, since config addrs might be shared */
- if (!pcibios_find_pci_bus(dn) && of_node_to_eeh_dev(dn->parent))
- dn = dn->parent;
+ unsigned int *freset = (unsigned int *)flag;
+ struct eeh_dev *edev = (struct eeh_dev *)data;
- dev = of_node_to_eeh_dev(dn)->pdev;
+ dev = eeh_dev_to_pci_dev(edev);
if (dev)
*freset |= dev->needs_freset;
- __eeh_set_pe_freset(dn, freset);
+ return NULL;
}
/**
* eeh_reset_pe_once - Assert the pci #RST line for 1/4 second
- * @edev: pci device node to be reset.
+ * @pe: EEH PE
*
* Assert the PCI #RST line for 1/4 second.
*/
-static void eeh_reset_pe_once(struct eeh_dev *edev)
+static void eeh_reset_pe_once(struct eeh_pe *pe)
{
unsigned int freset = 0;
- struct device_node *dn = eeh_dev_to_of_node(edev);
/* Determine type of EEH reset required for
* Partitionable Endpoint, a hot-reset (1)
@@ -658,12 +526,12 @@ static void eeh_reset_pe_once(struct eeh_dev *edev)
* A fundamental reset required by any device under
* Partitionable Endpoint trumps hot-reset.
*/
- eeh_set_pe_freset(dn, &freset);
+ eeh_pe_dev_traverse(pe, eeh_set_dev_freset, &freset);
if (freset)
- eeh_ops->reset(dn, EEH_RESET_FUNDAMENTAL);
+ eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL);
else
- eeh_ops->reset(dn, EEH_RESET_HOT);
+ eeh_ops->reset(pe, EEH_RESET_HOT);
/* The PCI bus requires that the reset be held high for at least
* a 100 milliseconds. We wait a bit longer 'just in case'.
@@ -675,9 +543,9 @@ static void eeh_reset_pe_once(struct eeh_dev *edev)
* pci slot reset line is dropped. Make sure we don't miss
* these, and clear the flag now.
*/
- eeh_clear_slot(dn, EEH_MODE_ISOLATED);
+ eeh_pe_state_clear(pe, EEH_PE_ISOLATED);
- eeh_ops->reset(dn, EEH_RESET_DEACTIVATE);
+ eeh_ops->reset(pe, EEH_RESET_DEACTIVATE);
/* After a PCI slot has been reset, the PCI Express spec requires
* a 1.5 second idle time for the bus to stabilize, before starting
@@ -689,116 +557,36 @@ static void eeh_reset_pe_once(struct eeh_dev *edev)
/**
* eeh_reset_pe - Reset the indicated PE
- * @edev: PCI device associated EEH device
+ * @pe: EEH PE
*
* This routine should be called to reset indicated device, including
* PE. A PE might include multiple PCI devices and sometimes PCI bridges
* might be involved as well.
*/
-int eeh_reset_pe(struct eeh_dev *edev)
+int eeh_reset_pe(struct eeh_pe *pe)
{
int i, rc;
- struct device_node *dn = eeh_dev_to_of_node(edev);
/* Take three shots at resetting the bus */
for (i=0; i<3; i++) {
- eeh_reset_pe_once(edev);
+ eeh_reset_pe_once(pe);
- rc = eeh_ops->wait_state(dn, PCI_BUS_RESET_WAIT_MSEC);
+ rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
if (rc == (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE))
return 0;
if (rc < 0) {
- printk(KERN_ERR "EEH: unrecoverable slot failure %s\n",
- dn->full_name);
+ pr_err("%s: Unrecoverable slot failure on PHB#%d-PE#%x",
+ __func__, pe->phb->global_number, pe->addr);
return -1;
}
- printk(KERN_ERR "EEH: bus reset %d failed on slot %s, rc=%d\n",
- i+1, dn->full_name, rc);
+ pr_err("EEH: bus reset %d failed on PHB#%d-PE#%x, rc=%d\n",
+ i+1, pe->phb->global_number, pe->addr, rc);
}
return -1;
}
-/** Save and restore of PCI BARs
- *
- * Although firmware will set up BARs during boot, it doesn't
- * set up device BAR's after a device reset, although it will,
- * if requested, set up bridge configuration. Thus, we need to
- * configure the PCI devices ourselves.
- */
-
-/**
- * eeh_restore_one_device_bars - Restore the Base Address Registers for one device
- * @edev: PCI device associated EEH device
- *
- * Loads the PCI configuration space base address registers,
- * the expansion ROM base address, the latency timer, and etc.
- * from the saved values in the device node.
- */
-static inline void eeh_restore_one_device_bars(struct eeh_dev *edev)
-{
- int i;
- u32 cmd;
- struct device_node *dn = eeh_dev_to_of_node(edev);
-
- if (!edev->phb)
- return;
-
- for (i=4; i<10; i++) {
- eeh_ops->write_config(dn, i*4, 4, edev->config_space[i]);
- }
-
- /* 12 == Expansion ROM Address */
- eeh_ops->write_config(dn, 12*4, 4, edev->config_space[12]);
-
-#define BYTE_SWAP(OFF) (8*((OFF)/4)+3-(OFF))
-#define SAVED_BYTE(OFF) (((u8 *)(edev->config_space))[BYTE_SWAP(OFF)])
-
- eeh_ops->write_config(dn, PCI_CACHE_LINE_SIZE, 1,
- SAVED_BYTE(PCI_CACHE_LINE_SIZE));
-
- eeh_ops->write_config(dn, PCI_LATENCY_TIMER, 1,
- SAVED_BYTE(PCI_LATENCY_TIMER));
-
- /* max latency, min grant, interrupt pin and line */
- eeh_ops->write_config(dn, 15*4, 4, edev->config_space[15]);
-
- /* Restore PERR & SERR bits, some devices require it,
- * don't touch the other command bits
- */
- eeh_ops->read_config(dn, PCI_COMMAND, 4, &cmd);
- if (edev->config_space[1] & PCI_COMMAND_PARITY)
- cmd |= PCI_COMMAND_PARITY;
- else
- cmd &= ~PCI_COMMAND_PARITY;
- if (edev->config_space[1] & PCI_COMMAND_SERR)
- cmd |= PCI_COMMAND_SERR;
- else
- cmd &= ~PCI_COMMAND_SERR;
- eeh_ops->write_config(dn, PCI_COMMAND, 4, cmd);
-}
-
-/**
- * eeh_restore_bars - Restore the PCI config space info
- * @edev: EEH device
- *
- * This routine performs a recursive walk to the children
- * of this device as well.
- */
-void eeh_restore_bars(struct eeh_dev *edev)
-{
- struct device_node *dn;
- if (!edev)
- return;
-
- if ((edev->mode & EEH_MODE_SUPPORTED) && !IS_BRIDGE(edev->class_code))
- eeh_restore_one_device_bars(edev);
-
- for_each_child_of_node(eeh_dev_to_of_node(edev), dn)
- eeh_restore_bars(of_node_to_eeh_dev(dn));
-}
-
/**
* eeh_save_bars - Save device bars
* @edev: PCI device associated EEH device
@@ -808,7 +596,7 @@ void eeh_restore_bars(struct eeh_dev *edev)
* PCI devices are added individually; but, for the restore,
* an entire slot is reset at a time.
*/
-static void eeh_save_bars(struct eeh_dev *edev)
+void eeh_save_bars(struct eeh_dev *edev)
{
int i;
struct device_node *dn;
@@ -822,102 +610,6 @@ static void eeh_save_bars(struct eeh_dev *edev)
}
/**
- * eeh_early_enable - Early enable EEH on the indicated device
- * @dn: device node
- * @data: BUID
- *
- * Enable EEH functionality on the specified PCI device. The function
- * is expected to be called before real PCI probing is done. However,
- * the PHBs have been initialized at this point.
- */
-static void *eeh_early_enable(struct device_node *dn, void *data)
-{
- int ret;
- const u32 *class_code = of_get_property(dn, "class-code", NULL);
- const u32 *vendor_id = of_get_property(dn, "vendor-id", NULL);
- const u32 *device_id = of_get_property(dn, "device-id", NULL);
- const u32 *regs;
- int enable;
- struct eeh_dev *edev = of_node_to_eeh_dev(dn);
-
- edev->class_code = 0;
- edev->mode = 0;
- edev->check_count = 0;
- edev->freeze_count = 0;
- edev->false_positives = 0;
-
- if (!of_device_is_available(dn))
- return NULL;
-
- /* Ignore bad nodes. */
- if (!class_code || !vendor_id || !device_id)
- return NULL;
-
- /* There is nothing to check on PCI to ISA bridges */
- if (dn->type && !strcmp(dn->type, "isa")) {
- edev->mode |= EEH_MODE_NOCHECK;
- return NULL;
- }
- edev->class_code = *class_code;
-
- /* Ok... see if this device supports EEH. Some do, some don't,
- * and the only way to find out is to check each and every one.
- */
- regs = of_get_property(dn, "reg", NULL);
- if (regs) {
- /* First register entry is addr (00BBSS00) */
- /* Try to enable eeh */
- ret = eeh_ops->set_option(dn, EEH_OPT_ENABLE);
-
- enable = 0;
- if (ret == 0) {
- edev->config_addr = regs[0];
-
- /* If the newer, better, ibm,get-config-addr-info is supported,
- * then use that instead.
- */
- edev->pe_config_addr = eeh_ops->get_pe_addr(dn);
-
- /* Some older systems (Power4) allow the
- * ibm,set-eeh-option call to succeed even on nodes
- * where EEH is not supported. Verify support
- * explicitly.
- */
- ret = eeh_ops->get_state(dn, NULL);
- if (ret > 0 && ret != EEH_STATE_NOT_SUPPORT)
- enable = 1;
- }
-
- if (enable) {
- eeh_subsystem_enabled = 1;
- edev->mode |= EEH_MODE_SUPPORTED;
-
- pr_debug("EEH: %s: eeh enabled, config=%x pe_config=%x\n",
- dn->full_name, edev->config_addr,
- edev->pe_config_addr);
- } else {
-
- /* This device doesn't support EEH, but it may have an
- * EEH parent, in which case we mark it as supported.
- */
- if (dn->parent && of_node_to_eeh_dev(dn->parent) &&
- (of_node_to_eeh_dev(dn->parent)->mode & EEH_MODE_SUPPORTED)) {
- /* Parent supports EEH. */
- edev->mode |= EEH_MODE_SUPPORTED;
- edev->config_addr = of_node_to_eeh_dev(dn->parent)->config_addr;
- return NULL;
- }
- }
- } else {
- printk(KERN_WARNING "EEH: %s: unable to get reg property.\n",
- dn->full_name);
- }
-
- eeh_save_bars(edev);
- return NULL;
-}
-
-/**
* eeh_ops_register - Register platform dependent EEH operations
* @ops: platform dependent EEH operations
*
@@ -982,7 +674,7 @@ int __exit eeh_ops_unregister(const char *name)
* Even if force-off is set, the EEH hardware is still enabled, so that
* newer systems can boot.
*/
-void __init eeh_init(void)
+static int __init eeh_init(void)
{
struct pci_controller *hose, *tmp;
struct device_node *phb;
@@ -992,27 +684,34 @@ void __init eeh_init(void)
if (!eeh_ops) {
pr_warning("%s: Platform EEH operation not found\n",
__func__);
- return;
+ return -EEXIST;
} else if ((ret = eeh_ops->init())) {
pr_warning("%s: Failed to call platform init function (%d)\n",
__func__, ret);
- return;
+ return ret;
}
raw_spin_lock_init(&confirm_error_lock);
/* Enable EEH for all adapters */
- list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
- phb = hose->dn;
- traverse_pci_devices(phb, eeh_early_enable, NULL);
+ if (eeh_probe_mode_devtree()) {
+ list_for_each_entry_safe(hose, tmp,
+ &hose_list, list_node) {
+ phb = hose->dn;
+ traverse_pci_devices(phb, eeh_ops->of_probe, NULL);
+ }
}
if (eeh_subsystem_enabled)
- printk(KERN_INFO "EEH: PCI Enhanced I/O Error Handling Enabled\n");
+ pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n");
else
- printk(KERN_WARNING "EEH: No capable adapters found\n");
+ pr_warning("EEH: No capable adapters found\n");
+
+ return ret;
}
+core_initcall_sync(eeh_init);
+
/**
* eeh_add_device_early - Enable EEH for the indicated device_node
* @dn: device node for which to set up EEH
@@ -1029,7 +728,7 @@ static void eeh_add_device_early(struct device_node *dn)
{
struct pci_controller *phb;
- if (!dn || !of_node_to_eeh_dev(dn))
+ if (!of_node_to_eeh_dev(dn))
return;
phb = of_node_to_eeh_dev(dn)->phb;
@@ -1037,7 +736,8 @@ static void eeh_add_device_early(struct device_node *dn)
if (NULL == phb || 0 == phb->buid)
return;
- eeh_early_enable(dn, NULL);
+ /* FIXME: hotplug support on POWERNV */
+ eeh_ops->of_probe(dn, NULL);
}
/**
@@ -1087,7 +787,7 @@ static void eeh_add_device_late(struct pci_dev *dev)
edev->pdev = dev;
dev->dev.archdata.edev = edev;
- pci_addr_cache_insert_device(dev);
+ eeh_addr_cache_insert_dev(dev);
eeh_sysfs_add_device(dev);
}
@@ -1117,6 +817,7 @@ EXPORT_SYMBOL_GPL(eeh_add_device_tree_late);
/**
* eeh_remove_device - Undo EEH setup for the indicated pci device
* @dev: pci device to be removed
+ * @purge_pe: remove the PE or not
*
* This routine should be called when a device is removed from
* a running system (e.g. by hotplug or dlpar). It unregisters
@@ -1124,7 +825,7 @@ EXPORT_SYMBOL_GPL(eeh_add_device_tree_late);
* this device will no longer be detected after this call; thus,
* i/o errors affecting this slot may leave this device unusable.
*/
-static void eeh_remove_device(struct pci_dev *dev)
+static void eeh_remove_device(struct pci_dev *dev, int purge_pe)
{
struct eeh_dev *edev;
@@ -1143,28 +844,30 @@ static void eeh_remove_device(struct pci_dev *dev)
dev->dev.archdata.edev = NULL;
pci_dev_put(dev);
- pci_addr_cache_remove_device(dev);
+ eeh_rmv_from_parent_pe(edev, purge_pe);
+ eeh_addr_cache_rmv_dev(dev);
eeh_sysfs_remove_device(dev);
}
/**
* eeh_remove_bus_device - Undo EEH setup for the indicated PCI device
* @dev: PCI device
+ * @purge_pe: remove the corresponding PE or not
*
* This routine must be called when a device is removed from the
* running system through hotplug or dlpar. The corresponding
* PCI address cache will be removed.
*/
-void eeh_remove_bus_device(struct pci_dev *dev)
+void eeh_remove_bus_device(struct pci_dev *dev, int purge_pe)
{
struct pci_bus *bus = dev->subordinate;
struct pci_dev *child, *tmp;
- eeh_remove_device(dev);
+ eeh_remove_device(dev, purge_pe);
if (bus && dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
list_for_each_entry_safe(child, tmp, &bus->devices, bus_list)
- eeh_remove_bus_device(child);
+ eeh_remove_bus_device(child, purge_pe);
}
}
EXPORT_SYMBOL_GPL(eeh_remove_bus_device);
diff --git a/arch/powerpc/platforms/pseries/eeh_cache.c b/arch/powerpc/platforms/pseries/eeh_cache.c
index e5ae1c687c6..5a4c8790305 100644
--- a/arch/powerpc/platforms/pseries/eeh_cache.c
+++ b/arch/powerpc/platforms/pseries/eeh_cache.c
@@ -50,6 +50,7 @@ struct pci_io_addr_range {
struct rb_node rb_node;
unsigned long addr_lo;
unsigned long addr_hi;
+ struct eeh_dev *edev;
struct pci_dev *pcidev;
unsigned int flags;
};
@@ -59,7 +60,7 @@ static struct pci_io_addr_cache {
spinlock_t piar_lock;
} pci_io_addr_cache_root;
-static inline struct pci_dev *__pci_addr_cache_get_device(unsigned long addr)
+static inline struct eeh_dev *__eeh_addr_cache_get_device(unsigned long addr)
{
struct rb_node *n = pci_io_addr_cache_root.rb_root.rb_node;
@@ -74,7 +75,7 @@ static inline struct pci_dev *__pci_addr_cache_get_device(unsigned long addr)
n = n->rb_right;
} else {
pci_dev_get(piar->pcidev);
- return piar->pcidev;
+ return piar->edev;
}
}
}
@@ -83,7 +84,7 @@ static inline struct pci_dev *__pci_addr_cache_get_device(unsigned long addr)
}
/**
- * pci_addr_cache_get_device - Get device, given only address
+ * eeh_addr_cache_get_dev - Get device, given only address
* @addr: mmio (PIO) phys address or i/o port number
*
* Given an mmio phys address, or a port number, find a pci device
@@ -92,15 +93,15 @@ static inline struct pci_dev *__pci_addr_cache_get_device(unsigned long addr)
* from zero (that is, they do *not* have pci_io_addr added in).
* It is safe to call this function within an interrupt.
*/
-struct pci_dev *pci_addr_cache_get_device(unsigned long addr)
+struct eeh_dev *eeh_addr_cache_get_dev(unsigned long addr)
{
- struct pci_dev *dev;
+ struct eeh_dev *edev;
unsigned long flags;
spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);
- dev = __pci_addr_cache_get_device(addr);
+ edev = __eeh_addr_cache_get_device(addr);
spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);
- return dev;
+ return edev;
}
#ifdef DEBUG
@@ -108,7 +109,7 @@ struct pci_dev *pci_addr_cache_get_device(unsigned long addr)
* Handy-dandy debug print routine, does nothing more
* than print out the contents of our addr cache.
*/
-static void pci_addr_cache_print(struct pci_io_addr_cache *cache)
+static void eeh_addr_cache_print(struct pci_io_addr_cache *cache)
{
struct rb_node *n;
int cnt = 0;
@@ -117,7 +118,7 @@ static void pci_addr_cache_print(struct pci_io_addr_cache *cache)
while (n) {
struct pci_io_addr_range *piar;
piar = rb_entry(n, struct pci_io_addr_range, rb_node);
- printk(KERN_DEBUG "PCI: %s addr range %d [%lx-%lx]: %s\n",
+ pr_debug("PCI: %s addr range %d [%lx-%lx]: %s\n",
(piar->flags & IORESOURCE_IO) ? "i/o" : "mem", cnt,
piar->addr_lo, piar->addr_hi, pci_name(piar->pcidev));
cnt++;
@@ -128,7 +129,7 @@ static void pci_addr_cache_print(struct pci_io_addr_cache *cache)
/* Insert address range into the rb tree. */
static struct pci_io_addr_range *
-pci_addr_cache_insert(struct pci_dev *dev, unsigned long alo,
+eeh_addr_cache_insert(struct pci_dev *dev, unsigned long alo,
unsigned long ahi, unsigned int flags)
{
struct rb_node **p = &pci_io_addr_cache_root.rb_root.rb_node;
@@ -146,23 +147,24 @@ pci_addr_cache_insert(struct pci_dev *dev, unsigned long alo,
} else {
if (dev != piar->pcidev ||
alo != piar->addr_lo || ahi != piar->addr_hi) {
- printk(KERN_WARNING "PIAR: overlapping address range\n");
+ pr_warning("PIAR: overlapping address range\n");
}
return piar;
}
}
- piar = kmalloc(sizeof(struct pci_io_addr_range), GFP_ATOMIC);
+ piar = kzalloc(sizeof(struct pci_io_addr_range), GFP_ATOMIC);
if (!piar)
return NULL;
pci_dev_get(dev);
piar->addr_lo = alo;
piar->addr_hi = ahi;
+ piar->edev = pci_dev_to_eeh_dev(dev);
piar->pcidev = dev;
piar->flags = flags;
#ifdef DEBUG
- printk(KERN_DEBUG "PIAR: insert range=[%lx:%lx] dev=%s\n",
+ pr_debug("PIAR: insert range=[%lx:%lx] dev=%s\n",
alo, ahi, pci_name(dev));
#endif
@@ -172,7 +174,7 @@ pci_addr_cache_insert(struct pci_dev *dev, unsigned long alo,
return piar;
}
-static void __pci_addr_cache_insert_device(struct pci_dev *dev)
+static void __eeh_addr_cache_insert_dev(struct pci_dev *dev)
{
struct device_node *dn;
struct eeh_dev *edev;
@@ -180,7 +182,7 @@ static void __pci_addr_cache_insert_device(struct pci_dev *dev)
dn = pci_device_to_OF_node(dev);
if (!dn) {
- printk(KERN_WARNING "PCI: no pci dn found for dev=%s\n", pci_name(dev));
+ pr_warning("PCI: no pci dn found for dev=%s\n", pci_name(dev));
return;
}
@@ -192,8 +194,7 @@ static void __pci_addr_cache_insert_device(struct pci_dev *dev)
}
/* Skip any devices for which EEH is not enabled. */
- if (!(edev->mode & EEH_MODE_SUPPORTED) ||
- edev->mode & EEH_MODE_NOCHECK) {
+ if (!edev->pe) {
#ifdef DEBUG
pr_info("PCI: skip building address cache for=%s - %s\n",
pci_name(dev), dn->full_name);
@@ -212,19 +213,19 @@ static void __pci_addr_cache_insert_device(struct pci_dev *dev)
continue;
if (start == 0 || ~start == 0 || end == 0 || ~end == 0)
continue;
- pci_addr_cache_insert(dev, start, end, flags);
+ eeh_addr_cache_insert(dev, start, end, flags);
}
}
/**
- * pci_addr_cache_insert_device - Add a device to the address cache
+ * eeh_addr_cache_insert_dev - Add a device to the address cache
* @dev: PCI device whose I/O addresses we are interested in.
*
* In order to support the fast lookup of devices based on addresses,
* we maintain a cache of devices that can be quickly searched.
* This routine adds a device to that cache.
*/
-void pci_addr_cache_insert_device(struct pci_dev *dev)
+void eeh_addr_cache_insert_dev(struct pci_dev *dev)
{
unsigned long flags;
@@ -233,11 +234,11 @@ void pci_addr_cache_insert_device(struct pci_dev *dev)
return;
spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);
- __pci_addr_cache_insert_device(dev);
+ __eeh_addr_cache_insert_dev(dev);
spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);
}
-static inline void __pci_addr_cache_remove_device(struct pci_dev *dev)
+static inline void __eeh_addr_cache_rmv_dev(struct pci_dev *dev)
{
struct rb_node *n;
@@ -258,7 +259,7 @@ restart:
}
/**
- * pci_addr_cache_remove_device - remove pci device from addr cache
+ * eeh_addr_cache_rmv_dev - remove pci device from addr cache
* @dev: device to remove
*
* Remove a device from the addr-cache tree.
@@ -266,17 +267,17 @@ restart:
* the tree multiple times (once per resource).
* But so what; device removal doesn't need to be that fast.
*/
-void pci_addr_cache_remove_device(struct pci_dev *dev)
+void eeh_addr_cache_rmv_dev(struct pci_dev *dev)
{
unsigned long flags;
spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);
- __pci_addr_cache_remove_device(dev);
+ __eeh_addr_cache_rmv_dev(dev);
spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);
}
/**
- * pci_addr_cache_build - Build a cache of I/O addresses
+ * eeh_addr_cache_build - Build a cache of I/O addresses
*
* Build a cache of pci i/o addresses. This cache will be used to
* find the pci device that corresponds to a given address.
@@ -284,7 +285,7 @@ void pci_addr_cache_remove_device(struct pci_dev *dev)
* Must be run late in boot process, after the pci controllers
* have been scanned for devices (after all device resources are known).
*/
-void __init pci_addr_cache_build(void)
+void __init eeh_addr_cache_build(void)
{
struct device_node *dn;
struct eeh_dev *edev;
@@ -293,7 +294,7 @@ void __init pci_addr_cache_build(void)
spin_lock_init(&pci_io_addr_cache_root.piar_lock);
for_each_pci_dev(dev) {
- pci_addr_cache_insert_device(dev);
+ eeh_addr_cache_insert_dev(dev);
dn = pci_device_to_OF_node(dev);
if (!dn)
@@ -312,7 +313,7 @@ void __init pci_addr_cache_build(void)
#ifdef DEBUG
/* Verify tree built up above, echo back the list of addrs. */
- pci_addr_cache_print(&pci_io_addr_cache_root);
+ eeh_addr_cache_print(&pci_io_addr_cache_root);
#endif
}
diff --git a/arch/powerpc/platforms/pseries/eeh_dev.c b/arch/powerpc/platforms/pseries/eeh_dev.c
index c4507d09590..66442341d3a 100644
--- a/arch/powerpc/platforms/pseries/eeh_dev.c
+++ b/arch/powerpc/platforms/pseries/eeh_dev.c
@@ -55,7 +55,7 @@ void * __devinit eeh_dev_init(struct device_node *dn, void *data)
struct eeh_dev *edev;
/* Allocate EEH device */
- edev = zalloc_maybe_bootmem(sizeof(*edev), GFP_KERNEL);
+ edev = kzalloc(sizeof(*edev), GFP_KERNEL);
if (!edev) {
pr_warning("%s: out of memory\n", __func__);
return NULL;
@@ -65,6 +65,7 @@ void * __devinit eeh_dev_init(struct device_node *dn, void *data)
PCI_DN(dn)->edev = edev;
edev->dn = dn;
edev->phb = phb;
+ INIT_LIST_HEAD(&edev->list);
return NULL;
}
@@ -80,6 +81,9 @@ void __devinit eeh_dev_phb_init_dynamic(struct pci_controller *phb)
{
struct device_node *dn = phb->dn;
+ /* EEH PE for PHB */
+ eeh_phb_pe_create(phb);
+
/* EEH device for PHB */
eeh_dev_init(dn, phb);
@@ -93,10 +97,16 @@ void __devinit eeh_dev_phb_init_dynamic(struct pci_controller *phb)
* Scan all the existing PHBs and create EEH devices for their OF
* nodes and their children OF nodes
*/
-void __init eeh_dev_phb_init(void)
+static int __init eeh_dev_phb_init(void)
{
struct pci_controller *phb, *tmp;
list_for_each_entry_safe(phb, tmp, &hose_list, list_node)
eeh_dev_phb_init_dynamic(phb);
+
+ pr_info("EEH: devices created\n");
+
+ return 0;
}
+
+core_initcall(eeh_dev_phb_init);
diff --git a/arch/powerpc/platforms/pseries/eeh_driver.c b/arch/powerpc/platforms/pseries/eeh_driver.c
index baf92cd9dfa..a3fefb61097 100644
--- a/arch/powerpc/platforms/pseries/eeh_driver.c
+++ b/arch/powerpc/platforms/pseries/eeh_driver.c
@@ -25,6 +25,7 @@
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
+#include <linux/module.h>
#include <linux/pci.h>
#include <asm/eeh.h>
#include <asm/eeh_event.h>
@@ -47,6 +48,41 @@ static inline const char *eeh_pcid_name(struct pci_dev *pdev)
return "";
}
+/**
+ * eeh_pcid_get - Get the PCI device driver
+ * @pdev: PCI device
+ *
+ * The function is used to retrieve the PCI device driver for
+ * the indicated PCI device. Besides, we will increase the reference
+ * of the PCI device driver to prevent that being unloaded on
+ * the fly. Otherwise, kernel crash would be seen.
+ */
+static inline struct pci_driver *eeh_pcid_get(struct pci_dev *pdev)
+{
+ if (!pdev || !pdev->driver)
+ return NULL;
+
+ if (!try_module_get(pdev->driver->driver.owner))
+ return NULL;
+
+ return pdev->driver;
+}
+
+/**
+ * eeh_pcid_put - Dereference on the PCI device driver
+ * @pdev: PCI device
+ *
+ * The function is called to do dereference on the PCI device
+ * driver of the indicated PCI device.
+ */
+static inline void eeh_pcid_put(struct pci_dev *pdev)
+{
+ if (!pdev || !pdev->driver)
+ return;
+
+ module_put(pdev->driver->driver.owner);
+}
+
#if 0
static void print_device_node_tree(struct pci_dn *pdn, int dent)
{
@@ -93,7 +129,7 @@ static void eeh_disable_irq(struct pci_dev *dev)
if (!irq_has_action(dev->irq))
return;
- edev->mode |= EEH_MODE_IRQ_DISABLED;
+ edev->mode |= EEH_DEV_IRQ_DISABLED;
disable_irq_nosync(dev->irq);
}
@@ -108,36 +144,44 @@ static void eeh_enable_irq(struct pci_dev *dev)
{
struct eeh_dev *edev = pci_dev_to_eeh_dev(dev);
- if ((edev->mode) & EEH_MODE_IRQ_DISABLED) {
- edev->mode &= ~EEH_MODE_IRQ_DISABLED;
+ if ((edev->mode) & EEH_DEV_IRQ_DISABLED) {
+ edev->mode &= ~EEH_DEV_IRQ_DISABLED;
enable_irq(dev->irq);
}
}
/**
* eeh_report_error - Report pci error to each device driver
- * @dev: PCI device
+ * @data: eeh device
* @userdata: return value
*
* Report an EEH error to each device driver, collect up and
* merge the device driver responses. Cumulative response
* passed back in "userdata".
*/
-static int eeh_report_error(struct pci_dev *dev, void *userdata)
+static void *eeh_report_error(void *data, void *userdata)
{
+ struct eeh_dev *edev = (struct eeh_dev *)data;
+ struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
enum pci_ers_result rc, *res = userdata;
- struct pci_driver *driver = dev->driver;
+ struct pci_driver *driver;
+ /* We might not have the associated PCI device,
+ * then we should continue for next one.
+ */
+ if (!dev) return NULL;
dev->error_state = pci_channel_io_frozen;
- if (!driver)
- return 0;
+ driver = eeh_pcid_get(dev);
+ if (!driver) return NULL;
eeh_disable_irq(dev);
if (!driver->err_handler ||
- !driver->err_handler->error_detected)
- return 0;
+ !driver->err_handler->error_detected) {
+ eeh_pcid_put(dev);
+ return NULL;
+ }
rc = driver->err_handler->error_detected(dev, pci_channel_io_frozen);
@@ -145,27 +189,34 @@ static int eeh_report_error(struct pci_dev *dev, void *userdata)
if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
if (*res == PCI_ERS_RESULT_NONE) *res = rc;
- return 0;
+ eeh_pcid_put(dev);
+ return NULL;
}
/**
* eeh_report_mmio_enabled - Tell drivers that MMIO has been enabled
- * @dev: PCI device
+ * @data: eeh device
* @userdata: return value
*
* Tells each device driver that IO ports, MMIO and config space I/O
* are now enabled. Collects up and merges the device driver responses.
* Cumulative response passed back in "userdata".
*/
-static int eeh_report_mmio_enabled(struct pci_dev *dev, void *userdata)
+static void *eeh_report_mmio_enabled(void *data, void *userdata)
{
+ struct eeh_dev *edev = (struct eeh_dev *)data;
+ struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
enum pci_ers_result rc, *res = userdata;
- struct pci_driver *driver = dev->driver;
+ struct pci_driver *driver;
- if (!driver ||
- !driver->err_handler ||
- !driver->err_handler->mmio_enabled)
- return 0;
+ driver = eeh_pcid_get(dev);
+ if (!driver) return NULL;
+
+ if (!driver->err_handler ||
+ !driver->err_handler->mmio_enabled) {
+ eeh_pcid_put(dev);
+ return NULL;
+ }
rc = driver->err_handler->mmio_enabled(dev);
@@ -173,12 +224,13 @@ static int eeh_report_mmio_enabled(struct pci_dev *dev, void *userdata)
if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
if (*res == PCI_ERS_RESULT_NONE) *res = rc;
- return 0;
+ eeh_pcid_put(dev);
+ return NULL;
}
/**
* eeh_report_reset - Tell device that slot has been reset
- * @dev: PCI device
+ * @data: eeh device
* @userdata: return value
*
* This routine must be called while EEH tries to reset particular
@@ -186,21 +238,26 @@ static int eeh_report_mmio_enabled(struct pci_dev *dev, void *userdata)
* some actions, usually to save data the driver needs so that the
* driver can work again while the device is recovered.
*/
-static int eeh_report_reset(struct pci_dev *dev, void *userdata)
+static void *eeh_report_reset(void *data, void *userdata)
{
+ struct eeh_dev *edev = (struct eeh_dev *)data;
+ struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
enum pci_ers_result rc, *res = userdata;
- struct pci_driver *driver = dev->driver;
-
- if (!driver)
- return 0;
+ struct pci_driver *driver;
+ if (!dev) return NULL;
dev->error_state = pci_channel_io_normal;
+ driver = eeh_pcid_get(dev);
+ if (!driver) return NULL;
+
eeh_enable_irq(dev);
if (!driver->err_handler ||
- !driver->err_handler->slot_reset)
- return 0;
+ !driver->err_handler->slot_reset) {
+ eeh_pcid_put(dev);
+ return NULL;
+ }
rc = driver->err_handler->slot_reset(dev);
if ((*res == PCI_ERS_RESULT_NONE) ||
@@ -208,109 +265,115 @@ static int eeh_report_reset(struct pci_dev *dev, void *userdata)
if (*res == PCI_ERS_RESULT_DISCONNECT &&
rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
- return 0;
+ eeh_pcid_put(dev);
+ return NULL;
}
/**
* eeh_report_resume - Tell device to resume normal operations
- * @dev: PCI device
+ * @data: eeh device
* @userdata: return value
*
* This routine must be called to notify the device driver that it
* could resume so that the device driver can do some initialization
* to make the recovered device work again.
*/
-static int eeh_report_resume(struct pci_dev *dev, void *userdata)
+static void *eeh_report_resume(void *data, void *userdata)
{
- struct pci_driver *driver = dev->driver;
+ struct eeh_dev *edev = (struct eeh_dev *)data;
+ struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
+ struct pci_driver *driver;
+ if (!dev) return NULL;
dev->error_state = pci_channel_io_normal;
- if (!driver)
- return 0;
+ driver = eeh_pcid_get(dev);
+ if (!driver) return NULL;
eeh_enable_irq(dev);
if (!driver->err_handler ||
- !driver->err_handler->resume)
- return 0;
+ !driver->err_handler->resume) {
+ eeh_pcid_put(dev);
+ return NULL;
+ }
driver->err_handler->resume(dev);
- return 0;
+ eeh_pcid_put(dev);
+ return NULL;
}
/**
* eeh_report_failure - Tell device driver that device is dead.
- * @dev: PCI device
+ * @data: eeh device
* @userdata: return value
*
* This informs the device driver that the device is permanently
* dead, and that no further recovery attempts will be made on it.
*/
-static int eeh_report_failure(struct pci_dev *dev, void *userdata)
+static void *eeh_report_failure(void *data, void *userdata)
{
- struct pci_driver *driver = dev->driver;
+ struct eeh_dev *edev = (struct eeh_dev *)data;
+ struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
+ struct pci_driver *driver;
+ if (!dev) return NULL;
dev->error_state = pci_channel_io_perm_failure;
- if (!driver)
- return 0;
+ driver = eeh_pcid_get(dev);
+ if (!driver) return NULL;
eeh_disable_irq(dev);
if (!driver->err_handler ||
- !driver->err_handler->error_detected)
- return 0;
+ !driver->err_handler->error_detected) {
+ eeh_pcid_put(dev);
+ return NULL;
+ }
driver->err_handler->error_detected(dev, pci_channel_io_perm_failure);
- return 0;
+ eeh_pcid_put(dev);
+ return NULL;
}
/**
* eeh_reset_device - Perform actual reset of a pci slot
- * @edev: PE associated EEH device
+ * @pe: EEH PE
* @bus: PCI bus corresponding to the isolcated slot
*
* This routine must be called to do reset on the indicated PE.
* During the reset, udev might be invoked because those affected
* PCI devices will be removed and then added.
*/
-static int eeh_reset_device(struct eeh_dev *edev, struct pci_bus *bus)
+static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
{
- struct device_node *dn;
int cnt, rc;
/* pcibios will clear the counter; save the value */
- cnt = edev->freeze_count;
+ cnt = pe->freeze_count;
+ /*
+ * We don't remove the corresponding PE instances because
+ * we need the information afterwords. The attached EEH
+ * devices are expected to be attached soon when calling
+ * into pcibios_add_pci_devices().
+ */
if (bus)
- pcibios_remove_pci_devices(bus);
+ __pcibios_remove_pci_devices(bus, 0);
/* Reset the pci controller. (Asserts RST#; resets config space).
* Reconfigure bridges and devices. Don't try to bring the system
* up if the reset failed for some reason.
*/
- rc = eeh_reset_pe(edev);
+ rc = eeh_reset_pe(pe);
if (rc)
return rc;
- /* Walk over all functions on this device. */
- dn = eeh_dev_to_of_node(edev);
- if (!pcibios_find_pci_bus(dn) && of_node_to_eeh_dev(dn->parent))
- dn = dn->parent->child;
-
- while (dn) {
- struct eeh_dev *pedev = of_node_to_eeh_dev(dn);
-
- /* On Power4, always true because eeh_pe_config_addr=0 */
- if (edev->pe_config_addr == pedev->pe_config_addr) {
- eeh_ops->configure_bridge(dn);
- eeh_restore_bars(pedev);
- }
- dn = dn->sibling;
- }
+ /* Restore PE */
+ eeh_ops->configure_bridge(pe);
+ eeh_pe_restore_bars(pe);
/* Give the system 5 seconds to finish running the user-space
* hotplug shutdown scripts, e.g. ifdown for ethernet. Yes,
@@ -322,7 +385,7 @@ static int eeh_reset_device(struct eeh_dev *edev, struct pci_bus *bus)
ssleep(5);
pcibios_add_pci_devices(bus);
}
- edev->freeze_count = cnt;
+ pe->freeze_count = cnt;
return 0;
}
@@ -334,7 +397,7 @@ static int eeh_reset_device(struct eeh_dev *edev, struct pci_bus *bus)
/**
* eeh_handle_event - Reset a PCI device after hard lockup.
- * @event: EEH event
+ * @pe: EEH PE
*
* While PHB detects address or data parity errors on particular PCI
* slot, the associated PE will be frozen. Besides, DMA's occurring
@@ -349,69 +412,24 @@ static int eeh_reset_device(struct eeh_dev *edev, struct pci_bus *bus)
* drivers (which cause a second set of hotplug events to go out to
* userspace).
*/
-struct eeh_dev *handle_eeh_events(struct eeh_event *event)
+void eeh_handle_event(struct eeh_pe *pe)
{
- struct device_node *frozen_dn;
- struct eeh_dev *frozen_edev;
struct pci_bus *frozen_bus;
int rc = 0;
enum pci_ers_result result = PCI_ERS_RESULT_NONE;
- const char *location, *pci_str, *drv_str, *bus_pci_str, *bus_drv_str;
-
- frozen_dn = eeh_find_device_pe(eeh_dev_to_of_node(event->edev));
- if (!frozen_dn) {
- location = of_get_property(eeh_dev_to_of_node(event->edev), "ibm,loc-code", NULL);
- location = location ? location : "unknown";
- printk(KERN_ERR "EEH: Error: Cannot find partition endpoint "
- "for location=%s pci addr=%s\n",
- location, eeh_pci_name(eeh_dev_to_pci_dev(event->edev)));
- return NULL;
- }
-
- frozen_bus = pcibios_find_pci_bus(frozen_dn);
- location = of_get_property(frozen_dn, "ibm,loc-code", NULL);
- location = location ? location : "unknown";
-
- /* There are two different styles for coming up with the PE.
- * In the old style, it was the highest EEH-capable device
- * which was always an EADS pci bridge. In the new style,
- * there might not be any EADS bridges, and even when there are,
- * the firmware marks them as "EEH incapable". So another
- * two-step is needed to find the pci bus..
- */
- if (!frozen_bus)
- frozen_bus = pcibios_find_pci_bus(frozen_dn->parent);
+ frozen_bus = eeh_pe_bus_get(pe);
if (!frozen_bus) {
- printk(KERN_ERR "EEH: Cannot find PCI bus "
- "for location=%s dn=%s\n",
- location, frozen_dn->full_name);
- return NULL;
+ pr_err("%s: Cannot find PCI bus for PHB#%d-PE#%x\n",
+ __func__, pe->phb->global_number, pe->addr);
+ return;
}
- frozen_edev = of_node_to_eeh_dev(frozen_dn);
- frozen_edev->freeze_count++;
- pci_str = eeh_pci_name(eeh_dev_to_pci_dev(event->edev));
- drv_str = eeh_pcid_name(eeh_dev_to_pci_dev(event->edev));
-
- if (frozen_edev->freeze_count > EEH_MAX_ALLOWED_FREEZES)
+ pe->freeze_count++;
+ if (pe->freeze_count > EEH_MAX_ALLOWED_FREEZES)
goto excess_failures;
-
- printk(KERN_WARNING
- "EEH: This PCI device has failed %d times in the last hour:\n",
- frozen_edev->freeze_count);
-
- if (frozen_edev->pdev) {
- bus_pci_str = pci_name(frozen_edev->pdev);
- bus_drv_str = eeh_pcid_name(frozen_edev->pdev);
- printk(KERN_WARNING
- "EEH: Bus location=%s driver=%s pci addr=%s\n",
- location, bus_drv_str, bus_pci_str);
- }
-
- printk(KERN_WARNING
- "EEH: Device location=%s driver=%s pci addr=%s\n",
- location, drv_str, pci_str);
+ pr_warning("EEH: This PCI device has failed %d times in the last hour\n",
+ pe->freeze_count);
/* Walk the various device drivers attached to this slot through
* a reset sequence, giving each an opportunity to do what it needs
@@ -419,12 +437,12 @@ struct eeh_dev *handle_eeh_events(struct eeh_event *event)
* status ... if any child can't handle the reset, then the entire
* slot is dlpar removed and added.
*/
- pci_walk_bus(frozen_bus, eeh_report_error, &result);
+ eeh_pe_dev_traverse(pe, eeh_report_error, &result);
/* Get the current PCI slot state. This can take a long time,
* sometimes over 3 seconds for certain systems.
*/
- rc = eeh_ops->wait_state(eeh_dev_to_of_node(frozen_edev), MAX_WAIT_FOR_RECOVERY*1000);
+ rc = eeh_ops->wait_state(pe, MAX_WAIT_FOR_RECOVERY*1000);
if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) {
printk(KERN_WARNING "EEH: Permanent failure\n");
goto hard_fail;
@@ -434,14 +452,14 @@ struct eeh_dev *handle_eeh_events(struct eeh_event *event)
* don't post the error log until after all dev drivers
* have been informed.
*/
- eeh_slot_error_detail(frozen_edev, EEH_LOG_TEMP);
+ eeh_slot_error_detail(pe, EEH_LOG_TEMP);
/* If all device drivers were EEH-unaware, then shut
* down all of the device drivers, and hope they
* go down willingly, without panicing the system.
*/
if (result == PCI_ERS_RESULT_NONE) {
- rc = eeh_reset_device(frozen_edev, frozen_bus);
+ rc = eeh_reset_device(pe, frozen_bus);
if (rc) {
printk(KERN_WARNING "EEH: Unable to reset, rc=%d\n", rc);
goto hard_fail;
@@ -450,7 +468,7 @@ struct eeh_dev *handle_eeh_events(struct eeh_event *event)
/* If all devices reported they can proceed, then re-enable MMIO */
if (result == PCI_ERS_RESULT_CAN_RECOVER) {
- rc = eeh_pci_enable(frozen_edev, EEH_OPT_THAW_MMIO);
+ rc = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
if (rc < 0)
goto hard_fail;
@@ -458,13 +476,13 @@ struct eeh_dev *handle_eeh_events(struct eeh_event *event)
result = PCI_ERS_RESULT_NEED_RESET;
} else {
result = PCI_ERS_RESULT_NONE;
- pci_walk_bus(frozen_bus, eeh_report_mmio_enabled, &result);
+ eeh_pe_dev_traverse(pe, eeh_report_mmio_enabled, &result);
}
}
/* If all devices reported they can proceed, then re-enable DMA */
if (result == PCI_ERS_RESULT_CAN_RECOVER) {
- rc = eeh_pci_enable(frozen_edev, EEH_OPT_THAW_DMA);
+ rc = eeh_pci_enable(pe, EEH_OPT_THAW_DMA);
if (rc < 0)
goto hard_fail;
@@ -482,13 +500,13 @@ struct eeh_dev *handle_eeh_events(struct eeh_event *event)
/* If any device called out for a reset, then reset the slot */
if (result == PCI_ERS_RESULT_NEED_RESET) {
- rc = eeh_reset_device(frozen_edev, NULL);
+ rc = eeh_reset_device(pe, NULL);
if (rc) {
printk(KERN_WARNING "EEH: Cannot reset, rc=%d\n", rc);
goto hard_fail;
}
result = PCI_ERS_RESULT_NONE;
- pci_walk_bus(frozen_bus, eeh_report_reset, &result);
+ eeh_pe_dev_traverse(pe, eeh_report_reset, &result);
}
/* All devices should claim they have recovered by now. */
@@ -499,9 +517,9 @@ struct eeh_dev *handle_eeh_events(struct eeh_event *event)
}
/* Tell all device drivers that they can resume operations */
- pci_walk_bus(frozen_bus, eeh_report_resume, NULL);
+ eeh_pe_dev_traverse(pe, eeh_report_resume, NULL);
- return frozen_edev;
+ return;
excess_failures:
/*
@@ -509,30 +527,26 @@ excess_failures:
* are due to poorly seated PCI cards. Only 10% or so are
* due to actual, failed cards.
*/
- printk(KERN_ERR
- "EEH: PCI device at location=%s driver=%s pci addr=%s\n"
- "has failed %d times in the last hour "
- "and has been permanently disabled.\n"
- "Please try reseating this device or replacing it.\n",
- location, drv_str, pci_str, frozen_edev->freeze_count);
+ pr_err("EEH: PHB#%d-PE#%x has failed %d times in the\n"
+ "last hour and has been permanently disabled.\n"
+ "Please try reseating or replacing it.\n",
+ pe->phb->global_number, pe->addr,
+ pe->freeze_count);
goto perm_error;
hard_fail:
- printk(KERN_ERR
- "EEH: Unable to recover from failure of PCI device "
- "at location=%s driver=%s pci addr=%s\n"
- "Please try reseating this device or replacing it.\n",
- location, drv_str, pci_str);
+ pr_err("EEH: Unable to recover from failure from PHB#%d-PE#%x.\n"
+ "Please try reseating or replacing it\n",
+ pe->phb->global_number, pe->addr);
perm_error:
- eeh_slot_error_detail(frozen_edev, EEH_LOG_PERM);
+ eeh_slot_error_detail(pe, EEH_LOG_PERM);
/* Notify all devices that they're about to go down. */
- pci_walk_bus(frozen_bus, eeh_report_failure, NULL);
+ eeh_pe_dev_traverse(pe, eeh_report_failure, NULL);
/* Shut down the device drivers for good. */
- pcibios_remove_pci_devices(frozen_bus);
-
- return NULL;
+ if (frozen_bus)
+ pcibios_remove_pci_devices(frozen_bus);
}
diff --git a/arch/powerpc/platforms/pseries/eeh_event.c b/arch/powerpc/platforms/pseries/eeh_event.c
index fb506317ebb..185bedd926d 100644
--- a/arch/powerpc/platforms/pseries/eeh_event.c
+++ b/arch/powerpc/platforms/pseries/eeh_event.c
@@ -23,6 +23,7 @@
#include <linux/pci.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
+#include <linux/kthread.h>
#include <asm/eeh_event.h>
#include <asm/ppc-pci.h>
@@ -57,9 +58,7 @@ static int eeh_event_handler(void * dummy)
{
unsigned long flags;
struct eeh_event *event;
- struct eeh_dev *edev;
-
- set_task_comm(current, "eehd");
+ struct eeh_pe *pe;
spin_lock_irqsave(&eeh_eventlist_lock, flags);
event = NULL;
@@ -76,28 +75,23 @@ static int eeh_event_handler(void * dummy)
/* Serialize processing of EEH events */
mutex_lock(&eeh_event_mutex);
- edev = event->edev;
- eeh_mark_slot(eeh_dev_to_of_node(edev), EEH_MODE_RECOVERING);
-
- printk(KERN_INFO "EEH: Detected PCI bus error on device %s\n",
- eeh_pci_name(edev->pdev));
+ pe = event->pe;
+ eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
+ pr_info("EEH: Detected PCI bus error on PHB#%d-PE#%x\n",
+ pe->phb->global_number, pe->addr);
set_current_state(TASK_INTERRUPTIBLE); /* Don't add to load average */
- edev = handle_eeh_events(event);
-
- if (edev) {
- eeh_clear_slot(eeh_dev_to_of_node(edev), EEH_MODE_RECOVERING);
- pci_dev_put(edev->pdev);
- }
+ eeh_handle_event(pe);
+ eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
kfree(event);
mutex_unlock(&eeh_event_mutex);
/* If there are no new errors after an hour, clear the counter. */
- if (edev && edev->freeze_count>0) {
+ if (pe && pe->freeze_count > 0) {
msleep_interruptible(3600*1000);
- if (edev->freeze_count>0)
- edev->freeze_count--;
+ if (pe->freeze_count > 0)
+ pe->freeze_count--;
}
@@ -113,42 +107,29 @@ static int eeh_event_handler(void * dummy)
*/
static void eeh_thread_launcher(struct work_struct *dummy)
{
- if (kernel_thread(eeh_event_handler, NULL, CLONE_KERNEL) < 0)
+ if (IS_ERR(kthread_run(eeh_event_handler, NULL, "eehd")))
printk(KERN_ERR "Failed to start EEH daemon\n");
}
/**
* eeh_send_failure_event - Generate a PCI error event
- * @edev: EEH device
+ * @pe: EEH PE
*
* This routine can be called within an interrupt context;
* the actual event will be delivered in a normal context
* (from a workqueue).
*/
-int eeh_send_failure_event(struct eeh_dev *edev)
+int eeh_send_failure_event(struct eeh_pe *pe)
{
unsigned long flags;
struct eeh_event *event;
- struct device_node *dn = eeh_dev_to_of_node(edev);
- const char *location;
-
- if (!mem_init_done) {
- printk(KERN_ERR "EEH: event during early boot not handled\n");
- location = of_get_property(dn, "ibm,loc-code", NULL);
- printk(KERN_ERR "EEH: device node = %s\n", dn->full_name);
- printk(KERN_ERR "EEH: PCI location = %s\n", location);
- return 1;
- }
- event = kmalloc(sizeof(*event), GFP_ATOMIC);
- if (event == NULL) {
- printk(KERN_ERR "EEH: out of memory, event not handled\n");
- return 1;
- }
-
- if (edev->pdev)
- pci_dev_get(edev->pdev);
- event->edev = edev;
+ event = kzalloc(sizeof(*event), GFP_ATOMIC);
+ if (!event) {
+ pr_err("EEH: out of memory, event not handled\n");
+ return -ENOMEM;
+ }
+ event->pe = pe;
/* We may or may not be called in an interrupt context */
spin_lock_irqsave(&eeh_eventlist_lock, flags);
diff --git a/arch/powerpc/platforms/pseries/eeh_pe.c b/arch/powerpc/platforms/pseries/eeh_pe.c
new file mode 100644
index 00000000000..797cd181dc3
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/eeh_pe.c
@@ -0,0 +1,652 @@
+/*
+ * The file intends to implement PE based on the information from
+ * platforms. Basically, there have 3 types of PEs: PHB/Bus/Device.
+ * All the PEs should be organized as hierarchy tree. The first level
+ * of the tree will be associated to existing PHBs since the particular
+ * PE is only meaningful in one PHB domain.
+ *
+ * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2012.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/export.h>
+#include <linux/gfp.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/string.h>
+
+#include <asm/pci-bridge.h>
+#include <asm/ppc-pci.h>
+
+static LIST_HEAD(eeh_phb_pe);
+
+/**
+ * eeh_pe_alloc - Allocate PE
+ * @phb: PCI controller
+ * @type: PE type
+ *
+ * Allocate PE instance dynamically.
+ */
+static struct eeh_pe *eeh_pe_alloc(struct pci_controller *phb, int type)
+{
+ struct eeh_pe *pe;
+
+ /* Allocate PHB PE */
+ pe = kzalloc(sizeof(struct eeh_pe), GFP_KERNEL);
+ if (!pe) return NULL;
+
+ /* Initialize PHB PE */
+ pe->type = type;
+ pe->phb = phb;
+ INIT_LIST_HEAD(&pe->child_list);
+ INIT_LIST_HEAD(&pe->child);
+ INIT_LIST_HEAD(&pe->edevs);
+
+ return pe;
+}
+
+/**
+ * eeh_phb_pe_create - Create PHB PE
+ * @phb: PCI controller
+ *
+ * The function should be called while the PHB is detected during
+ * system boot or PCI hotplug in order to create PHB PE.
+ */
+int __devinit eeh_phb_pe_create(struct pci_controller *phb)
+{
+ struct eeh_pe *pe;
+
+ /* Allocate PHB PE */
+ pe = eeh_pe_alloc(phb, EEH_PE_PHB);
+ if (!pe) {
+ pr_err("%s: out of memory!\n", __func__);
+ return -ENOMEM;
+ }
+
+ /* Put it into the list */
+ eeh_lock();
+ list_add_tail(&pe->child, &eeh_phb_pe);
+ eeh_unlock();
+
+ pr_debug("EEH: Add PE for PHB#%d\n", phb->global_number);
+
+ return 0;
+}
+
+/**
+ * eeh_phb_pe_get - Retrieve PHB PE based on the given PHB
+ * @phb: PCI controller
+ *
+ * The overall PEs form hierarchy tree. The first layer of the
+ * hierarchy tree is composed of PHB PEs. The function is used
+ * to retrieve the corresponding PHB PE according to the given PHB.
+ */
+static struct eeh_pe *eeh_phb_pe_get(struct pci_controller *phb)
+{
+ struct eeh_pe *pe;
+
+ list_for_each_entry(pe, &eeh_phb_pe, child) {
+ /*
+ * Actually, we needn't check the type since
+ * the PE for PHB has been determined when that
+ * was created.
+ */
+ if ((pe->type & EEH_PE_PHB) && pe->phb == phb)
+ return pe;
+ }
+
+ return NULL;
+}
+
+/**
+ * eeh_pe_next - Retrieve the next PE in the tree
+ * @pe: current PE
+ * @root: root PE
+ *
+ * The function is used to retrieve the next PE in the
+ * hierarchy PE tree.
+ */
+static struct eeh_pe *eeh_pe_next(struct eeh_pe *pe,
+ struct eeh_pe *root)
+{
+ struct list_head *next = pe->child_list.next;
+
+ if (next == &pe->child_list) {
+ while (1) {
+ if (pe == root)
+ return NULL;
+ next = pe->child.next;
+ if (next != &pe->parent->child_list)
+ break;
+ pe = pe->parent;
+ }
+ }
+
+ return list_entry(next, struct eeh_pe, child);
+}
+
+/**
+ * eeh_pe_traverse - Traverse PEs in the specified PHB
+ * @root: root PE
+ * @fn: callback
+ * @flag: extra parameter to callback
+ *
+ * The function is used to traverse the specified PE and its
+ * child PEs. The traversing is to be terminated once the
+ * callback returns something other than NULL, or no more PEs
+ * to be traversed.
+ */
+static void *eeh_pe_traverse(struct eeh_pe *root,
+ eeh_traverse_func fn, void *flag)
+{
+ struct eeh_pe *pe;
+ void *ret;
+
+ for (pe = root; pe; pe = eeh_pe_next(pe, root)) {
+ ret = fn(pe, flag);
+ if (ret) return ret;
+ }
+
+ return NULL;
+}
+
+/**
+ * eeh_pe_dev_traverse - Traverse the devices from the PE
+ * @root: EEH PE
+ * @fn: function callback
+ * @flag: extra parameter to callback
+ *
+ * The function is used to traverse the devices of the specified
+ * PE and its child PEs.
+ */
+void *eeh_pe_dev_traverse(struct eeh_pe *root,
+ eeh_traverse_func fn, void *flag)
+{
+ struct eeh_pe *pe;
+ struct eeh_dev *edev;
+ void *ret;
+
+ if (!root) {
+ pr_warning("%s: Invalid PE %p\n", __func__, root);
+ return NULL;
+ }
+
+ eeh_lock();
+
+ /* Traverse root PE */
+ for (pe = root; pe; pe = eeh_pe_next(pe, root)) {
+ eeh_pe_for_each_dev(pe, edev) {
+ ret = fn(edev, flag);
+ if (ret) {
+ eeh_unlock();
+ return ret;
+ }
+ }
+ }
+
+ eeh_unlock();
+
+ return NULL;
+}
+
+/**
+ * __eeh_pe_get - Check the PE address
+ * @data: EEH PE
+ * @flag: EEH device
+ *
+ * For one particular PE, it can be identified by PE address
+ * or tranditional BDF address. BDF address is composed of
+ * Bus/Device/Function number. The extra data referred by flag
+ * indicates which type of address should be used.
+ */
+static void *__eeh_pe_get(void *data, void *flag)
+{
+ struct eeh_pe *pe = (struct eeh_pe *)data;
+ struct eeh_dev *edev = (struct eeh_dev *)flag;
+
+ /* Unexpected PHB PE */
+ if (pe->type & EEH_PE_PHB)
+ return NULL;
+
+ /* We prefer PE address */
+ if (edev->pe_config_addr &&
+ (edev->pe_config_addr == pe->addr))
+ return pe;
+
+ /* Try BDF address */
+ if (edev->pe_config_addr &&
+ (edev->config_addr == pe->config_addr))
+ return pe;
+
+ return NULL;
+}
+
+/**
+ * eeh_pe_get - Search PE based on the given address
+ * @edev: EEH device
+ *
+ * Search the corresponding PE based on the specified address which
+ * is included in the eeh device. The function is used to check if
+ * the associated PE has been created against the PE address. It's
+ * notable that the PE address has 2 format: traditional PE address
+ * which is composed of PCI bus/device/function number, or unified
+ * PE address.
+ */
+static struct eeh_pe *eeh_pe_get(struct eeh_dev *edev)
+{
+ struct eeh_pe *root = eeh_phb_pe_get(edev->phb);
+ struct eeh_pe *pe;
+
+ pe = eeh_pe_traverse(root, __eeh_pe_get, edev);
+
+ return pe;
+}
+
+/**
+ * eeh_pe_get_parent - Retrieve the parent PE
+ * @edev: EEH device
+ *
+ * The whole PEs existing in the system are organized as hierarchy
+ * tree. The function is used to retrieve the parent PE according
+ * to the parent EEH device.
+ */
+static struct eeh_pe *eeh_pe_get_parent(struct eeh_dev *edev)
+{
+ struct device_node *dn;
+ struct eeh_dev *parent;
+
+ /*
+ * It might have the case for the indirect parent
+ * EEH device already having associated PE, but
+ * the direct parent EEH device doesn't have yet.
+ */
+ dn = edev->dn->parent;
+ while (dn) {
+ /* We're poking out of PCI territory */
+ if (!PCI_DN(dn)) return NULL;
+
+ parent = of_node_to_eeh_dev(dn);
+ /* We're poking out of PCI territory */
+ if (!parent) return NULL;
+
+ if (parent->pe)
+ return parent->pe;
+
+ dn = dn->parent;
+ }
+
+ return NULL;
+}
+
+/**
+ * eeh_add_to_parent_pe - Add EEH device to parent PE
+ * @edev: EEH device
+ *
+ * Add EEH device to the parent PE. If the parent PE already
+ * exists, the PE type will be changed to EEH_PE_BUS. Otherwise,
+ * we have to create new PE to hold the EEH device and the new
+ * PE will be linked to its parent PE as well.
+ */
+int eeh_add_to_parent_pe(struct eeh_dev *edev)
+{
+ struct eeh_pe *pe, *parent;
+
+ eeh_lock();
+
+ /*
+ * Search the PE has been existing or not according
+ * to the PE address. If that has been existing, the
+ * PE should be composed of PCI bus and its subordinate
+ * components.
+ */
+ pe = eeh_pe_get(edev);
+ if (pe && !(pe->type & EEH_PE_INVALID)) {
+ if (!edev->pe_config_addr) {
+ eeh_unlock();
+ pr_err("%s: PE with addr 0x%x already exists\n",
+ __func__, edev->config_addr);
+ return -EEXIST;
+ }
+
+ /* Mark the PE as type of PCI bus */
+ pe->type = EEH_PE_BUS;
+ edev->pe = pe;
+
+ /* Put the edev to PE */
+ list_add_tail(&edev->list, &pe->edevs);
+ eeh_unlock();
+ pr_debug("EEH: Add %s to Bus PE#%x\n",
+ edev->dn->full_name, pe->addr);
+
+ return 0;
+ } else if (pe && (pe->type & EEH_PE_INVALID)) {
+ list_add_tail(&edev->list, &pe->edevs);
+ edev->pe = pe;
+ /*
+ * We're running to here because of PCI hotplug caused by
+ * EEH recovery. We need clear EEH_PE_INVALID until the top.
+ */
+ parent = pe;
+ while (parent) {
+ if (!(parent->type & EEH_PE_INVALID))
+ break;
+ parent->type &= ~EEH_PE_INVALID;
+ parent = parent->parent;
+ }
+ eeh_unlock();
+ pr_debug("EEH: Add %s to Device PE#%x, Parent PE#%x\n",
+ edev->dn->full_name, pe->addr, pe->parent->addr);
+
+ return 0;
+ }
+
+ /* Create a new EEH PE */
+ pe = eeh_pe_alloc(edev->phb, EEH_PE_DEVICE);
+ if (!pe) {
+ eeh_unlock();
+ pr_err("%s: out of memory!\n", __func__);
+ return -ENOMEM;
+ }
+ pe->addr = edev->pe_config_addr;
+ pe->config_addr = edev->config_addr;
+
+ /*
+ * Put the new EEH PE into hierarchy tree. If the parent
+ * can't be found, the newly created PE will be attached
+ * to PHB directly. Otherwise, we have to associate the
+ * PE with its parent.
+ */
+ parent = eeh_pe_get_parent(edev);
+ if (!parent) {
+ parent = eeh_phb_pe_get(edev->phb);
+ if (!parent) {
+ eeh_unlock();
+ pr_err("%s: No PHB PE is found (PHB Domain=%d)\n",
+ __func__, edev->phb->global_number);
+ edev->pe = NULL;
+ kfree(pe);
+ return -EEXIST;
+ }
+ }
+ pe->parent = parent;
+
+ /*
+ * Put the newly created PE into the child list and
+ * link the EEH device accordingly.
+ */
+ list_add_tail(&pe->child, &parent->child_list);
+ list_add_tail(&edev->list, &pe->edevs);
+ edev->pe = pe;
+ eeh_unlock();
+ pr_debug("EEH: Add %s to Device PE#%x, Parent PE#%x\n",
+ edev->dn->full_name, pe->addr, pe->parent->addr);
+
+ return 0;
+}
+
+/**
+ * eeh_rmv_from_parent_pe - Remove one EEH device from the associated PE
+ * @edev: EEH device
+ * @purge_pe: remove PE or not
+ *
+ * The PE hierarchy tree might be changed when doing PCI hotplug.
+ * Also, the PCI devices or buses could be removed from the system
+ * during EEH recovery. So we have to call the function remove the
+ * corresponding PE accordingly if necessary.
+ */
+int eeh_rmv_from_parent_pe(struct eeh_dev *edev, int purge_pe)
+{
+ struct eeh_pe *pe, *parent, *child;
+ int cnt;
+
+ if (!edev->pe) {
+ pr_warning("%s: No PE found for EEH device %s\n",
+ __func__, edev->dn->full_name);
+ return -EEXIST;
+ }
+
+ eeh_lock();
+
+ /* Remove the EEH device */
+ pe = edev->pe;
+ edev->pe = NULL;
+ list_del(&edev->list);
+
+ /*
+ * Check if the parent PE includes any EEH devices.
+ * If not, we should delete that. Also, we should
+ * delete the parent PE if it doesn't have associated
+ * child PEs and EEH devices.
+ */
+ while (1) {
+ parent = pe->parent;
+ if (pe->type & EEH_PE_PHB)
+ break;
+
+ if (purge_pe) {
+ if (list_empty(&pe->edevs) &&
+ list_empty(&pe->child_list)) {
+ list_del(&pe->child);
+ kfree(pe);
+ } else {
+ break;
+ }
+ } else {
+ if (list_empty(&pe->edevs)) {
+ cnt = 0;
+ list_for_each_entry(child, &pe->child_list, child) {
+ if (!(pe->type & EEH_PE_INVALID)) {
+ cnt++;
+ break;
+ }
+ }
+
+ if (!cnt)
+ pe->type |= EEH_PE_INVALID;
+ else
+ break;
+ }
+ }
+
+ pe = parent;
+ }
+
+ eeh_unlock();
+
+ return 0;
+}
+
+/**
+ * __eeh_pe_state_mark - Mark the state for the PE
+ * @data: EEH PE
+ * @flag: state
+ *
+ * The function is used to mark the indicated state for the given
+ * PE. Also, the associated PCI devices will be put into IO frozen
+ * state as well.
+ */
+static void *__eeh_pe_state_mark(void *data, void *flag)
+{
+ struct eeh_pe *pe = (struct eeh_pe *)data;
+ int state = *((int *)flag);
+ struct eeh_dev *tmp;
+ struct pci_dev *pdev;
+
+ /*
+ * Mark the PE with the indicated state. Also,
+ * the associated PCI device will be put into
+ * I/O frozen state to avoid I/O accesses from
+ * the PCI device driver.
+ */
+ pe->state |= state;
+ eeh_pe_for_each_dev(pe, tmp) {
+ pdev = eeh_dev_to_pci_dev(tmp);
+ if (pdev)
+ pdev->error_state = pci_channel_io_frozen;
+ }
+
+ return NULL;
+}
+
+/**
+ * eeh_pe_state_mark - Mark specified state for PE and its associated device
+ * @pe: EEH PE
+ *
+ * EEH error affects the current PE and its child PEs. The function
+ * is used to mark appropriate state for the affected PEs and the
+ * associated devices.
+ */
+void eeh_pe_state_mark(struct eeh_pe *pe, int state)
+{
+ eeh_lock();
+ eeh_pe_traverse(pe, __eeh_pe_state_mark, &state);
+ eeh_unlock();
+}
+
+/**
+ * __eeh_pe_state_clear - Clear state for the PE
+ * @data: EEH PE
+ * @flag: state
+ *
+ * The function is used to clear the indicated state from the
+ * given PE. Besides, we also clear the check count of the PE
+ * as well.
+ */
+static void *__eeh_pe_state_clear(void *data, void *flag)
+{
+ struct eeh_pe *pe = (struct eeh_pe *)data;
+ int state = *((int *)flag);
+
+ pe->state &= ~state;
+ pe->check_count = 0;
+
+ return NULL;
+}
+
+/**
+ * eeh_pe_state_clear - Clear state for the PE and its children
+ * @pe: PE
+ * @state: state to be cleared
+ *
+ * When the PE and its children has been recovered from error,
+ * we need clear the error state for that. The function is used
+ * for the purpose.
+ */
+void eeh_pe_state_clear(struct eeh_pe *pe, int state)
+{
+ eeh_lock();
+ eeh_pe_traverse(pe, __eeh_pe_state_clear, &state);
+ eeh_unlock();
+}
+
+/**
+ * eeh_restore_one_device_bars - Restore the Base Address Registers for one device
+ * @data: EEH device
+ * @flag: Unused
+ *
+ * Loads the PCI configuration space base address registers,
+ * the expansion ROM base address, the latency timer, and etc.
+ * from the saved values in the device node.
+ */
+static void *eeh_restore_one_device_bars(void *data, void *flag)
+{
+ int i;
+ u32 cmd;
+ struct eeh_dev *edev = (struct eeh_dev *)data;
+ struct device_node *dn = eeh_dev_to_of_node(edev);
+
+ for (i = 4; i < 10; i++)
+ eeh_ops->write_config(dn, i*4, 4, edev->config_space[i]);
+ /* 12 == Expansion ROM Address */
+ eeh_ops->write_config(dn, 12*4, 4, edev->config_space[12]);
+
+#define BYTE_SWAP(OFF) (8*((OFF)/4)+3-(OFF))
+#define SAVED_BYTE(OFF) (((u8 *)(edev->config_space))[BYTE_SWAP(OFF)])
+
+ eeh_ops->write_config(dn, PCI_CACHE_LINE_SIZE, 1,
+ SAVED_BYTE(PCI_CACHE_LINE_SIZE));
+ eeh_ops->write_config(dn, PCI_LATENCY_TIMER, 1,
+ SAVED_BYTE(PCI_LATENCY_TIMER));
+
+ /* max latency, min grant, interrupt pin and line */
+ eeh_ops->write_config(dn, 15*4, 4, edev->config_space[15]);
+
+ /*
+ * Restore PERR & SERR bits, some devices require it,
+ * don't touch the other command bits
+ */
+ eeh_ops->read_config(dn, PCI_COMMAND, 4, &cmd);
+ if (edev->config_space[1] & PCI_COMMAND_PARITY)
+ cmd |= PCI_COMMAND_PARITY;
+ else
+ cmd &= ~PCI_COMMAND_PARITY;
+ if (edev->config_space[1] & PCI_COMMAND_SERR)
+ cmd |= PCI_COMMAND_SERR;
+ else
+ cmd &= ~PCI_COMMAND_SERR;
+ eeh_ops->write_config(dn, PCI_COMMAND, 4, cmd);
+
+ return NULL;
+}
+
+/**
+ * eeh_pe_restore_bars - Restore the PCI config space info
+ * @pe: EEH PE
+ *
+ * This routine performs a recursive walk to the children
+ * of this device as well.
+ */
+void eeh_pe_restore_bars(struct eeh_pe *pe)
+{
+ /*
+ * We needn't take the EEH lock since eeh_pe_dev_traverse()
+ * will take that.
+ */
+ eeh_pe_dev_traverse(pe, eeh_restore_one_device_bars, NULL);
+}
+
+/**
+ * eeh_pe_bus_get - Retrieve PCI bus according to the given PE
+ * @pe: EEH PE
+ *
+ * Retrieve the PCI bus according to the given PE. Basically,
+ * there're 3 types of PEs: PHB/Bus/Device. For PHB PE, the
+ * primary PCI bus will be retrieved. The parent bus will be
+ * returned for BUS PE. However, we don't have associated PCI
+ * bus for DEVICE PE.
+ */
+struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe)
+{
+ struct pci_bus *bus = NULL;
+ struct eeh_dev *edev;
+ struct pci_dev *pdev;
+
+ eeh_lock();
+
+ if (pe->type & EEH_PE_PHB) {
+ bus = pe->phb->bus;
+ } else if (pe->type & EEH_PE_BUS) {
+ edev = list_first_entry(&pe->edevs, struct eeh_dev, list);
+ pdev = eeh_dev_to_pci_dev(edev);
+ if (pdev)
+ bus = pdev->bus;
+ }
+
+ eeh_unlock();
+
+ return bus;
+}
diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
index c33360ec4f4..19506f93573 100644
--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -129,27 +129,117 @@ static int pseries_eeh_init(void)
eeh_error_buf_size = RTAS_ERROR_LOG_MAX;
}
+ /* Set EEH probe mode */
+ eeh_probe_mode_set(EEH_PROBE_MODE_DEVTREE);
+
return 0;
}
/**
+ * pseries_eeh_of_probe - EEH probe on the given device
+ * @dn: OF node
+ * @flag: Unused
+ *
+ * When EEH module is installed during system boot, all PCI devices
+ * are checked one by one to see if it supports EEH. The function
+ * is introduced for the purpose.
+ */
+static void *pseries_eeh_of_probe(struct device_node *dn, void *flag)
+{
+ struct eeh_dev *edev;
+ struct eeh_pe pe;
+ const u32 *class_code, *vendor_id, *device_id;
+ const u32 *regs;
+ int enable = 0;
+ int ret;
+
+ /* Retrieve OF node and eeh device */
+ edev = of_node_to_eeh_dev(dn);
+ if (!of_device_is_available(dn))
+ return NULL;
+
+ /* Retrieve class/vendor/device IDs */
+ class_code = of_get_property(dn, "class-code", NULL);
+ vendor_id = of_get_property(dn, "vendor-id", NULL);
+ device_id = of_get_property(dn, "device-id", NULL);
+
+ /* Skip for bad OF node or PCI-ISA bridge */
+ if (!class_code || !vendor_id || !device_id)
+ return NULL;
+ if (dn->type && !strcmp(dn->type, "isa"))
+ return NULL;
+
+ /* Update class code and mode of eeh device */
+ edev->class_code = *class_code;
+ edev->mode = 0;
+
+ /* Retrieve the device address */
+ regs = of_get_property(dn, "reg", NULL);
+ if (!regs) {
+ pr_warning("%s: OF node property %s::reg not found\n",
+ __func__, dn->full_name);
+ return NULL;
+ }
+
+ /* Initialize the fake PE */
+ memset(&pe, 0, sizeof(struct eeh_pe));
+ pe.phb = edev->phb;
+ pe.config_addr = regs[0];
+
+ /* Enable EEH on the device */
+ ret = eeh_ops->set_option(&pe, EEH_OPT_ENABLE);
+ if (!ret) {
+ edev->config_addr = regs[0];
+ /* Retrieve PE address */
+ edev->pe_config_addr = eeh_ops->get_pe_addr(&pe);
+ pe.addr = edev->pe_config_addr;
+
+ /* Some older systems (Power4) allow the ibm,set-eeh-option
+ * call to succeed even on nodes where EEH is not supported.
+ * Verify support explicitly.
+ */
+ ret = eeh_ops->get_state(&pe, NULL);
+ if (ret > 0 && ret != EEH_STATE_NOT_SUPPORT)
+ enable = 1;
+
+ if (enable) {
+ eeh_subsystem_enabled = 1;
+ eeh_add_to_parent_pe(edev);
+
+ pr_debug("%s: EEH enabled on %s PHB#%d-PE#%x, config addr#%x\n",
+ __func__, dn->full_name, pe.phb->global_number,
+ pe.addr, pe.config_addr);
+ } else if (dn->parent && of_node_to_eeh_dev(dn->parent) &&
+ (of_node_to_eeh_dev(dn->parent))->pe) {
+ /* This device doesn't support EEH, but it may have an
+ * EEH parent, in which case we mark it as supported.
+ */
+ edev->config_addr = of_node_to_eeh_dev(dn->parent)->config_addr;
+ edev->pe_config_addr = of_node_to_eeh_dev(dn->parent)->pe_config_addr;
+ eeh_add_to_parent_pe(edev);
+ }
+ }
+
+ /* Save memory bars */
+ eeh_save_bars(edev);
+
+ return NULL;
+}
+
+/**
* pseries_eeh_set_option - Initialize EEH or MMIO/DMA reenable
- * @dn: device node
+ * @pe: EEH PE
* @option: operation to be issued
*
* The function is used to control the EEH functionality globally.
* Currently, following options are support according to PAPR:
* Enable EEH, Disable EEH, Enable MMIO and Enable DMA
*/
-static int pseries_eeh_set_option(struct device_node *dn, int option)
+static int pseries_eeh_set_option(struct eeh_pe *pe, int option)
{
int ret = 0;
- struct eeh_dev *edev;
- const u32 *reg;
int config_addr;
- edev = of_node_to_eeh_dev(dn);
-
/*
* When we're enabling or disabling EEH functioality on
* the particular PE, the PE config address is possibly
@@ -159,15 +249,11 @@ static int pseries_eeh_set_option(struct device_node *dn, int option)
switch (option) {
case EEH_OPT_DISABLE:
case EEH_OPT_ENABLE:
- reg = of_get_property(dn, "reg", NULL);
- config_addr = reg[0];
- break;
-
case EEH_OPT_THAW_MMIO:
case EEH_OPT_THAW_DMA:
- config_addr = edev->config_addr;
- if (edev->pe_config_addr)
- config_addr = edev->pe_config_addr;
+ config_addr = pe->config_addr;
+ if (pe->addr)
+ config_addr = pe->addr;
break;
default:
@@ -177,15 +263,15 @@ static int pseries_eeh_set_option(struct device_node *dn, int option)
}
ret = rtas_call(ibm_set_eeh_option, 4, 1, NULL,
- config_addr, BUID_HI(edev->phb->buid),
- BUID_LO(edev->phb->buid), option);
+ config_addr, BUID_HI(pe->phb->buid),
+ BUID_LO(pe->phb->buid), option);
return ret;
}
/**
* pseries_eeh_get_pe_addr - Retrieve PE address
- * @dn: device node
+ * @pe: EEH PE
*
* Retrieve the assocated PE address. Actually, there're 2 RTAS
* function calls dedicated for the purpose. We need implement
@@ -196,14 +282,11 @@ static int pseries_eeh_set_option(struct device_node *dn, int option)
* It's notable that zero'ed return value means invalid PE config
* address.
*/
-static int pseries_eeh_get_pe_addr(struct device_node *dn)
+static int pseries_eeh_get_pe_addr(struct eeh_pe *pe)
{
- struct eeh_dev *edev;
int ret = 0;
int rets[3];
- edev = of_node_to_eeh_dev(dn);
-
if (ibm_get_config_addr_info2 != RTAS_UNKNOWN_SERVICE) {
/*
* First of all, we need to make sure there has one PE
@@ -211,18 +294,18 @@ static int pseries_eeh_get_pe_addr(struct device_node *dn)
* meaningless.
*/
ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets,
- edev->config_addr, BUID_HI(edev->phb->buid),
- BUID_LO(edev->phb->buid), 1);
+ pe->config_addr, BUID_HI(pe->phb->buid),
+ BUID_LO(pe->phb->buid), 1);
if (ret || (rets[0] == 0))
return 0;
/* Retrieve the associated PE config address */
ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets,
- edev->config_addr, BUID_HI(edev->phb->buid),
- BUID_LO(edev->phb->buid), 0);
+ pe->config_addr, BUID_HI(pe->phb->buid),
+ BUID_LO(pe->phb->buid), 0);
if (ret) {
- pr_warning("%s: Failed to get PE address for %s\n",
- __func__, dn->full_name);
+ pr_warning("%s: Failed to get address for PHB#%d-PE#%x\n",
+ __func__, pe->phb->global_number, pe->config_addr);
return 0;
}
@@ -231,11 +314,11 @@ static int pseries_eeh_get_pe_addr(struct device_node *dn)
if (ibm_get_config_addr_info != RTAS_UNKNOWN_SERVICE) {
ret = rtas_call(ibm_get_config_addr_info, 4, 2, rets,
- edev->config_addr, BUID_HI(edev->phb->buid),
- BUID_LO(edev->phb->buid), 0);
+ pe->config_addr, BUID_HI(pe->phb->buid),
+ BUID_LO(pe->phb->buid), 0);
if (ret) {
- pr_warning("%s: Failed to get PE address for %s\n",
- __func__, dn->full_name);
+ pr_warning("%s: Failed to get address for PHB#%d-PE#%x\n",
+ __func__, pe->phb->global_number, pe->config_addr);
return 0;
}
@@ -247,7 +330,7 @@ static int pseries_eeh_get_pe_addr(struct device_node *dn)
/**
* pseries_eeh_get_state - Retrieve PE state
- * @dn: PE associated device node
+ * @pe: EEH PE
* @state: return value
*
* Retrieve the state of the specified PE. On RTAS compliant
@@ -258,30 +341,28 @@ static int pseries_eeh_get_pe_addr(struct device_node *dn)
* RTAS calls for the purpose, we need to try the new one and back
* to the old one if the new one couldn't work properly.
*/
-static int pseries_eeh_get_state(struct device_node *dn, int *state)
+static int pseries_eeh_get_state(struct eeh_pe *pe, int *state)
{
- struct eeh_dev *edev;
int config_addr;
int ret;
int rets[4];
int result;
/* Figure out PE config address if possible */
- edev = of_node_to_eeh_dev(dn);
- config_addr = edev->config_addr;
- if (edev->pe_config_addr)
- config_addr = edev->pe_config_addr;
+ config_addr = pe->config_addr;
+ if (pe->addr)
+ config_addr = pe->addr;
if (ibm_read_slot_reset_state2 != RTAS_UNKNOWN_SERVICE) {
ret = rtas_call(ibm_read_slot_reset_state2, 3, 4, rets,
- config_addr, BUID_HI(edev->phb->buid),
- BUID_LO(edev->phb->buid));
+ config_addr, BUID_HI(pe->phb->buid),
+ BUID_LO(pe->phb->buid));
} else if (ibm_read_slot_reset_state != RTAS_UNKNOWN_SERVICE) {
/* Fake PE unavailable info */
rets[2] = 0;
ret = rtas_call(ibm_read_slot_reset_state, 3, 3, rets,
- config_addr, BUID_HI(edev->phb->buid),
- BUID_LO(edev->phb->buid));
+ config_addr, BUID_HI(pe->phb->buid),
+ BUID_LO(pe->phb->buid));
} else {
return EEH_STATE_NOT_SUPPORT;
}
@@ -333,34 +414,32 @@ static int pseries_eeh_get_state(struct device_node *dn, int *state)
/**
* pseries_eeh_reset - Reset the specified PE
- * @dn: PE associated device node
+ * @pe: EEH PE
* @option: reset option
*
* Reset the specified PE
*/
-static int pseries_eeh_reset(struct device_node *dn, int option)
+static int pseries_eeh_reset(struct eeh_pe *pe, int option)
{
- struct eeh_dev *edev;
int config_addr;
int ret;
/* Figure out PE address */
- edev = of_node_to_eeh_dev(dn);
- config_addr = edev->config_addr;
- if (edev->pe_config_addr)
- config_addr = edev->pe_config_addr;
+ config_addr = pe->config_addr;
+ if (pe->addr)
+ config_addr = pe->addr;
/* Reset PE through RTAS call */
ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
- config_addr, BUID_HI(edev->phb->buid),
- BUID_LO(edev->phb->buid), option);
+ config_addr, BUID_HI(pe->phb->buid),
+ BUID_LO(pe->phb->buid), option);
/* If fundamental-reset not supported, try hot-reset */
if (option == EEH_RESET_FUNDAMENTAL &&
ret == -8) {
ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
- config_addr, BUID_HI(edev->phb->buid),
- BUID_LO(edev->phb->buid), EEH_RESET_HOT);
+ config_addr, BUID_HI(pe->phb->buid),
+ BUID_LO(pe->phb->buid), EEH_RESET_HOT);
}
return ret;
@@ -368,13 +447,13 @@ static int pseries_eeh_reset(struct device_node *dn, int option)
/**
* pseries_eeh_wait_state - Wait for PE state
- * @dn: PE associated device node
+ * @pe: EEH PE
* @max_wait: maximal period in microsecond
*
* Wait for the state of associated PE. It might take some time
* to retrieve the PE's state.
*/
-static int pseries_eeh_wait_state(struct device_node *dn, int max_wait)
+static int pseries_eeh_wait_state(struct eeh_pe *pe, int max_wait)
{
int ret;
int mwait;
@@ -391,7 +470,7 @@ static int pseries_eeh_wait_state(struct device_node *dn, int max_wait)
#define EEH_STATE_MAX_WAIT_TIME (300 * 1000)
while (1) {
- ret = pseries_eeh_get_state(dn, &mwait);
+ ret = pseries_eeh_get_state(pe, &mwait);
/*
* If the PE's state is temporarily unavailable,
@@ -426,7 +505,7 @@ static int pseries_eeh_wait_state(struct device_node *dn, int max_wait)
/**
* pseries_eeh_get_log - Retrieve error log
- * @dn: device node
+ * @pe: EEH PE
* @severity: temporary or permanent error log
* @drv_log: driver log to be combined with retrieved error log
* @len: length of driver log
@@ -435,24 +514,22 @@ static int pseries_eeh_wait_state(struct device_node *dn, int max_wait)
* Actually, the error will be retrieved through the dedicated
* RTAS call.
*/
-static int pseries_eeh_get_log(struct device_node *dn, int severity, char *drv_log, unsigned long len)
+static int pseries_eeh_get_log(struct eeh_pe *pe, int severity, char *drv_log, unsigned long len)
{
- struct eeh_dev *edev;
int config_addr;
unsigned long flags;
int ret;
- edev = of_node_to_eeh_dev(dn);
spin_lock_irqsave(&slot_errbuf_lock, flags);
memset(slot_errbuf, 0, eeh_error_buf_size);
/* Figure out the PE address */
- config_addr = edev->config_addr;
- if (edev->pe_config_addr)
- config_addr = edev->pe_config_addr;
+ config_addr = pe->config_addr;
+ if (pe->addr)
+ config_addr = pe->addr;
ret = rtas_call(ibm_slot_error_detail, 8, 1, NULL, config_addr,
- BUID_HI(edev->phb->buid), BUID_LO(edev->phb->buid),
+ BUID_HI(pe->phb->buid), BUID_LO(pe->phb->buid),
virt_to_phys(drv_log), len,
virt_to_phys(slot_errbuf), eeh_error_buf_size,
severity);
@@ -465,40 +542,38 @@ static int pseries_eeh_get_log(struct device_node *dn, int severity, char *drv_l
/**
* pseries_eeh_configure_bridge - Configure PCI bridges in the indicated PE
- * @dn: PE associated device node
+ * @pe: EEH PE
*
* The function will be called to reconfigure the bridges included
* in the specified PE so that the mulfunctional PE would be recovered
* again.
*/
-static int pseries_eeh_configure_bridge(struct device_node *dn)
+static int pseries_eeh_configure_bridge(struct eeh_pe *pe)
{
- struct eeh_dev *edev;
int config_addr;
int ret;
/* Figure out the PE address */
- edev = of_node_to_eeh_dev(dn);
- config_addr = edev->config_addr;
- if (edev->pe_config_addr)
- config_addr = edev->pe_config_addr;
+ config_addr = pe->config_addr;
+ if (pe->addr)
+ config_addr = pe->addr;
/* Use new configure-pe function, if supported */
if (ibm_configure_pe != RTAS_UNKNOWN_SERVICE) {
ret = rtas_call(ibm_configure_pe, 3, 1, NULL,
- config_addr, BUID_HI(edev->phb->buid),
- BUID_LO(edev->phb->buid));
+ config_addr, BUID_HI(pe->phb->buid),
+ BUID_LO(pe->phb->buid));
} else if (ibm_configure_bridge != RTAS_UNKNOWN_SERVICE) {
ret = rtas_call(ibm_configure_bridge, 3, 1, NULL,
- config_addr, BUID_HI(edev->phb->buid),
- BUID_LO(edev->phb->buid));
+ config_addr, BUID_HI(pe->phb->buid),
+ BUID_LO(pe->phb->buid));
} else {
return -EFAULT;
}
if (ret)
- pr_warning("%s: Unable to configure bridge %d for %s\n",
- __func__, ret, dn->full_name);
+ pr_warning("%s: Unable to configure bridge PHB#%d-PE#%x (%d)\n",
+ __func__, pe->phb->global_number, pe->addr, ret);
return ret;
}
@@ -542,6 +617,8 @@ static int pseries_eeh_write_config(struct device_node *dn, int where, int size,
static struct eeh_ops pseries_eeh_ops = {
.name = "pseries",
.init = pseries_eeh_init,
+ .of_probe = pseries_eeh_of_probe,
+ .dev_probe = NULL,
.set_option = pseries_eeh_set_option,
.get_pe_addr = pseries_eeh_get_pe_addr,
.get_state = pseries_eeh_get_state,
@@ -559,7 +636,21 @@ static struct eeh_ops pseries_eeh_ops = {
* EEH initialization on pseries platform. This function should be
* called before any EEH related functions.
*/
-int __init eeh_pseries_init(void)
+static int __init eeh_pseries_init(void)
{
- return eeh_ops_register(&pseries_eeh_ops);
+ int ret = -EINVAL;
+
+ if (!machine_is(pseries))
+ return ret;
+
+ ret = eeh_ops_register(&pseries_eeh_ops);
+ if (!ret)
+ pr_info("EEH: pSeries platform initialized\n");
+ else
+ pr_info("EEH: pSeries platform initialization failure (%d)\n",
+ ret);
+
+ return ret;
}
+
+early_initcall(eeh_pseries_init);
diff --git a/arch/powerpc/platforms/pseries/eeh_sysfs.c b/arch/powerpc/platforms/pseries/eeh_sysfs.c
index 243b3510d70..d37708360f2 100644
--- a/arch/powerpc/platforms/pseries/eeh_sysfs.c
+++ b/arch/powerpc/platforms/pseries/eeh_sysfs.c
@@ -53,9 +53,6 @@ static DEVICE_ATTR(_name, S_IRUGO, eeh_show_##_name, NULL);
EEH_SHOW_ATTR(eeh_mode, mode, "0x%x");
EEH_SHOW_ATTR(eeh_config_addr, config_addr, "0x%x");
EEH_SHOW_ATTR(eeh_pe_config_addr, pe_config_addr, "0x%x");
-EEH_SHOW_ATTR(eeh_check_count, check_count, "%d" );
-EEH_SHOW_ATTR(eeh_freeze_count, freeze_count, "%d" );
-EEH_SHOW_ATTR(eeh_false_positives, false_positives, "%d" );
void eeh_sysfs_add_device(struct pci_dev *pdev)
{
@@ -64,9 +61,6 @@ void eeh_sysfs_add_device(struct pci_dev *pdev)
rc += device_create_file(&pdev->dev, &dev_attr_eeh_mode);
rc += device_create_file(&pdev->dev, &dev_attr_eeh_config_addr);
rc += device_create_file(&pdev->dev, &dev_attr_eeh_pe_config_addr);
- rc += device_create_file(&pdev->dev, &dev_attr_eeh_check_count);
- rc += device_create_file(&pdev->dev, &dev_attr_eeh_false_positives);
- rc += device_create_file(&pdev->dev, &dev_attr_eeh_freeze_count);
if (rc)
printk(KERN_WARNING "EEH: Unable to create sysfs entries\n");
@@ -77,8 +71,5 @@ void eeh_sysfs_remove_device(struct pci_dev *pdev)
device_remove_file(&pdev->dev, &dev_attr_eeh_mode);
device_remove_file(&pdev->dev, &dev_attr_eeh_config_addr);
device_remove_file(&pdev->dev, &dev_attr_eeh_pe_config_addr);
- device_remove_file(&pdev->dev, &dev_attr_eeh_check_count);
- device_remove_file(&pdev->dev, &dev_attr_eeh_false_positives);
- device_remove_file(&pdev->dev, &dev_attr_eeh_freeze_count);
}
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index 11d8e0544ac..ecdb0a6b317 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -78,6 +78,8 @@ static int pseries_remove_memblock(unsigned long base, unsigned int memblock_siz
unsigned long start, start_pfn;
struct zone *zone;
int ret;
+ unsigned long section;
+ unsigned long sections_to_remove;
start_pfn = base >> PAGE_SHIFT;
@@ -97,9 +99,13 @@ static int pseries_remove_memblock(unsigned long base, unsigned int memblock_siz
* to sysfs "state" file and we can't remove sysfs entries
* while writing to it. So we have to defer it to here.
*/
- ret = __remove_pages(zone, start_pfn, memblock_size >> PAGE_SHIFT);
- if (ret)
- return ret;
+ sections_to_remove = (memblock_size >> PAGE_SHIFT) / PAGES_PER_SECTION;
+ for (section = 0; section < sections_to_remove; section++) {
+ unsigned long pfn = start_pfn + section * PAGES_PER_SECTION;
+ ret = __remove_pages(zone, pfn, PAGES_PER_SECTION);
+ if (ret)
+ return ret;
+ }
/*
* Update memory regions for memory remove
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index bca220f2873..6153eea27ce 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -28,6 +28,7 @@
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/mm.h>
+#include <linux/memblock.h>
#include <linux/spinlock.h>
#include <linux/sched.h> /* for show_stack */
#include <linux/string.h>
@@ -41,7 +42,6 @@
#include <asm/iommu.h>
#include <asm/pci-bridge.h>
#include <asm/machdep.h>
-#include <asm/abs_addr.h>
#include <asm/pSeries_reconfig.h>
#include <asm/firmware.h>
#include <asm/tce.h>
@@ -99,7 +99,7 @@ static int tce_build_pSeries(struct iommu_table *tbl, long index,
while (npages--) {
/* can't move this out since we might cross MEMBLOCK boundary */
- rpn = (virt_to_abs(uaddr)) >> TCE_SHIFT;
+ rpn = __pa(uaddr) >> TCE_SHIFT;
*tcep = proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT;
uaddr += TCE_PAGE_SIZE;
@@ -148,7 +148,7 @@ static int tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum,
int ret = 0;
long tcenum_start = tcenum, npages_start = npages;
- rpn = (virt_to_abs(uaddr)) >> TCE_SHIFT;
+ rpn = __pa(uaddr) >> TCE_SHIFT;
proto_tce = TCE_PCI_READ;
if (direction != DMA_TO_DEVICE)
proto_tce |= TCE_PCI_WRITE;
@@ -217,7 +217,7 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
__get_cpu_var(tce_page) = tcep;
}
- rpn = (virt_to_abs(uaddr)) >> TCE_SHIFT;
+ rpn = __pa(uaddr) >> TCE_SHIFT;
proto_tce = TCE_PCI_READ;
if (direction != DMA_TO_DEVICE)
proto_tce |= TCE_PCI_WRITE;
@@ -237,7 +237,7 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
rc = plpar_tce_put_indirect((u64)tbl->it_index,
(u64)tcenum << 12,
- (u64)virt_to_abs(tcep),
+ (u64)__pa(tcep),
limit);
npages -= limit;
@@ -441,7 +441,7 @@ static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn,
rc = plpar_tce_put_indirect(liobn,
dma_offset,
- (u64)virt_to_abs(tcep),
+ (u64)__pa(tcep),
limit);
num_tce -= limit;
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 5f3ef876ded..0da39fed355 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -31,7 +31,6 @@
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/machdep.h>
-#include <asm/abs_addr.h>
#include <asm/mmu_context.h>
#include <asm/iommu.h>
#include <asm/tlbflush.h>
@@ -108,9 +107,9 @@ void vpa_init(int cpu)
}
static long pSeries_lpar_hpte_insert(unsigned long hpte_group,
- unsigned long va, unsigned long pa,
- unsigned long rflags, unsigned long vflags,
- int psize, int ssize)
+ unsigned long vpn, unsigned long pa,
+ unsigned long rflags, unsigned long vflags,
+ int psize, int ssize)
{
unsigned long lpar_rc;
unsigned long flags;
@@ -118,11 +117,11 @@ static long pSeries_lpar_hpte_insert(unsigned long hpte_group,
unsigned long hpte_v, hpte_r;
if (!(vflags & HPTE_V_BOLTED))
- pr_devel("hpte_insert(group=%lx, va=%016lx, pa=%016lx, "
- "rflags=%lx, vflags=%lx, psize=%d)\n",
- hpte_group, va, pa, rflags, vflags, psize);
+ pr_devel("hpte_insert(group=%lx, vpn=%016lx, "
+ "pa=%016lx, rflags=%lx, vflags=%lx, psize=%d)\n",
+ hpte_group, vpn, pa, rflags, vflags, psize);
- hpte_v = hpte_encode_v(va, psize, ssize) | vflags | HPTE_V_VALID;
+ hpte_v = hpte_encode_v(vpn, psize, ssize) | vflags | HPTE_V_VALID;
hpte_r = hpte_encode_r(pa, psize) | rflags;
if (!(vflags & HPTE_V_BOLTED))
@@ -227,22 +226,6 @@ static void pSeries_lpar_hptab_clear(void)
}
/*
- * This computes the AVPN and B fields of the first dword of a HPTE,
- * for use when we want to match an existing PTE. The bottom 7 bits
- * of the returned value are zero.
- */
-static inline unsigned long hpte_encode_avpn(unsigned long va, int psize,
- int ssize)
-{
- unsigned long v;
-
- v = (va >> 23) & ~(mmu_psize_defs[psize].avpnm);
- v <<= HPTE_V_AVPN_SHIFT;
- v |= ((unsigned long) ssize) << HPTE_V_SSIZE_SHIFT;
- return v;
-}
-
-/*
* NOTE: for updatepp ops we are fortunate that the linux "newpp" bits and
* the low 3 bits of flags happen to line up. So no transform is needed.
* We can probably optimize here and assume the high bits of newpp are
@@ -250,14 +233,14 @@ static inline unsigned long hpte_encode_avpn(unsigned long va, int psize,
*/
static long pSeries_lpar_hpte_updatepp(unsigned long slot,
unsigned long newpp,
- unsigned long va,
+ unsigned long vpn,
int psize, int ssize, int local)
{
unsigned long lpar_rc;
unsigned long flags = (newpp & 7) | H_AVPN;
unsigned long want_v;
- want_v = hpte_encode_avpn(va, psize, ssize);
+ want_v = hpte_encode_avpn(vpn, psize, ssize);
pr_devel(" update: avpnv=%016lx, hash=%016lx, f=%lx, psize: %d ...",
want_v, slot, flags, psize);
@@ -295,15 +278,15 @@ static unsigned long pSeries_lpar_hpte_getword0(unsigned long slot)
return dword0;
}
-static long pSeries_lpar_hpte_find(unsigned long va, int psize, int ssize)
+static long pSeries_lpar_hpte_find(unsigned long vpn, int psize, int ssize)
{
unsigned long hash;
unsigned long i;
long slot;
unsigned long want_v, hpte_v;
- hash = hpt_hash(va, mmu_psize_defs[psize].shift, ssize);
- want_v = hpte_encode_avpn(va, psize, ssize);
+ hash = hpt_hash(vpn, mmu_psize_defs[psize].shift, ssize);
+ want_v = hpte_encode_avpn(vpn, psize, ssize);
/* Bolted entries are always in the primary group */
slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
@@ -323,12 +306,13 @@ static void pSeries_lpar_hpte_updateboltedpp(unsigned long newpp,
unsigned long ea,
int psize, int ssize)
{
- unsigned long lpar_rc, slot, vsid, va, flags;
+ unsigned long vpn;
+ unsigned long lpar_rc, slot, vsid, flags;
vsid = get_kernel_vsid(ea, ssize);
- va = hpt_va(ea, vsid, ssize);
+ vpn = hpt_vpn(ea, vsid, ssize);
- slot = pSeries_lpar_hpte_find(va, psize, ssize);
+ slot = pSeries_lpar_hpte_find(vpn, psize, ssize);
BUG_ON(slot == -1);
flags = newpp & 7;
@@ -337,17 +321,17 @@ static void pSeries_lpar_hpte_updateboltedpp(unsigned long newpp,
BUG_ON(lpar_rc != H_SUCCESS);
}
-static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long va,
+static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long vpn,
int psize, int ssize, int local)
{
unsigned long want_v;
unsigned long lpar_rc;
unsigned long dummy1, dummy2;
- pr_devel(" inval : slot=%lx, va=%016lx, psize: %d, local: %d\n",
- slot, va, psize, local);
+ pr_devel(" inval : slot=%lx, vpn=%016lx, psize: %d, local: %d\n",
+ slot, vpn, psize, local);
- want_v = hpte_encode_avpn(va, psize, ssize);
+ want_v = hpte_encode_avpn(vpn, psize, ssize);
lpar_rc = plpar_pte_remove(H_AVPN, slot, want_v, &dummy1, &dummy2);
if (lpar_rc == H_NOT_FOUND)
return;
@@ -358,15 +342,16 @@ static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long va,
static void pSeries_lpar_hpte_removebolted(unsigned long ea,
int psize, int ssize)
{
- unsigned long slot, vsid, va;
+ unsigned long vpn;
+ unsigned long slot, vsid;
vsid = get_kernel_vsid(ea, ssize);
- va = hpt_va(ea, vsid, ssize);
+ vpn = hpt_vpn(ea, vsid, ssize);
- slot = pSeries_lpar_hpte_find(va, psize, ssize);
+ slot = pSeries_lpar_hpte_find(vpn, psize, ssize);
BUG_ON(slot == -1);
- pSeries_lpar_hpte_invalidate(slot, va, psize, ssize, 0);
+ pSeries_lpar_hpte_invalidate(slot, vpn, psize, ssize, 0);
}
/* Flag bits for H_BULK_REMOVE */
@@ -382,12 +367,12 @@ static void pSeries_lpar_hpte_removebolted(unsigned long ea,
*/
static void pSeries_lpar_flush_hash_range(unsigned long number, int local)
{
+ unsigned long vpn;
unsigned long i, pix, rc;
unsigned long flags = 0;
struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
unsigned long param[9];
- unsigned long va;
unsigned long hash, index, shift, hidx, slot;
real_pte_t pte;
int psize, ssize;
@@ -399,21 +384,21 @@ static void pSeries_lpar_flush_hash_range(unsigned long number, int local)
ssize = batch->ssize;
pix = 0;
for (i = 0; i < number; i++) {
- va = batch->vaddr[i];
+ vpn = batch->vpn[i];
pte = batch->pte[i];
- pte_iterate_hashed_subpages(pte, psize, va, index, shift) {
- hash = hpt_hash(va, shift, ssize);
+ pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) {
+ hash = hpt_hash(vpn, shift, ssize);
hidx = __rpte_to_hidx(pte, index);
if (hidx & _PTEIDX_SECONDARY)
hash = ~hash;
slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
slot += hidx & _PTEIDX_GROUP_IX;
if (!firmware_has_feature(FW_FEATURE_BULK_REMOVE)) {
- pSeries_lpar_hpte_invalidate(slot, va, psize,
+ pSeries_lpar_hpte_invalidate(slot, vpn, psize,
ssize, local);
} else {
param[pix] = HBR_REQUEST | HBR_AVPN | slot;
- param[pix+1] = hpte_encode_avpn(va, psize,
+ param[pix+1] = hpte_encode_avpn(vpn, psize,
ssize);
pix += 2;
if (pix == 8) {
diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c
index 109fdb75578..d19f4977c83 100644
--- a/arch/powerpc/platforms/pseries/msi.c
+++ b/arch/powerpc/platforms/pseries/msi.c
@@ -210,6 +210,7 @@ static struct device_node *find_pe_total_msi(struct pci_dev *dev, int *total)
static struct device_node *find_pe_dn(struct pci_dev *dev, int *total)
{
struct device_node *dn;
+ struct eeh_dev *edev;
/* Found our PE and assume 8 at that point. */
@@ -217,7 +218,10 @@ static struct device_node *find_pe_dn(struct pci_dev *dev, int *total)
if (!dn)
return NULL;
- dn = eeh_find_device_pe(dn);
+ /* Get the top level device in the PE */
+ edev = of_node_to_eeh_dev(dn);
+ edev = list_first_entry(&edev->pe->edevs, struct eeh_dev, list);
+ dn = eeh_dev_to_of_node(edev);
if (!dn)
return NULL;
@@ -387,12 +391,13 @@ static int check_msix_entries(struct pci_dev *pdev)
return 0;
}
-static int rtas_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
+static int rtas_setup_msi_irqs(struct pci_dev *pdev, int nvec_in, int type)
{
struct pci_dn *pdn;
int hwirq, virq, i, rc;
struct msi_desc *entry;
struct msi_msg msg;
+ int nvec = nvec_in;
pdn = get_pdn(pdev);
if (!pdn)
@@ -402,10 +407,23 @@ static int rtas_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
return -EINVAL;
/*
+ * Firmware currently refuse any non power of two allocation
+ * so we round up if the quota will allow it.
+ */
+ if (type == PCI_CAP_ID_MSIX) {
+ int m = roundup_pow_of_two(nvec);
+ int quota = msi_quota_for_device(pdev, m);
+
+ if (quota >= m)
+ nvec = m;
+ }
+
+ /*
* Try the new more explicit firmware interface, if that fails fall
* back to the old interface. The old interface is known to never
* return MSI-Xs.
*/
+again:
if (type == PCI_CAP_ID_MSI) {
rc = rtas_change_msi(pdn, RTAS_CHANGE_MSI_FN, nvec);
@@ -417,6 +435,10 @@ static int rtas_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
rc = rtas_change_msi(pdn, RTAS_CHANGE_MSIX_FN, nvec);
if (rc != nvec) {
+ if (nvec != nvec_in) {
+ nvec = nvec_in;
+ goto again;
+ }
pr_debug("rtas_msi: rtas_change_msi() failed\n");
return rc;
}
diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c
index 2c6ded29f73..56b864d777e 100644
--- a/arch/powerpc/platforms/pseries/pci.c
+++ b/arch/powerpc/platforms/pseries/pci.c
@@ -73,7 +73,7 @@ void __init pSeries_final_fixup(void)
{
pSeries_request_regions();
- pci_addr_cache_build();
+ eeh_addr_cache_build();
}
/*
diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c
index 3ccebc83dc0..261a577a3dd 100644
--- a/arch/powerpc/platforms/pseries/pci_dlpar.c
+++ b/arch/powerpc/platforms/pseries/pci_dlpar.c
@@ -65,27 +65,43 @@ pcibios_find_pci_bus(struct device_node *dn)
EXPORT_SYMBOL_GPL(pcibios_find_pci_bus);
/**
- * pcibios_remove_pci_devices - remove all devices under this bus
+ * __pcibios_remove_pci_devices - remove all devices under this bus
+ * @bus: the indicated PCI bus
+ * @purge_pe: destroy the PE on removal of PCI devices
*
* Remove all of the PCI devices under this bus both from the
* linux pci device tree, and from the powerpc EEH address cache.
+ * By default, the corresponding PE will be destroied during the
+ * normal PCI hotplug path. For PCI hotplug during EEH recovery,
+ * the corresponding PE won't be destroied and deallocated.
*/
-void pcibios_remove_pci_devices(struct pci_bus *bus)
+void __pcibios_remove_pci_devices(struct pci_bus *bus, int purge_pe)
{
- struct pci_dev *dev, *tmp;
+ struct pci_dev *dev, *tmp;
struct pci_bus *child_bus;
/* First go down child busses */
list_for_each_entry(child_bus, &bus->children, node)
- pcibios_remove_pci_devices(child_bus);
+ __pcibios_remove_pci_devices(child_bus, purge_pe);
pr_debug("PCI: Removing devices on bus %04x:%02x\n",
- pci_domain_nr(bus), bus->number);
+ pci_domain_nr(bus), bus->number);
list_for_each_entry_safe(dev, tmp, &bus->devices, bus_list) {
pr_debug(" * Removing %s...\n", pci_name(dev));
- eeh_remove_bus_device(dev);
- pci_stop_and_remove_bus_device(dev);
- }
+ eeh_remove_bus_device(dev, purge_pe);
+ pci_stop_and_remove_bus_device(dev);
+ }
+}
+
+/**
+ * pcibios_remove_pci_devices - remove all devices under this bus
+ *
+ * Remove all of the PCI devices under this bus both from the
+ * linux pci device tree, and from the powerpc EEH address cache.
+ */
+void pcibios_remove_pci_devices(struct pci_bus *bus)
+{
+ __pcibios_remove_pci_devices(bus, 1);
}
EXPORT_SYMBOL_GPL(pcibios_remove_pci_devices);
diff --git a/arch/powerpc/platforms/pseries/processor_idle.c b/arch/powerpc/platforms/pseries/processor_idle.c
index 455760b1fe6..45d00e5fe14 100644
--- a/arch/powerpc/platforms/pseries/processor_idle.c
+++ b/arch/powerpc/platforms/pseries/processor_idle.c
@@ -33,13 +33,6 @@ static int max_idle_state = MAX_IDLE_STATE_COUNT - 1;
static struct cpuidle_device __percpu *pseries_cpuidle_devices;
static struct cpuidle_state *cpuidle_state_table;
-void update_smt_snooze_delay(int snooze)
-{
- struct cpuidle_driver *drv = cpuidle_get_driver();
- if (drv)
- drv->states[0].target_residency = snooze;
-}
-
static inline void idle_loop_prolog(unsigned long *in_purr, ktime_t *kt_before)
{
@@ -66,32 +59,22 @@ static int snooze_loop(struct cpuidle_device *dev,
{
unsigned long in_purr;
ktime_t kt_before;
- unsigned long start_snooze;
- long snooze = drv->states[0].target_residency;
+ int cpu = dev->cpu;
idle_loop_prolog(&in_purr, &kt_before);
+ local_irq_enable();
+ set_thread_flag(TIF_POLLING_NRFLAG);
- if (snooze) {
- start_snooze = get_tb() + snooze * tb_ticks_per_usec;
- local_irq_enable();
- set_thread_flag(TIF_POLLING_NRFLAG);
-
- while ((snooze < 0) || (get_tb() < start_snooze)) {
- if (need_resched() || cpu_is_offline(dev->cpu))
- goto out;
- ppc64_runlatch_off();
- HMT_low();
- HMT_very_low();
- }
-
- HMT_medium();
- clear_thread_flag(TIF_POLLING_NRFLAG);
- smp_mb();
- local_irq_disable();
+ while ((!need_resched()) && cpu_online(cpu)) {
+ ppc64_runlatch_off();
+ HMT_low();
+ HMT_very_low();
}
-out:
HMT_medium();
+ clear_thread_flag(TIF_POLLING_NRFLAG);
+ smp_mb();
+
dev->last_residency =
(int)idle_loop_epilog(in_purr, kt_before);
return index;
@@ -172,8 +155,8 @@ static struct cpuidle_state dedicated_states[MAX_IDLE_STATE_COUNT] = {
.name = "CEDE",
.desc = "CEDE",
.flags = CPUIDLE_FLAG_TIME_VALID,
- .exit_latency = 1,
- .target_residency = 10,
+ .exit_latency = 10,
+ .target_residency = 100,
.enter = &dedicated_cede_loop },
};
@@ -190,6 +173,23 @@ static struct cpuidle_state shared_states[MAX_IDLE_STATE_COUNT] = {
.enter = &shared_cede_loop },
};
+void update_smt_snooze_delay(int cpu, int residency)
+{
+ struct cpuidle_driver *drv = cpuidle_get_driver();
+ struct cpuidle_device *dev = per_cpu(cpuidle_devices, cpu);
+
+ if (cpuidle_state_table != dedicated_states)
+ return;
+
+ if (residency < 0) {
+ /* Disable the Nap state on that cpu */
+ if (dev)
+ dev->states_usage[1].disable = 1;
+ } else
+ if (drv)
+ drv->states[1].target_residency = residency;
+}
+
static int pseries_cpuidle_add_cpu_notifier(struct notifier_block *n,
unsigned long action, void *hcpu)
{
@@ -246,10 +246,6 @@ static int pseries_cpuidle_driver_init(void)
drv->states[drv->state_count] = /* structure copy */
cpuidle_state_table[idle_state];
- if (cpuidle_state_table == dedicated_states)
- drv->states[drv->state_count].target_residency =
- __get_cpu_var(smt_snooze_delay);
-
drv->state_count += 1;
}
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 51ecac920dd..e3cb7ae6165 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -388,10 +388,8 @@ static void __init pSeries_setup_arch(void)
/* Find and initialize PCI host bridges */
init_pci_config_tokens();
- eeh_pseries_init();
find_and_init_phbs();
pSeries_reconfig_notifier_register(&pci_dn_reconfig_nb);
- eeh_init();
pSeries_nvram_init();
@@ -416,16 +414,20 @@ static int __init pSeries_init_panel(void)
}
machine_arch_initcall(pseries, pSeries_init_panel);
-static int pseries_set_dabr(unsigned long dabr)
+static int pseries_set_dabr(unsigned long dabr, unsigned long dabrx)
{
return plpar_hcall_norets(H_SET_DABR, dabr);
}
-static int pseries_set_xdabr(unsigned long dabr)
+static int pseries_set_xdabr(unsigned long dabr, unsigned long dabrx)
{
- /* We want to catch accesses from kernel and userspace */
- return plpar_hcall_norets(H_SET_XDABR, dabr,
- H_DABRX_KERNEL | H_DABRX_USER);
+ /* Have to set at least one bit in the DABRX according to PAPR */
+ if (dabrx == 0 && dabr == 0)
+ dabrx = DABRX_USER;
+ /* PAPR says we can only set kernel and user bits */
+ dabrx &= DABRX_KERNEL | DABRX_USER;
+
+ return plpar_hcall_norets(H_SET_XDABR, dabr, dabrx);
}
#define CMO_CHARACTERISTICS_TOKEN 44
@@ -529,10 +531,10 @@ static void __init pSeries_init_early(void)
if (firmware_has_feature(FW_FEATURE_LPAR))
hvc_vio_init_early();
#endif
- if (firmware_has_feature(FW_FEATURE_DABR))
- ppc_md.set_dabr = pseries_set_dabr;
- else if (firmware_has_feature(FW_FEATURE_XDABR))
+ if (firmware_has_feature(FW_FEATURE_XDABR))
ppc_md.set_dabr = pseries_set_xdabr;
+ else if (firmware_has_feature(FW_FEATURE_DABR))
+ ppc_md.set_dabr = pseries_set_dabr;
pSeries_cmo_feature_init();
iommu_init_early_pSeries();