From 33959f88fce9b8d3346d8000b3425814cbc6d6c0 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Thu, 18 Jul 2013 11:31:51 +1000 Subject: powerpc: Add second POWER8 PVR entry POWER8 comes with two different PVRs. This patch enables the additional PVR in the cputable. The existing entry (PVR=0x4b) is renamed to POWER8E and the new entry (PVR=0x4d) is given POWER8. Signed-off-by: Michael Neuling Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/reg.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 5d7d9c2a547..a6840e4e24f 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -1088,7 +1088,8 @@ #define PVR_970MP 0x0044 #define PVR_970GX 0x0045 #define PVR_POWER7p 0x004A -#define PVR_POWER8 0x004B +#define PVR_POWER8E 0x004B +#define PVR_POWER8 0x004D #define PVR_BE 0x0070 #define PVR_PA6T 0x0090 -- cgit v1.2.3-70-g09d2 From 0e0ed6406e61434d3f38fb58aa8464ec4722b77e Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 15 Jul 2013 14:04:50 +1000 Subject: powerpc/modules: Module CRC relocation fix causes perf issues Module CRCs are implemented as absolute symbols that get resolved by a linker script. We build an intermediate .o that contains an unresolved symbol for each CRC. genksysms parses this .o, calculates the CRCs and writes a linker script that "resolves" the symbols to the calculated CRC. Unfortunately the ppc64 relocatable kernel sees these CRCs as symbols that need relocating and relocates them at boot. Commit d4703aef (module: handle ppc64 relocating kcrctabs when CONFIG_RELOCATABLE=y) added a hook to reverse the bogus relocations. Part of this patch created a symbol at 0x0: # head -2 /proc/kallsyms 0000000000000000 T reloc_start c000000000000000 T .__start This reloc_start symbol is causing lots of confusion to perf. It thinks reloc_start is a massive function that stretches from 0x0 to 0xc000000000000000 and we get various cryptic errors out of perf, including: problem incrementing symbol count, skipping event This patch removes the reloc_start linker script label and instead defines it as PHYSICAL_START. We also need to wrap it with CONFIG_PPC64 because the ppc32 kernel can set a non zero PHYSICAL_START at compile time and we wouldn't want to subtract it from the CRCs in that case. Signed-off-by: Anton Blanchard Cc: Acked-by: Rusty Russell Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/module.h | 5 ++--- arch/powerpc/kernel/vmlinux.lds.S | 3 --- 2 files changed, 2 insertions(+), 6 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/module.h b/arch/powerpc/include/asm/module.h index c1df590ec44..49fa55bfbac 100644 --- a/arch/powerpc/include/asm/module.h +++ b/arch/powerpc/include/asm/module.h @@ -82,10 +82,9 @@ struct exception_table_entry; void sort_ex_table(struct exception_table_entry *start, struct exception_table_entry *finish); -#ifdef CONFIG_MODVERSIONS +#if defined(CONFIG_MODVERSIONS) && defined(CONFIG_PPC64) #define ARCH_RELOCATES_KCRCTAB - -extern const unsigned long reloc_start[]; +#define reloc_start PHYSICAL_START #endif #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_MODULE_H */ diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 654e479802f..f096e72262f 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -38,9 +38,6 @@ jiffies = jiffies_64 + 4; #endif SECTIONS { - . = 0; - reloc_start = .; - . = KERNELBASE; /* -- cgit v1.2.3-70-g09d2 From 0b88f772bdf2847461debf417b1ee96043a7cb1b Mon Sep 17 00:00:00 2001 From: Tiejun Chen Date: Mon, 15 Jul 2013 10:36:04 +0800 Subject: powerpc: Access local paca after hard irq disabled In hard_irq_disable(), we accessed the PACA before we hard disabled the interrupts, potentially causing a warning as get_paca() will us debug_smp_processor_id(). Move that to after the disabling, and also use local_paca directly rather than get_paca() to avoid several redundant and useless checks. Signed-off-by: Tiejun Chen Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/hw_irq.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index ba713f166fa..10be1dd01c6 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -96,10 +96,11 @@ static inline bool arch_irqs_disabled(void) #endif #define hard_irq_disable() do { \ - u8 _was_enabled = get_paca()->soft_enabled; \ + u8 _was_enabled; \ __hard_irq_disable(); \ - get_paca()->soft_enabled = 0; \ - get_paca()->irq_happened |= PACA_IRQ_HARD_DIS; \ + _was_enabled = local_paca->soft_enabled; \ + local_paca->soft_enabled = 0; \ + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; \ if (_was_enabled) \ trace_hardirqs_off(); \ } while(0) -- cgit v1.2.3-70-g09d2 From f2856491d24044de08da9e53cf7068841a8b4e1c Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Wed, 24 Jul 2013 10:24:52 +0800 Subject: powerpc/eeh: Export functions for hotplug Make some functions public in order to support hotplug on either specific PCI bus or PCI device in future. Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/eeh.h | 9 +++++++++ arch/powerpc/kernel/eeh.c | 6 +++--- 2 files changed, 12 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 09a8743143f..d9d35c27de2 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -209,9 +209,12 @@ unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned long val); int eeh_dev_check_failure(struct eeh_dev *edev); void eeh_addr_cache_build(void); +void eeh_add_device_early(struct device_node *); void eeh_add_device_tree_early(struct device_node *); +void eeh_add_device_late(struct pci_dev *); void eeh_add_device_tree_late(struct pci_bus *); void eeh_add_sysfs_files(struct pci_bus *); +void eeh_remove_device(struct pci_dev *, int); void eeh_remove_bus_device(struct pci_dev *, int); /** @@ -252,12 +255,18 @@ static inline unsigned long eeh_check_failure(const volatile void __iomem *token static inline void eeh_addr_cache_build(void) { } +static inline void eeh_add_device_early(struct device_node *dn) { } + static inline void eeh_add_device_tree_early(struct device_node *dn) { } +static inline void eeh_add_device_late(struct pci_dev *dev) { } + static inline void eeh_add_device_tree_late(struct pci_bus *bus) { } static inline void eeh_add_sysfs_files(struct pci_bus *bus) { } +static inline void eeh_remove_device(struct pci_dev *dev, int purge_pe) { } + static inline void eeh_remove_bus_device(struct pci_dev *dev, int purge_pe) { } #define EEH_POSSIBLE_ERROR(val, type) (0) diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index b5c425ea297..582ad1ef46a 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -836,7 +836,7 @@ core_initcall_sync(eeh_init); * on the CEC architecture, type of the device, on earlier boot * command-line arguments & etc. */ -static void eeh_add_device_early(struct device_node *dn) +void eeh_add_device_early(struct device_node *dn) { struct pci_controller *phb; @@ -884,7 +884,7 @@ EXPORT_SYMBOL_GPL(eeh_add_device_tree_early); * This routine must be used to complete EEH initialization for PCI * devices that were added after system boot (e.g. hotplug, dlpar). */ -static void eeh_add_device_late(struct pci_dev *dev) +void eeh_add_device_late(struct pci_dev *dev) { struct device_node *dn; struct eeh_dev *edev; @@ -972,7 +972,7 @@ EXPORT_SYMBOL_GPL(eeh_add_sysfs_files); * this device will no longer be detected after this call; thus, * i/o errors affecting this slot may leave this device unusable. */ -static void eeh_remove_device(struct pci_dev *dev, int purge_pe) +void eeh_remove_device(struct pci_dev *dev, int purge_pe) { struct eeh_dev *edev; -- cgit v1.2.3-70-g09d2 From 807a827d4e7455a40e8f56ec2a67c57a91cab9f7 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Wed, 24 Jul 2013 10:24:55 +0800 Subject: powerpc/eeh: Keep PE during hotplug When we do normal hotplug, the PE (shadow EEH structure) shouldn't be kept around. However, we need to keep it if the hotplug an artifial one caused by EEH errors recovery. Since we remove EEH device through the PCI hook pcibios_release_device(), the flag "purge_pe" passed to various functions is meaningless. So the patch removes the meaningless flag and introduce new flag "EEH_PE_KEEP" to save the PE while doing hotplug during EEH error recovery. Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/eeh.h | 11 +++++------ arch/powerpc/include/asm/pci-bridge.h | 1 - arch/powerpc/kernel/eeh.c | 28 ++-------------------------- arch/powerpc/kernel/eeh_driver.c | 7 +++++-- arch/powerpc/kernel/eeh_pe.c | 7 +++---- arch/powerpc/kernel/pci-hotplug.c | 26 +++++--------------------- 6 files changed, 20 insertions(+), 60 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index d9d35c27de2..2ce22d7b71a 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -55,6 +55,8 @@ struct device_node; #define EEH_PE_RECOVERING (1 << 1) /* Recovering PE */ #define EEH_PE_PHB_DEAD (1 << 2) /* Dead PHB */ +#define EEH_PE_KEEP (1 << 8) /* Keep PE on hotplug */ + struct eeh_pe { int type; /* PE type: PHB/Bus/Device */ int state; /* PE EEH dependent mode */ @@ -193,7 +195,7 @@ int eeh_phb_pe_create(struct pci_controller *phb); struct eeh_pe *eeh_phb_pe_get(struct pci_controller *phb); struct eeh_pe *eeh_pe_get(struct eeh_dev *edev); int eeh_add_to_parent_pe(struct eeh_dev *edev); -int eeh_rmv_from_parent_pe(struct eeh_dev *edev, int purge_pe); +int eeh_rmv_from_parent_pe(struct eeh_dev *edev); void eeh_pe_update_time_stamp(struct eeh_pe *pe); void *eeh_pe_dev_traverse(struct eeh_pe *root, eeh_traverse_func fn, void *flag); @@ -214,8 +216,7 @@ void eeh_add_device_tree_early(struct device_node *); void eeh_add_device_late(struct pci_dev *); void eeh_add_device_tree_late(struct pci_bus *); void eeh_add_sysfs_files(struct pci_bus *); -void eeh_remove_device(struct pci_dev *, int); -void eeh_remove_bus_device(struct pci_dev *, int); +void eeh_remove_device(struct pci_dev *); /** * EEH_POSSIBLE_ERROR() -- test for possible MMIO failure. @@ -265,9 +266,7 @@ static inline void eeh_add_device_tree_late(struct pci_bus *bus) { } static inline void eeh_add_sysfs_files(struct pci_bus *bus) { } -static inline void eeh_remove_device(struct pci_dev *dev, int purge_pe) { } - -static inline void eeh_remove_bus_device(struct pci_dev *dev, int purge_pe) { } +static inline void eeh_remove_device(struct pci_dev *dev) { } #define EEH_POSSIBLE_ERROR(val, type) (0) #define EEH_IO_ERROR_VALUE(size) (-1UL) diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index 2c1d8cb9b26..32d0d2018fa 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -209,7 +209,6 @@ static inline struct eeh_dev *of_node_to_eeh_dev(struct device_node *dn) extern struct pci_bus *pcibios_find_pci_bus(struct device_node *dn); /** Remove all of the PCI devices under this bus */ -extern void __pcibios_remove_pci_devices(struct pci_bus *bus, int purge_pe); extern void pcibios_remove_pci_devices(struct pci_bus *bus); /** Discover new pci devices under this bus, and add them */ diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 582ad1ef46a..ce81477316b 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -964,7 +964,6 @@ EXPORT_SYMBOL_GPL(eeh_add_sysfs_files); /** * eeh_remove_device - Undo EEH setup for the indicated pci device * @dev: pci device to be removed - * @purge_pe: remove the PE or not * * This routine should be called when a device is removed from * a running system (e.g. by hotplug or dlpar). It unregisters @@ -972,7 +971,7 @@ EXPORT_SYMBOL_GPL(eeh_add_sysfs_files); * this device will no longer be detected after this call; thus, * i/o errors affecting this slot may leave this device unusable. */ -void eeh_remove_device(struct pci_dev *dev, int purge_pe) +void eeh_remove_device(struct pci_dev *dev) { struct eeh_dev *edev; @@ -990,34 +989,11 @@ void eeh_remove_device(struct pci_dev *dev, int purge_pe) edev->pdev = NULL; dev->dev.archdata.edev = NULL; - eeh_rmv_from_parent_pe(edev, purge_pe); + eeh_rmv_from_parent_pe(edev); eeh_addr_cache_rmv_dev(dev); eeh_sysfs_remove_device(dev); } -/** - * eeh_remove_bus_device - Undo EEH setup for the indicated PCI device - * @dev: PCI device - * @purge_pe: remove the corresponding PE or not - * - * This routine must be called when a device is removed from the - * running system through hotplug or dlpar. The corresponding - * PCI address cache will be removed. - */ -void eeh_remove_bus_device(struct pci_dev *dev, int purge_pe) -{ - struct pci_bus *bus = dev->subordinate; - struct pci_dev *child, *tmp; - - eeh_remove_device(dev, purge_pe); - - if (bus && dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) { - list_for_each_entry_safe(child, tmp, &bus->devices, bus_list) - eeh_remove_bus_device(child, purge_pe); - } -} -EXPORT_SYMBOL_GPL(eeh_remove_bus_device); - static int proc_eeh_show(struct seq_file *m, void *v) { if (0 == eeh_subsystem_enabled) { diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 2b1ce17cae5..9ef3bbb8580 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -362,8 +362,10 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus) * devices are expected to be attached soon when calling * into pcibios_add_pci_devices(). */ - if (bus) - __pcibios_remove_pci_devices(bus, 0); + if (bus) { + eeh_pe_state_mark(pe, EEH_PE_KEEP); + pcibios_remove_pci_devices(bus); + } /* Reset the pci controller. (Asserts RST#; resets config space). * Reconfigure bridges and devices. Don't try to bring the system @@ -386,6 +388,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus) if (bus) { ssleep(5); pcibios_add_pci_devices(bus); + eeh_pe_state_clear(pe, EEH_PE_KEEP); } pe->tstamp = tstamp; diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index 016588a6f5e..32ef40940ba 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -333,7 +333,7 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) while (parent) { if (!(parent->type & EEH_PE_INVALID)) break; - parent->type &= ~EEH_PE_INVALID; + parent->type &= ~(EEH_PE_INVALID | EEH_PE_KEEP); parent = parent->parent; } pr_debug("EEH: Add %s to Device PE#%x, Parent PE#%x\n", @@ -397,14 +397,13 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) /** * eeh_rmv_from_parent_pe - Remove one EEH device from the associated PE * @edev: EEH device - * @purge_pe: remove PE or not * * The PE hierarchy tree might be changed when doing PCI hotplug. * Also, the PCI devices or buses could be removed from the system * during EEH recovery. So we have to call the function remove the * corresponding PE accordingly if necessary. */ -int eeh_rmv_from_parent_pe(struct eeh_dev *edev, int purge_pe) +int eeh_rmv_from_parent_pe(struct eeh_dev *edev) { struct eeh_pe *pe, *parent, *child; int cnt; @@ -431,7 +430,7 @@ int eeh_rmv_from_parent_pe(struct eeh_dev *edev, int purge_pe) if (pe->type & EEH_PE_PHB) break; - if (purge_pe) { + if (!(pe->state & EEH_PE_KEEP)) { if (list_empty(&pe->edevs) && list_empty(&pe->child_list)) { list_del(&pe->child); diff --git a/arch/powerpc/kernel/pci-hotplug.c b/arch/powerpc/kernel/pci-hotplug.c index 3dab2f2801b..fc0831d4971 100644 --- a/arch/powerpc/kernel/pci-hotplug.c +++ b/arch/powerpc/kernel/pci-hotplug.c @@ -29,49 +29,33 @@ */ void pcibios_release_device(struct pci_dev *dev) { - eeh_remove_device(dev, 1); + eeh_remove_device(dev); } /** - * __pcibios_remove_pci_devices - remove all devices under this bus + * pcibios_remove_pci_devices - remove all devices under this bus * @bus: the indicated PCI bus - * @purge_pe: destroy the PE on removal of PCI devices * * Remove all of the PCI devices under this bus both from the * linux pci device tree, and from the powerpc EEH address cache. - * By default, the corresponding PE will be destroied during the - * normal PCI hotplug path. For PCI hotplug during EEH recovery, - * the corresponding PE won't be destroied and deallocated. */ -void __pcibios_remove_pci_devices(struct pci_bus *bus, int purge_pe) +void pcibios_remove_pci_devices(struct pci_bus *bus) { struct pci_dev *dev, *tmp; struct pci_bus *child_bus; /* First go down child busses */ list_for_each_entry(child_bus, &bus->children, node) - __pcibios_remove_pci_devices(child_bus, purge_pe); + pcibios_remove_pci_devices(child_bus); pr_debug("PCI: Removing devices on bus %04x:%02x\n", pci_domain_nr(bus), bus->number); list_for_each_entry_safe(dev, tmp, &bus->devices, bus_list) { - pr_debug(" * Removing %s...\n", pci_name(dev)); - eeh_remove_bus_device(dev, purge_pe); + pr_debug(" Removing %s...\n", pci_name(dev)); pci_stop_and_remove_bus_device(dev); } } -/** - * pcibios_remove_pci_devices - remove all devices under this bus - * @bus: the indicated PCI bus - * - * Remove all of the PCI devices under this bus both from the - * linux pci device tree, and from the powerpc EEH address cache. - */ -void pcibios_remove_pci_devices(struct pci_bus *bus) -{ - __pcibios_remove_pci_devices(bus, 1); -} EXPORT_SYMBOL_GPL(pcibios_remove_pci_devices); /** -- cgit v1.2.3-70-g09d2 From 9feed42e93d2625db86423cedf8b4b2bed00779e Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Wed, 24 Jul 2013 10:24:56 +0800 Subject: powerpc/eeh: Use safe list traversal when walking EEH devices Currently, we're trasversing the EEH devices list using list_for_each_entry(). That's not safe enough because the EEH devices might be removed from its parent PE while doing iteration. The patch replaces that with list_for_each_entry_safe(). Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/eeh.h | 4 ++-- arch/powerpc/kernel/eeh.c | 4 ++-- arch/powerpc/kernel/eeh_pe.c | 10 +++++----- 3 files changed, 9 insertions(+), 9 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 2ce22d7b71a..e8c411b63ca 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -74,8 +74,8 @@ struct eeh_pe { struct list_head child; /* Child PEs */ }; -#define eeh_pe_for_each_dev(pe, edev) \ - list_for_each_entry(edev, &pe->edevs, list) +#define eeh_pe_for_each_dev(pe, edev, tmp) \ + list_for_each_entry_safe(edev, tmp, &pe->edevs, list) /* * The struct is used to trace EEH state for the associated diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index ce81477316b..56bd4584f61 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -231,7 +231,7 @@ static size_t eeh_gather_pci_data(struct eeh_dev *edev, char * buf, size_t len) void eeh_slot_error_detail(struct eeh_pe *pe, int severity) { size_t loglen = 0; - struct eeh_dev *edev; + struct eeh_dev *edev, *tmp; bool valid_cfg_log = true; /* @@ -251,7 +251,7 @@ void eeh_slot_error_detail(struct eeh_pe *pe, int severity) eeh_pe_restore_bars(pe); pci_regs_buf[0] = 0; - eeh_pe_for_each_dev(pe, edev) { + eeh_pe_for_each_dev(pe, edev, tmp) { loglen += eeh_gather_pci_data(edev, pci_regs_buf + loglen, EEH_PCI_REGS_LOG_LEN - loglen); } diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index 32ef40940ba..c8b815e45c8 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -176,7 +176,7 @@ void *eeh_pe_dev_traverse(struct eeh_pe *root, eeh_traverse_func fn, void *flag) { struct eeh_pe *pe; - struct eeh_dev *edev; + struct eeh_dev *edev, *tmp; void *ret; if (!root) { @@ -186,7 +186,7 @@ void *eeh_pe_dev_traverse(struct eeh_pe *root, /* Traverse root PE */ for (pe = root; pe; pe = eeh_pe_next(pe, root)) { - eeh_pe_for_each_dev(pe, edev) { + eeh_pe_for_each_dev(pe, edev, tmp) { ret = fn(edev, flag); if (ret) return ret; @@ -501,7 +501,7 @@ static void *__eeh_pe_state_mark(void *data, void *flag) { struct eeh_pe *pe = (struct eeh_pe *)data; int state = *((int *)flag); - struct eeh_dev *tmp; + struct eeh_dev *edev, *tmp; struct pci_dev *pdev; /* @@ -511,8 +511,8 @@ static void *__eeh_pe_state_mark(void *data, void *flag) * the PCI device driver. */ pe->state |= state; - eeh_pe_for_each_dev(pe, tmp) { - pdev = eeh_dev_to_pci_dev(tmp); + eeh_pe_for_each_dev(pe, edev, tmp) { + pdev = eeh_dev_to_pci_dev(edev); if (pdev) pdev->error_state = pci_channel_io_frozen; } -- cgit v1.2.3-70-g09d2 From f5c57710dd62dd06f176934a8b4b8accbf00f9f8 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Wed, 24 Jul 2013 10:24:58 +0800 Subject: powerpc/eeh: Use partial hotplug for EEH unaware drivers When EEH error happens to one specific PE, some devices with drivers supporting EEH won't except hotplug on the device. However, there might have other deivces without driver, or with driver without EEH support. For the case, we need do partial hotplug in order to make sure that the PE becomes absolutely quite during reset. Otherise, the PE reset might fail and leads to failure of error recovery. The current code doesn't handle that 'mixed' case properly, it either uses the error callbacks to the drivers, or tries hotplug, but doesn't handle a PE (EEH domain) composed of a combination of the two. The patch intends to support so-called "partial" hotplug for EEH: Before we do reset, we stop and remove those PCI devices without EEH sensitive driver. The corresponding EEH devices are not detached from its PE, but with special flag. After the reset is done, those EEH devices with the special flag will be scanned one by one. Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/eeh.h | 6 ++- arch/powerpc/kernel/eeh.c | 30 +++++++++-- arch/powerpc/kernel/eeh_driver.c | 74 ++++++++++++++++++++++++++-- arch/powerpc/kernel/eeh_pe.c | 20 +++----- arch/powerpc/kernel/eeh_sysfs.c | 7 +++ arch/powerpc/platforms/powernv/eeh-powernv.c | 2 +- arch/powerpc/platforms/pseries/eeh_pseries.c | 2 +- 7 files changed, 117 insertions(+), 24 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index e8c411b63ca..f54a60131de 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -84,7 +84,8 @@ struct eeh_pe { * another tree except the currently existing tree of PCI * buses and PCI devices */ -#define EEH_DEV_IRQ_DISABLED (1<<0) /* Interrupt disabled */ +#define EEH_DEV_IRQ_DISABLED (1 << 0) /* Interrupt disabled */ +#define EEH_DEV_DISCONNECTED (1 << 1) /* Removing from PE */ struct eeh_dev { int mode; /* EEH mode */ @@ -97,6 +98,7 @@ struct eeh_dev { struct pci_controller *phb; /* Associated PHB */ struct device_node *dn; /* Associated device node */ struct pci_dev *pdev; /* Associated PCI device */ + struct pci_bus *bus; /* PCI bus for partial hotplug */ }; static inline struct device_node *eeh_dev_to_of_node(struct eeh_dev *edev) @@ -197,6 +199,8 @@ struct eeh_pe *eeh_pe_get(struct eeh_dev *edev); int eeh_add_to_parent_pe(struct eeh_dev *edev); int eeh_rmv_from_parent_pe(struct eeh_dev *edev); void eeh_pe_update_time_stamp(struct eeh_pe *pe); +void *eeh_pe_traverse(struct eeh_pe *root, + eeh_traverse_func fn, void *flag); void *eeh_pe_dev_traverse(struct eeh_pe *root, eeh_traverse_func fn, void *flag); void eeh_pe_restore_bars(struct eeh_pe *pe); diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 56bd4584f61..a5783f1a7a9 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -900,7 +900,21 @@ void eeh_add_device_late(struct pci_dev *dev) pr_debug("EEH: Already referenced !\n"); return; } - WARN_ON(edev->pdev); + + /* + * The EEH cache might not be removed correctly because of + * unbalanced kref to the device during unplug time, which + * relies on pcibios_release_device(). So we have to remove + * that here explicitly. + */ + if (edev->pdev) { + eeh_rmv_from_parent_pe(edev); + eeh_addr_cache_rmv_dev(edev->pdev); + eeh_sysfs_remove_device(edev->pdev); + + edev->pdev = NULL; + dev->dev.archdata.edev = NULL; + } edev->pdev = dev; dev->dev.archdata.edev = edev; @@ -982,14 +996,24 @@ void eeh_remove_device(struct pci_dev *dev) /* Unregister the device with the EEH/PCI address search system */ pr_debug("EEH: Removing device %s\n", pci_name(dev)); - if (!edev || !edev->pdev) { + if (!edev || !edev->pdev || !edev->pe) { pr_debug("EEH: Not referenced !\n"); return; } + + /* + * During the hotplug for EEH error recovery, we need the EEH + * device attached to the parent PE in order for BAR restore + * a bit later. So we keep it for BAR restore and remove it + * from the parent PE during the BAR resotre. + */ edev->pdev = NULL; dev->dev.archdata.edev = NULL; + if (!(edev->pe->state & EEH_PE_KEEP)) + eeh_rmv_from_parent_pe(edev); + else + edev->mode |= EEH_DEV_DISCONNECTED; - eeh_rmv_from_parent_pe(edev); eeh_addr_cache_rmv_dev(dev); eeh_sysfs_remove_device(dev); } diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 9ef3bbb8580..9fda75d1f5a 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -338,6 +338,54 @@ static void *eeh_report_failure(void *data, void *userdata) return NULL; } +static void *eeh_rmv_device(void *data, void *userdata) +{ + struct pci_driver *driver; + struct eeh_dev *edev = (struct eeh_dev *)data; + struct pci_dev *dev = eeh_dev_to_pci_dev(edev); + int *removed = (int *)userdata; + + /* + * Actually, we should remove the PCI bridges as well. + * However, that's lots of complexity to do that, + * particularly some of devices under the bridge might + * support EEH. So we just care about PCI devices for + * simplicity here. + */ + if (!dev || (dev->hdr_type & PCI_HEADER_TYPE_BRIDGE)) + return NULL; + driver = eeh_pcid_get(dev); + if (driver && driver->err_handler) + return NULL; + + /* Remove it from PCI subsystem */ + pr_debug("EEH: Removing %s without EEH sensitive driver\n", + pci_name(dev)); + edev->bus = dev->bus; + edev->mode |= EEH_DEV_DISCONNECTED; + (*removed)++; + + pci_stop_and_remove_bus_device(dev); + + return NULL; +} + +static void *eeh_pe_detach_dev(void *data, void *userdata) +{ + struct eeh_pe *pe = (struct eeh_pe *)data; + struct eeh_dev *edev, *tmp; + + eeh_pe_for_each_dev(pe, edev, tmp) { + if (!(edev->mode & EEH_DEV_DISCONNECTED)) + continue; + + edev->mode &= ~(EEH_DEV_DISCONNECTED | EEH_DEV_IRQ_DISABLED); + eeh_rmv_from_parent_pe(edev); + } + + return NULL; +} + /** * eeh_reset_device - Perform actual reset of a pci slot * @pe: EEH PE @@ -349,8 +397,9 @@ static void *eeh_report_failure(void *data, void *userdata) */ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus) { + struct pci_bus *frozen_bus = eeh_pe_bus_get(pe); struct timeval tstamp; - int cnt, rc; + int cnt, rc, removed = 0; /* pcibios will clear the counter; save the value */ cnt = pe->freeze_count; @@ -362,10 +411,11 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus) * devices are expected to be attached soon when calling * into pcibios_add_pci_devices(). */ - if (bus) { - eeh_pe_state_mark(pe, EEH_PE_KEEP); + eeh_pe_state_mark(pe, EEH_PE_KEEP); + if (bus) pcibios_remove_pci_devices(bus); - } + else if (frozen_bus) + eeh_pe_dev_traverse(pe, eeh_rmv_device, &removed); /* Reset the pci controller. (Asserts RST#; resets config space). * Reconfigure bridges and devices. Don't try to bring the system @@ -386,10 +436,24 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus) * potentially weird things happen. */ if (bus) { + pr_info("EEH: Sleep 5s ahead of complete hotplug\n"); ssleep(5); + + /* + * The EEH device is still connected with its parent + * PE. We should disconnect it so the binding can be + * rebuilt when adding PCI devices. + */ + eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL); pcibios_add_pci_devices(bus); - eeh_pe_state_clear(pe, EEH_PE_KEEP); + } else if (frozen_bus && removed) { + pr_info("EEH: Sleep 5s ahead of partial hotplug\n"); + ssleep(5); + + eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL); + pcibios_add_pci_devices(frozen_bus); } + eeh_pe_state_clear(pe, EEH_PE_KEEP); pe->tstamp = tstamp; pe->freeze_count = cnt; diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index c8b815e45c8..2aa955ae01a 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -149,8 +149,8 @@ static struct eeh_pe *eeh_pe_next(struct eeh_pe *pe, * callback returns something other than NULL, or no more PEs * to be traversed. */ -static void *eeh_pe_traverse(struct eeh_pe *root, - eeh_traverse_func fn, void *flag) +void *eeh_pe_traverse(struct eeh_pe *root, + eeh_traverse_func fn, void *flag) { struct eeh_pe *pe; void *ret; @@ -409,8 +409,8 @@ int eeh_rmv_from_parent_pe(struct eeh_dev *edev) int cnt; if (!edev->pe) { - pr_warning("%s: No PE found for EEH device %s\n", - __func__, edev->dn->full_name); + pr_debug("%s: No PE found for EEH device %s\n", + __func__, edev->dn->full_name); return -EEXIST; } @@ -728,18 +728,12 @@ static void eeh_restore_device_bars(struct eeh_dev *edev, */ static void *eeh_restore_one_device_bars(void *data, void *flag) { - struct pci_dev *pdev = NULL; struct eeh_dev *edev = (struct eeh_dev *)data; + struct pci_dev *pdev = eeh_dev_to_pci_dev(edev); struct device_node *dn = eeh_dev_to_of_node(edev); - /* Trace the PCI bridge */ - if (eeh_probe_mode_dev()) { - pdev = eeh_dev_to_pci_dev(edev); - if (pdev->hdr_type != PCI_HEADER_TYPE_BRIDGE) - pdev = NULL; - } - - if (pdev) + /* Do special restore for bridges */ + if (pdev->hdr_type == PCI_HEADER_TYPE_BRIDGE) eeh_restore_bridge_bars(pdev, edev, dn); else eeh_restore_device_bars(edev, dn); diff --git a/arch/powerpc/kernel/eeh_sysfs.c b/arch/powerpc/kernel/eeh_sysfs.c index e7ae3484918..61e2a145213 100644 --- a/arch/powerpc/kernel/eeh_sysfs.c +++ b/arch/powerpc/kernel/eeh_sysfs.c @@ -68,6 +68,13 @@ void eeh_sysfs_add_device(struct pci_dev *pdev) void eeh_sysfs_remove_device(struct pci_dev *pdev) { + /* + * The parent directory might have been removed. We needn't + * continue for that case. + */ + if (!pdev->dev.kobj.sd) + return; + device_remove_file(&pdev->dev, &dev_attr_eeh_mode); device_remove_file(&pdev->dev, &dev_attr_eeh_config_addr); device_remove_file(&pdev->dev, &dev_attr_eeh_pe_config_addr); diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index 969cce73055..a380428cf9c 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -114,7 +114,7 @@ static int powernv_eeh_dev_probe(struct pci_dev *dev, void *flag) * the root bridge. So it's not reasonable to continue * the probing. */ - if (!dn || !edev) + if (!dn || !edev || edev->pe) return 0; /* Skip for PCI-ISA bridge */ diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index b456b157d33..0f44f9fe49a 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -153,7 +153,7 @@ static void *pseries_eeh_of_probe(struct device_node *dn, void *flag) /* Retrieve OF node and eeh device */ edev = of_node_to_eeh_dev(dn); - if (!of_device_is_available(dn)) + if (edev->pe || !of_device_is_available(dn)) return NULL; /* Retrieve class/vendor/device IDs */ -- cgit v1.2.3-70-g09d2 From 4b83bd452f17582c8d1c916568fd28a237a2eb46 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Wed, 24 Jul 2013 10:24:59 +0800 Subject: powerpc/eeh: Don't use pci_dev during BAR restore While restoring BARs for one specific PCI device, the pci_dev instance should have been released. So it's not reliable to use the pci_dev instance on restoring BARs. However, we still need some information (e.g. PCIe capability position, header type) from the pci_dev instance. So we have to store those information to EEH device in advance. Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/eeh.h | 8 +++- arch/powerpc/kernel/eeh_pe.c | 25 ++++++----- arch/powerpc/platforms/powernv/eeh-powernv.c | 11 +++++ arch/powerpc/platforms/pseries/eeh_pseries.c | 63 +++++++++++++++++++++++++++- 4 files changed, 91 insertions(+), 16 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index f54a60131de..4199d994327 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -84,8 +84,11 @@ struct eeh_pe { * another tree except the currently existing tree of PCI * buses and PCI devices */ -#define EEH_DEV_IRQ_DISABLED (1 << 0) /* Interrupt disabled */ -#define EEH_DEV_DISCONNECTED (1 << 1) /* Removing from PE */ +#define EEH_DEV_BRIDGE (1 << 0) /* PCI bridge */ +#define EEH_DEV_ROOT_PORT (1 << 1) /* PCIe root port */ +#define EEH_DEV_DS_PORT (1 << 2) /* Downstream port */ +#define EEH_DEV_IRQ_DISABLED (1 << 3) /* Interrupt disabled */ +#define EEH_DEV_DISCONNECTED (1 << 4) /* Removing from PE */ struct eeh_dev { int mode; /* EEH mode */ @@ -93,6 +96,7 @@ struct eeh_dev { int config_addr; /* Config address */ int pe_config_addr; /* PE config address */ u32 config_space[16]; /* Saved PCI config space */ + u8 pcie_cap; /* Saved PCIe capability */ struct eeh_pe *pe; /* Associated PE */ struct list_head list; /* Form link list in the PE */ struct pci_controller *phb; /* Associated PHB */ diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index 2aa955ae01a..f9450537e33 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -578,7 +578,7 @@ void eeh_pe_state_clear(struct eeh_pe *pe, int state) * blocked on normal path during the stage. So we need utilize * eeh operations, which is always permitted. */ -static void eeh_bridge_check_link(struct pci_dev *pdev, +static void eeh_bridge_check_link(struct eeh_dev *edev, struct device_node *dn) { int cap; @@ -589,16 +589,17 @@ static void eeh_bridge_check_link(struct pci_dev *pdev, * We only check root port and downstream ports of * PCIe switches */ - if (!pci_is_pcie(pdev) || - (pci_pcie_type(pdev) != PCI_EXP_TYPE_ROOT_PORT && - pci_pcie_type(pdev) != PCI_EXP_TYPE_DOWNSTREAM)) + if (!(edev->mode & (EEH_DEV_ROOT_PORT | EEH_DEV_DS_PORT))) return; - pr_debug("%s: Check PCIe link for %s ...\n", - __func__, pci_name(pdev)); + pr_debug("%s: Check PCIe link for %04x:%02x:%02x.%01x ...\n", + __func__, edev->phb->global_number, + edev->config_addr >> 8, + PCI_SLOT(edev->config_addr & 0xFF), + PCI_FUNC(edev->config_addr & 0xFF)); /* Check slot status */ - cap = pdev->pcie_cap; + cap = edev->pcie_cap; eeh_ops->read_config(dn, cap + PCI_EXP_SLTSTA, 2, &val); if (!(val & PCI_EXP_SLTSTA_PDS)) { pr_debug(" No card in the slot (0x%04x) !\n", val); @@ -652,8 +653,7 @@ static void eeh_bridge_check_link(struct pci_dev *pdev, #define BYTE_SWAP(OFF) (8*((OFF)/4)+3-(OFF)) #define SAVED_BYTE(OFF) (((u8 *)(edev->config_space))[BYTE_SWAP(OFF)]) -static void eeh_restore_bridge_bars(struct pci_dev *pdev, - struct eeh_dev *edev, +static void eeh_restore_bridge_bars(struct eeh_dev *edev, struct device_node *dn) { int i; @@ -679,7 +679,7 @@ static void eeh_restore_bridge_bars(struct pci_dev *pdev, eeh_ops->write_config(dn, PCI_COMMAND, 4, edev->config_space[1]); /* Check the PCIe link is ready */ - eeh_bridge_check_link(pdev, dn); + eeh_bridge_check_link(edev, dn); } static void eeh_restore_device_bars(struct eeh_dev *edev, @@ -729,12 +729,11 @@ static void eeh_restore_device_bars(struct eeh_dev *edev, static void *eeh_restore_one_device_bars(void *data, void *flag) { struct eeh_dev *edev = (struct eeh_dev *)data; - struct pci_dev *pdev = eeh_dev_to_pci_dev(edev); struct device_node *dn = eeh_dev_to_of_node(edev); /* Do special restore for bridges */ - if (pdev->hdr_type == PCI_HEADER_TYPE_BRIDGE) - eeh_restore_bridge_bars(pdev, edev, dn); + if (edev->mode & EEH_DEV_BRIDGE) + eeh_restore_bridge_bars(edev, dn); else eeh_restore_device_bars(edev, dn); diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index a380428cf9c..4361a5c7f3a 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -124,6 +124,17 @@ static int powernv_eeh_dev_probe(struct pci_dev *dev, void *flag) /* Initialize eeh device */ edev->class_code = dev->class; edev->mode = 0; + if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) + edev->mode |= EEH_DEV_BRIDGE; + if (pci_is_pcie(dev)) { + edev->pcie_cap = pci_pcie_cap(dev); + + if (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT) + edev->mode |= EEH_DEV_ROOT_PORT; + else if (pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM) + edev->mode |= EEH_DEV_DS_PORT; + } + edev->config_addr = ((dev->bus->number << 8) | dev->devfn); edev->pe_config_addr = phb->bdfn_to_pe(phb, dev->bus, dev->devfn & 0xff); diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index 0f44f9fe49a..9e80f0af311 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -133,6 +133,48 @@ static int pseries_eeh_init(void) return 0; } +static int pseries_eeh_cap_start(struct device_node *dn) +{ + struct pci_dn *pdn = PCI_DN(dn); + u32 status; + + if (!pdn) + return 0; + + rtas_read_config(pdn, PCI_STATUS, 2, &status); + if (!(status & PCI_STATUS_CAP_LIST)) + return 0; + + return PCI_CAPABILITY_LIST; +} + + +static int pseries_eeh_find_cap(struct device_node *dn, int cap) +{ + struct pci_dn *pdn = PCI_DN(dn); + int pos = pseries_eeh_cap_start(dn); + int cnt = 48; /* Maximal number of capabilities */ + u32 id; + + if (!pos) + return 0; + + while (cnt--) { + rtas_read_config(pdn, pos, 1, &pos); + if (pos < 0x40) + break; + pos &= ~3; + rtas_read_config(pdn, pos + PCI_CAP_LIST_ID, 1, &id); + if (id == 0xff) + break; + if (id == cap) + return pos; + pos += PCI_CAP_LIST_NEXT; + } + + return 0; +} + /** * pseries_eeh_of_probe - EEH probe on the given device * @dn: OF node @@ -146,8 +188,10 @@ static void *pseries_eeh_of_probe(struct device_node *dn, void *flag) { struct eeh_dev *edev; struct eeh_pe pe; + struct pci_dn *pdn = PCI_DN(dn); const u32 *class_code, *vendor_id, *device_id; const u32 *regs; + u32 pcie_flags; int enable = 0; int ret; @@ -167,9 +211,26 @@ static void *pseries_eeh_of_probe(struct device_node *dn, void *flag) if (dn->type && !strcmp(dn->type, "isa")) return NULL; - /* Update class code and mode of eeh device */ + /* + * Update class code and mode of eeh device. We need + * correctly reflects that current device is root port + * or PCIe switch downstream port. + */ edev->class_code = *class_code; + edev->pcie_cap = pseries_eeh_find_cap(dn, PCI_CAP_ID_EXP); edev->mode = 0; + if ((edev->class_code >> 8) == PCI_CLASS_BRIDGE_PCI) { + edev->mode |= EEH_DEV_BRIDGE; + if (edev->pcie_cap) { + rtas_read_config(pdn, edev->pcie_cap + PCI_EXP_FLAGS, + 2, &pcie_flags); + pcie_flags = (pcie_flags & PCI_EXP_FLAGS_TYPE) >> 4; + if (pcie_flags == PCI_EXP_TYPE_ROOT_PORT) + edev->mode |= EEH_DEV_ROOT_PORT; + else if (pcie_flags == PCI_EXP_TYPE_DOWNSTREAM) + edev->mode |= EEH_DEV_DS_PORT; + } + } /* Retrieve the device address */ regs = of_get_property(dn, "reg", NULL); -- cgit v1.2.3-70-g09d2 From ab55d2187da27414f78056810713c92f9a4350c2 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Wed, 24 Jul 2013 10:25:01 +0800 Subject: powerpc/eeh: Introdce flag to protect sysfs The patch introduces flag EEH_DEV_SYSFS to keep track that the sysfs entries for the corresponding EEH device (then PCI device) has been added or removed, in order to avoid race condition. Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/eeh.h | 2 ++ arch/powerpc/kernel/eeh.c | 2 ++ arch/powerpc/kernel/eeh_sysfs.c | 16 +++++++++++++++- arch/powerpc/platforms/powernv/eeh-powernv.c | 4 ++-- arch/powerpc/platforms/pseries/eeh_pseries.c | 2 +- 5 files changed, 22 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 4199d994327..d3e5e9bc8f9 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -90,6 +90,8 @@ struct eeh_pe { #define EEH_DEV_IRQ_DISABLED (1 << 3) /* Interrupt disabled */ #define EEH_DEV_DISCONNECTED (1 << 4) /* Removing from PE */ +#define EEH_DEV_SYSFS (1 << 8) /* Sysfs created */ + struct eeh_dev { int mode; /* EEH mode */ int class_code; /* Class code of the device */ diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index a5783f1a7a9..ea9414c8088 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -911,6 +911,7 @@ void eeh_add_device_late(struct pci_dev *dev) eeh_rmv_from_parent_pe(edev); eeh_addr_cache_rmv_dev(edev->pdev); eeh_sysfs_remove_device(edev->pdev); + edev->mode &= ~EEH_DEV_SYSFS; edev->pdev = NULL; dev->dev.archdata.edev = NULL; @@ -1016,6 +1017,7 @@ void eeh_remove_device(struct pci_dev *dev) eeh_addr_cache_rmv_dev(dev); eeh_sysfs_remove_device(dev); + edev->mode &= ~EEH_DEV_SYSFS; } static int proc_eeh_show(struct seq_file *m, void *v) diff --git a/arch/powerpc/kernel/eeh_sysfs.c b/arch/powerpc/kernel/eeh_sysfs.c index 61e2a145213..5d753d4f2c7 100644 --- a/arch/powerpc/kernel/eeh_sysfs.c +++ b/arch/powerpc/kernel/eeh_sysfs.c @@ -56,26 +56,40 @@ EEH_SHOW_ATTR(eeh_pe_config_addr, pe_config_addr, "0x%x"); void eeh_sysfs_add_device(struct pci_dev *pdev) { + struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev); int rc=0; + if (edev && (edev->mode & EEH_DEV_SYSFS)) + return; + rc += device_create_file(&pdev->dev, &dev_attr_eeh_mode); rc += device_create_file(&pdev->dev, &dev_attr_eeh_config_addr); rc += device_create_file(&pdev->dev, &dev_attr_eeh_pe_config_addr); if (rc) printk(KERN_WARNING "EEH: Unable to create sysfs entries\n"); + else if (edev) + edev->mode |= EEH_DEV_SYSFS; } void eeh_sysfs_remove_device(struct pci_dev *pdev) { + struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev); + /* * The parent directory might have been removed. We needn't * continue for that case. */ - if (!pdev->dev.kobj.sd) + if (!pdev->dev.kobj.sd) { + if (edev) + edev->mode &= ~EEH_DEV_SYSFS; return; + } device_remove_file(&pdev->dev, &dev_attr_eeh_mode); device_remove_file(&pdev->dev, &dev_attr_eeh_config_addr); device_remove_file(&pdev->dev, &dev_attr_eeh_pe_config_addr); + + if (edev) + edev->mode &= ~EEH_DEV_SYSFS; } diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index 4361a5c7f3a..79663d26e6e 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -122,8 +122,8 @@ static int powernv_eeh_dev_probe(struct pci_dev *dev, void *flag) return 0; /* Initialize eeh device */ - edev->class_code = dev->class; - edev->mode = 0; + edev->class_code = dev->class; + edev->mode &= 0xFFFFFF00; if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) edev->mode |= EEH_DEV_BRIDGE; if (pci_is_pcie(dev)) { diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index 9e80f0af311..7fbc25b1813 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -218,7 +218,7 @@ static void *pseries_eeh_of_probe(struct device_node *dn, void *flag) */ edev->class_code = *class_code; edev->pcie_cap = pseries_eeh_find_cap(dn, PCI_CAP_ID_EXP); - edev->mode = 0; + edev->mode &= 0xFFFFFF00; if ((edev->class_code >> 8) == PCI_CLASS_BRIDGE_PCI) { edev->mode |= EEH_DEV_BRIDGE; if (edev->pcie_cap) { -- cgit v1.2.3-70-g09d2 From 8d7c55d01e4648605fd0dacc82d8d3989ead4db7 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 23 Jul 2013 18:07:45 +1000 Subject: powerpc/perf: Export PERF_EVENT_CONFIG_EBB_SHIFT to userspace We use bit 63 of the event code for userspace to request that the event be counted using EBB (Event Based Branches). Export this value, making it part of the API - though only on processors that support EBB. Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/perf_event_server.h | 6 +----- arch/powerpc/include/uapi/asm/Kbuild | 1 + arch/powerpc/include/uapi/asm/perf_event.h | 18 ++++++++++++++++++ arch/powerpc/perf/core-book3s.c | 2 +- arch/powerpc/perf/power8-pmu.c | 6 +++--- 5 files changed, 24 insertions(+), 9 deletions(-) create mode 100644 arch/powerpc/include/uapi/asm/perf_event.h (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h index 2dd7bfc459b..8b249264475 100644 --- a/arch/powerpc/include/asm/perf_event_server.h +++ b/arch/powerpc/include/asm/perf_event_server.h @@ -12,6 +12,7 @@ #include #include #include +#include #define MAX_HWEVENTS 8 #define MAX_EVENT_ALTERNATIVES 8 @@ -69,11 +70,6 @@ struct power_pmu { #define PPMU_LIMITED_PMC_REQD 2 /* have to put this on a limited PMC */ #define PPMU_ONLY_COUNT_RUN 4 /* only counting in run state */ -/* - * We use the event config bit 63 as a flag to request EBB. - */ -#define EVENT_CONFIG_EBB_SHIFT 63 - extern int register_power_pmu(struct power_pmu *); struct pt_regs; diff --git a/arch/powerpc/include/uapi/asm/Kbuild b/arch/powerpc/include/uapi/asm/Kbuild index 5182c8622b5..48be855ef37 100644 --- a/arch/powerpc/include/uapi/asm/Kbuild +++ b/arch/powerpc/include/uapi/asm/Kbuild @@ -20,6 +20,7 @@ header-y += mman.h header-y += msgbuf.h header-y += nvram.h header-y += param.h +header-y += perf_event.h header-y += poll.h header-y += posix_types.h header-y += ps3fb.h diff --git a/arch/powerpc/include/uapi/asm/perf_event.h b/arch/powerpc/include/uapi/asm/perf_event.h new file mode 100644 index 00000000000..80a4d40cf5b --- /dev/null +++ b/arch/powerpc/include/uapi/asm/perf_event.h @@ -0,0 +1,18 @@ +/* + * Copyright 2013 Michael Ellerman, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 of the + * License. + */ + +#ifndef _UAPI_ASM_POWERPC_PERF_EVENT_H +#define _UAPI_ASM_POWERPC_PERF_EVENT_H + +/* + * We use bit 63 of perf_event_attr.config as a flag to request EBB. + */ +#define PERF_EVENT_CONFIG_EBB_SHIFT 63 + +#endif /* _UAPI_ASM_POWERPC_PERF_EVENT_H */ diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 24a45f91c65..eeae308cf98 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -484,7 +484,7 @@ static bool is_ebb_event(struct perf_event *event) * use bit 63 of the event code for something else if they wish. */ return (ppmu->flags & PPMU_EBB) && - ((event->attr.config >> EVENT_CONFIG_EBB_SHIFT) & 1); + ((event->attr.config >> PERF_EVENT_CONFIG_EBB_SHIFT) & 1); } static int ebb_event_check(struct perf_event *event) diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c index 7466374d278..2ee4a707f0d 100644 --- a/arch/powerpc/perf/power8-pmu.c +++ b/arch/powerpc/perf/power8-pmu.c @@ -118,7 +118,7 @@ (EVENT_UNIT_MASK << EVENT_UNIT_SHIFT) | \ (EVENT_COMBINE_MASK << EVENT_COMBINE_SHIFT) | \ (EVENT_MARKED_MASK << EVENT_MARKED_SHIFT) | \ - (EVENT_EBB_MASK << EVENT_CONFIG_EBB_SHIFT) | \ + (EVENT_EBB_MASK << PERF_EVENT_CONFIG_EBB_SHIFT) | \ EVENT_PSEL_MASK) /* MMCRA IFM bits - POWER8 */ @@ -233,10 +233,10 @@ static int power8_get_constraint(u64 event, unsigned long *maskp, unsigned long pmc = (event >> EVENT_PMC_SHIFT) & EVENT_PMC_MASK; unit = (event >> EVENT_UNIT_SHIFT) & EVENT_UNIT_MASK; cache = (event >> EVENT_CACHE_SEL_SHIFT) & EVENT_CACHE_SEL_MASK; - ebb = (event >> EVENT_CONFIG_EBB_SHIFT) & EVENT_EBB_MASK; + ebb = (event >> PERF_EVENT_CONFIG_EBB_SHIFT) & EVENT_EBB_MASK; /* Clear the EBB bit in the event, so event checks work below */ - event &= ~(EVENT_EBB_MASK << EVENT_CONFIG_EBB_SHIFT); + event &= ~(EVENT_EBB_MASK << PERF_EVENT_CONFIG_EBB_SHIFT); if (pmc) { if (pmc > 6) -- cgit v1.2.3-70-g09d2 From 3be7db6ab45b21345386d1a466da133b19cde5e4 Mon Sep 17 00:00:00 2001 From: Robert Jennings Date: Wed, 24 Jul 2013 20:13:21 -0500 Subject: powerpc: VPHN topology change updates all siblings When an associativity level change is found for one thread, the siblings threads need to be updated as well. This is done today for PRRN in stage_topology_update() but is missing for VPHN in update_cpu_associativity_changes_mask(). This patch will correctly update all thread siblings during a topology change. Without this patch a topology update can result in a CPU in init_sched_groups_power() getting stuck indefinitely in a loop. This loop is built in build_sched_groups(). As a result of the thread moving to a node separate from its siblings the struct sched_group will have its next pointer set to point to itself rather than the sched_group struct of the next thread. This happens because we have a domain without the SD_OVERLAP flag, which is correct, and a topology that doesn't conform with reality (threads on the same core assigned to different numa nodes). When this list is traversed by init_sched_groups_power() it will reach the thread's sched_group structure and loop indefinitely; the cpu will be stuck at this point. The bug was exposed when VPHN was enabled in commit b7abef0 (v3.9). Cc: [v3.9+] Reported-by: Jan Stancek Signed-off-by: Robert Jennings Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/smp.h | 4 +++ arch/powerpc/mm/numa.c | 59 +++++++++++++++++++++++++++++++----------- 2 files changed, 48 insertions(+), 15 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h index ffbaabebcdc..48cfc858abd 100644 --- a/arch/powerpc/include/asm/smp.h +++ b/arch/powerpc/include/asm/smp.h @@ -145,6 +145,10 @@ extern void __cpu_die(unsigned int cpu); #define smp_setup_cpu_maps() static inline void inhibit_secondary_onlining(void) {} static inline void uninhibit_secondary_onlining(void) {} +static inline const struct cpumask *cpu_sibling_mask(int cpu) +{ + return cpumask_of(cpu); +} #endif /* CONFIG_SMP */ diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 08397217e8a..5850798826c 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -1318,7 +1319,8 @@ static int update_cpu_associativity_changes_mask(void) } } if (changed) { - cpumask_set_cpu(cpu, changes); + cpumask_or(changes, changes, cpu_sibling_mask(cpu)); + cpu = cpu_last_thread_sibling(cpu); } } @@ -1426,7 +1428,7 @@ static int update_cpu_topology(void *data) if (!data) return -EINVAL; - cpu = get_cpu(); + cpu = smp_processor_id(); for (update = data; update; update = update->next) { if (cpu != update->cpu) @@ -1446,12 +1448,12 @@ static int update_cpu_topology(void *data) */ int arch_update_cpu_topology(void) { - unsigned int cpu, changed = 0; + unsigned int cpu, sibling, changed = 0; struct topology_update_data *updates, *ud; unsigned int associativity[VPHN_ASSOC_BUFSIZE] = {0}; cpumask_t updated_cpus; struct device *dev; - int weight, i = 0; + int weight, new_nid, i = 0; weight = cpumask_weight(&cpu_associativity_changes_mask); if (!weight) @@ -1464,19 +1466,46 @@ int arch_update_cpu_topology(void) cpumask_clear(&updated_cpus); for_each_cpu(cpu, &cpu_associativity_changes_mask) { - ud = &updates[i++]; - ud->cpu = cpu; - vphn_get_associativity(cpu, associativity); - ud->new_nid = associativity_to_nid(associativity); - - if (ud->new_nid < 0 || !node_online(ud->new_nid)) - ud->new_nid = first_online_node; + /* + * If siblings aren't flagged for changes, updates list + * will be too short. Skip on this update and set for next + * update. + */ + if (!cpumask_subset(cpu_sibling_mask(cpu), + &cpu_associativity_changes_mask)) { + pr_info("Sibling bits not set for associativity " + "change, cpu%d\n", cpu); + cpumask_or(&cpu_associativity_changes_mask, + &cpu_associativity_changes_mask, + cpu_sibling_mask(cpu)); + cpu = cpu_last_thread_sibling(cpu); + continue; + } - ud->old_nid = numa_cpu_lookup_table[cpu]; - cpumask_set_cpu(cpu, &updated_cpus); + /* Use associativity from first thread for all siblings */ + vphn_get_associativity(cpu, associativity); + new_nid = associativity_to_nid(associativity); + if (new_nid < 0 || !node_online(new_nid)) + new_nid = first_online_node; + + if (new_nid == numa_cpu_lookup_table[cpu]) { + cpumask_andnot(&cpu_associativity_changes_mask, + &cpu_associativity_changes_mask, + cpu_sibling_mask(cpu)); + cpu = cpu_last_thread_sibling(cpu); + continue; + } - if (i < weight) - ud->next = &updates[i]; + for_each_cpu(sibling, cpu_sibling_mask(cpu)) { + ud = &updates[i++]; + ud->cpu = sibling; + ud->new_nid = new_nid; + ud->old_nid = numa_cpu_lookup_table[sibling]; + cpumask_set_cpu(sibling, &updated_cpus); + if (i < weight) + ud->next = &updates[i]; + } + cpu = cpu_last_thread_sibling(cpu); } stop_machine(update_cpu_topology, &updates[0], &updated_cpus); -- cgit v1.2.3-70-g09d2 From 74e400cee6c0266ba2d940ed78d981f1e24a8167 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Fri, 9 Aug 2013 17:29:28 +1000 Subject: powerpc: Rework setting up H/FSCR bit definitions This reworks the Facility Status and Control Regsiter (FSCR) config bit definitions so that we can access the bit numbers. This is needed for a subsequent patch to fix the userspace DSCR handling. HFSCR and FSCR bit definitions are the same, so reuse them. Signed-off-by: Michael Neuling Cc: [v3.10] Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/reg.h | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index a6840e4e24f..99222e27f17 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -254,19 +254,28 @@ #define SPRN_HRMOR 0x139 /* Real mode offset register */ #define SPRN_HSRR0 0x13A /* Hypervisor Save/Restore 0 */ #define SPRN_HSRR1 0x13B /* Hypervisor Save/Restore 1 */ +/* HFSCR and FSCR bit numbers are the same */ +#define FSCR_TAR_LG 8 /* Enable Target Address Register */ +#define FSCR_EBB_LG 7 /* Enable Event Based Branching */ +#define FSCR_TM_LG 5 /* Enable Transactional Memory */ +#define FSCR_PM_LG 4 /* Enable prob/priv access to PMU SPRs */ +#define FSCR_BHRB_LG 3 /* Enable Branch History Rolling Buffer*/ +#define FSCR_DSCR_LG 2 /* Enable Data Stream Control Register */ +#define FSCR_VECVSX_LG 1 /* Enable VMX/VSX */ +#define FSCR_FP_LG 0 /* Enable Floating Point */ #define SPRN_FSCR 0x099 /* Facility Status & Control Register */ -#define FSCR_TAR (1 << (63-55)) /* Enable Target Address Register */ -#define FSCR_EBB (1 << (63-56)) /* Enable Event Based Branching */ -#define FSCR_DSCR (1 << (63-61)) /* Enable Data Stream Control Register */ +#define FSCR_TAR __MASK(FSCR_TAR_LG) +#define FSCR_EBB __MASK(FSCR_EBB_LG) +#define FSCR_DSCR __MASK(FSCR_DSCR_LG) #define SPRN_HFSCR 0xbe /* HV=1 Facility Status & Control Register */ -#define HFSCR_TAR (1 << (63-55)) /* Enable Target Address Register */ -#define HFSCR_EBB (1 << (63-56)) /* Enable Event Based Branching */ -#define HFSCR_TM (1 << (63-58)) /* Enable Transactional Memory */ -#define HFSCR_PM (1 << (63-60)) /* Enable prob/priv access to PMU SPRs */ -#define HFSCR_BHRB (1 << (63-59)) /* Enable Branch History Rolling Buffer*/ -#define HFSCR_DSCR (1 << (63-61)) /* Enable Data Stream Control Register */ -#define HFSCR_VECVSX (1 << (63-62)) /* Enable VMX/VSX */ -#define HFSCR_FP (1 << (63-63)) /* Enable Floating Point */ +#define HFSCR_TAR __MASK(FSCR_TAR_LG) +#define HFSCR_EBB __MASK(FSCR_EBB_LG) +#define HFSCR_TM __MASK(FSCR_TM_LG) +#define HFSCR_PM __MASK(FSCR_PM_LG) +#define HFSCR_BHRB __MASK(FSCR_BHRB_LG) +#define HFSCR_DSCR __MASK(FSCR_DSCR_LG) +#define HFSCR_VECVSX __MASK(FSCR_VECVSX_LG) +#define HFSCR_FP __MASK(FSCR_FP_LG) #define SPRN_TAR 0x32f /* Target Address Register */ #define SPRN_LPCR 0x13E /* LPAR Control Register */ #define LPCR_VPM0 (1ul << (63-0)) -- cgit v1.2.3-70-g09d2 From c2d52644e2da8a07ecab5ca62dd0bc563089e8dc Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Fri, 9 Aug 2013 17:29:30 +1000 Subject: powerpc: Save the TAR register earlier This moves us to save the Target Address Register (TAR) a earlier in __switch_to. It introduces a new function save_tar() to do this. We need to save the TAR earlier as we will overwrite it in the transactional memory reclaim/recheckpoint path. We are going to do this in a subsequent patch which will fix saving the TAR register when it's modified inside a transaction. Signed-off-by: Michael Neuling Cc: [v3.10] Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/switch_to.h | 9 +++++++++ arch/powerpc/kernel/entry_64.S | 9 --------- arch/powerpc/kernel/process.c | 10 ++++++++++ 3 files changed, 19 insertions(+), 9 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h index 49a13e0ef23..294c2cedcf7 100644 --- a/arch/powerpc/include/asm/switch_to.h +++ b/arch/powerpc/include/asm/switch_to.h @@ -15,6 +15,15 @@ extern struct task_struct *__switch_to(struct task_struct *, struct thread_struct; extern struct task_struct *_switch(struct thread_struct *prev, struct thread_struct *next); +#ifdef CONFIG_PPC_BOOK3S_64 +static inline void save_tar(struct thread_struct *prev) +{ + if (cpu_has_feature(CPU_FTR_ARCH_207S)) + prev->tar = mfspr(SPRN_TAR); +} +#else +static inline void save_tar(struct thread_struct *prev) {} +#endif extern void giveup_fpu(struct task_struct *); extern void load_up_fpu(void); diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 4674fe647ad..2bd0b885b0f 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -449,15 +449,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_DSCR) #ifdef CONFIG_PPC_BOOK3S_64 BEGIN_FTR_SECTION - /* - * Back up the TAR across context switches. Note that the TAR is not - * available for use in the kernel. (To provide this, the TAR should - * be backed up/restored on exception entry/exit instead, and be in - * pt_regs. FIXME, this should be in pt_regs anyway (for debug).) - */ - mfspr r0,SPRN_TAR - std r0,THREAD_TAR(r3) - /* Event based branch registers */ mfspr r0, SPRN_BESCR std r0, THREAD_BESCR(r3) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index c517dbe705f..8083be20fe5 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -600,6 +600,16 @@ struct task_struct *__switch_to(struct task_struct *prev, struct ppc64_tlb_batch *batch; #endif + /* Back up the TAR across context switches. + * Note that the TAR is not available for use in the kernel. (To + * provide this, the TAR should be backed up/restored on exception + * entry/exit instead, and be in pt_regs. FIXME, this should be in + * pt_regs anyway (for debug).) + * Save the TAR here before we do treclaim/trecheckpoint as these + * will change the TAR. + */ + save_tar(&prev->thread); + __switch_to_tm(prev); #ifdef CONFIG_SMP -- cgit v1.2.3-70-g09d2 From 28e61cc466d8daace4b0f04ba2b83e0bd68f5832 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Fri, 9 Aug 2013 17:29:31 +1000 Subject: powerpc/tm: Fix context switching TAR, PPR and DSCR SPRs If a transaction is rolled back, the Target Address Register (TAR), Processor Priority Register (PPR) and Data Stream Control Register (DSCR) should be restored to the checkpointed values before the transaction began. Any changes to these SPRs inside the transaction should not be visible in the abort handler. Currently Linux doesn't save or restore the checkpointed TAR, PPR or DSCR. If we preempt a processes inside a transaction which has modified any of these, on process restore, that same transaction may be aborted we but we won't see the checkpointed versions of these SPRs. This adds checkpointed versions of these SPRs to the thread_struct and adds the save/restore of these three SPRs to the treclaim/trechkpt code. Without this if any of these SPRs are modified during a transaction, users may incorrectly see a speculated SPR value even if the transaction is aborted. Signed-off-by: Michael Neuling Cc: [v3.10] Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/processor.h | 4 ++++ arch/powerpc/kernel/asm-offsets.c | 3 +++ arch/powerpc/kernel/tm.S | 20 ++++++++++++++++++++ 3 files changed, 27 insertions(+) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 47a35b08b96..e378cccfca5 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -247,6 +247,10 @@ struct thread_struct { unsigned long tm_orig_msr; /* Thread's MSR on ctx switch */ struct pt_regs ckpt_regs; /* Checkpointed registers */ + unsigned long tm_tar; + unsigned long tm_ppr; + unsigned long tm_dscr; + /* * Transactional FP and VSX 0-31 register set. * NOTE: the sense of these is the opposite of the integer ckpt_regs! diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index c7e8afc2ead..8207459efe5 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -138,6 +138,9 @@ int main(void) DEFINE(THREAD_TM_TFHAR, offsetof(struct thread_struct, tm_tfhar)); DEFINE(THREAD_TM_TEXASR, offsetof(struct thread_struct, tm_texasr)); DEFINE(THREAD_TM_TFIAR, offsetof(struct thread_struct, tm_tfiar)); + DEFINE(THREAD_TM_TAR, offsetof(struct thread_struct, tm_tar)); + DEFINE(THREAD_TM_PPR, offsetof(struct thread_struct, tm_ppr)); + DEFINE(THREAD_TM_DSCR, offsetof(struct thread_struct, tm_dscr)); DEFINE(PT_CKPT_REGS, offsetof(struct thread_struct, ckpt_regs)); DEFINE(THREAD_TRANSACT_VR0, offsetof(struct thread_struct, transact_vr[0])); diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S index 51be8fb2480..0554d1f6d70 100644 --- a/arch/powerpc/kernel/tm.S +++ b/arch/powerpc/kernel/tm.S @@ -233,6 +233,16 @@ dont_backup_fp: std r5, _CCR(r7) std r6, _XER(r7) + + /* ******************** TAR, PPR, DSCR ********** */ + mfspr r3, SPRN_TAR + mfspr r4, SPRN_PPR + mfspr r5, SPRN_DSCR + + std r3, THREAD_TM_TAR(r12) + std r4, THREAD_TM_PPR(r12) + std r5, THREAD_TM_DSCR(r12) + /* MSR and flags: We don't change CRs, and we don't need to alter * MSR. */ @@ -347,6 +357,16 @@ dont_restore_fp: mtmsr r6 /* FP/Vec off again! */ restore_gprs: + + /* ******************** TAR, PPR, DSCR ********** */ + ld r4, THREAD_TM_TAR(r3) + ld r5, THREAD_TM_PPR(r3) + ld r6, THREAD_TM_DSCR(r3) + + mtspr SPRN_TAR, r4 + mtspr SPRN_PPR, r5 + mtspr SPRN_DSCR, r6 + /* ******************** CR,LR,CCR,MSR ********** */ ld r3, _CTR(r7) ld r4, _LINK(r7) -- cgit v1.2.3-70-g09d2