From 8fe7e94eb71430cf63a742f3c19739d82a662758 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 1 Jun 2011 15:31:44 +0200 Subject: oprofile, x86: Fix race in nmi handler while starting counters In some rare cases, nmis are generated immediately after the nmi handler of the cpu was started. This causes the counter not to be enabled. Before enabling the nmi handlers we need to set variable ctr_running first and make sure its value is written to memory. Also, the patch makes all existing barriers a memory barrier instead of a compiler barrier only. Reported-by: Suravee Suthikulpanit Cc: # .35+ Signed-off-by: Robert Richter --- arch/x86/oprofile/nmi_int.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index cf9750004a0..68894fdc034 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -112,8 +112,10 @@ static void nmi_cpu_start(void *dummy) static int nmi_start(void) { get_online_cpus(); - on_each_cpu(nmi_cpu_start, NULL, 1); ctr_running = 1; + /* make ctr_running visible to the nmi handler: */ + smp_mb(); + on_each_cpu(nmi_cpu_start, NULL, 1); put_online_cpus(); return 0; } @@ -504,15 +506,18 @@ static int nmi_setup(void) nmi_enabled = 0; ctr_running = 0; - barrier(); + /* make variables visible to the nmi handler: */ + smp_mb(); err = register_die_notifier(&profile_exceptions_nb); if (err) goto fail; get_online_cpus(); register_cpu_notifier(&oprofile_cpu_nb); - on_each_cpu(nmi_cpu_setup, NULL, 1); nmi_enabled = 1; + /* make nmi_enabled visible to the nmi handler: */ + smp_mb(); + on_each_cpu(nmi_cpu_setup, NULL, 1); put_online_cpus(); return 0; @@ -531,7 +536,8 @@ static void nmi_shutdown(void) nmi_enabled = 0; ctr_running = 0; put_online_cpus(); - barrier(); + /* make variables visible to the nmi handler: */ + smp_mb(); unregister_die_notifier(&profile_exceptions_nb); msrs = &get_cpu_var(cpu_msrs); model->shutdown(msrs); -- cgit v1.2.3-70-g09d2 From a26474e8649643e82d71e3a386d5c4bcc0b207ef Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 28 Jun 2011 11:41:07 +0200 Subject: x86-32, NUMA: Fix boot regression caused by NUMA init unification on highmem machines During 32/64 NUMA init unification, commit 797390d855 ("x86-32, NUMA: use sparse_memory_present_with_active_regions()") made 32bit mm init call memory_present() automatically from active_regions instead of leaving it to each NUMA init path. This commit description is inaccurate - memory_present() calls aren't the same for flat and numaq. After the commit, memory_present() is only called for the intersection of e820 and NUMA layout. Before, on flatmem, memory_present() would be called from 0 to max_pfn. After, it would be called only on the areas that e820 indicates to be populated. This is how x86_64 works and should be okay as memmap is allowed to contain holes; however, x86_32 DISCONTIGMEM is missing early_pfn_valid(), which makes memmap_init_zone() assume that memmap doesn't contain any hole. This leads to the following oops if e820 map contains holes as it often does on machine with near or more 4GiB of memory by calling pfn_to_page() on a pfn which isn't mapped to a NUMA node, a reported by Conny Seidel: BUG: unable to handle kernel paging request at 000012b0 IP: [] memmap_init_zone+0x6c/0xf2 *pdpt =3D 0000000000000000 *pde =3D f000eef3f000ee00 Oops: 0000 [#1] SMP last sysfs file: Modules linked in: Pid: 0, comm: swapper Not tainted 2.6.39-rc5-00164-g797390d #1 To Be Filled By O.E.M. To Be Filled By O.E.M./E350M1 EIP: 0060:[] EFLAGS: 00010012 CPU: 0 EIP is at memmap_init_zone+0x6c/0xf2 EAX: 00000000 EBX: 000a8000 ECX: 000a7fff EDX: f2c00b80 ESI: 000a8000 EDI: f2c00800 EBP: c19ffe54 ESP: c19ffe34 DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068 Process swapper (pid: 0, ti=3Dc19fe000 task=3Dc1a07f60 task.ti=3Dc19fe000) Stack: 00000002 00000000 0023f000 00000000 10000000 00000a00 f2c00000 f2c00b58 c19ffeb0 c1a80f24 000375fe 00000000 f2c00800 00000800 00000100 00000030 c1abb768 0000003c 00000000 00000000 00000004 00207a02 f2c00800 000375fe Call Trace: [] free_area_init_node+0x358/0x385 [] free_area_init_nodes+0x420/0x487 [] paging_init+0x114/0x11b [] setup_arch+0xb37/0xc0a [] start_kernel+0x76/0x316 [] i386_start_kernel+0xa8/0xb0 This patch fixes the bug by defining early_pfn_valid() to be the same as pfn_valid() when DISCONTIGMEM. Reported-bisected-and-tested-by: Conny Seidel Signed-off-by: Tejun Heo Cc: hans.rosenfeld@amd.com Cc: Christoph Lameter Cc: Conny Seidel Link: http://lkml.kernel.org/r/20110628094107.GB3386@htj.dyndns.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mmzone_32.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/mmzone_32.h b/arch/x86/include/asm/mmzone_32.h index 224e8c5eb30..ffa037f28d3 100644 --- a/arch/x86/include/asm/mmzone_32.h +++ b/arch/x86/include/asm/mmzone_32.h @@ -57,6 +57,8 @@ static inline int pfn_valid(int pfn) return 0; } +#define early_pfn_valid(pfn) pfn_valid((pfn)) + #endif /* CONFIG_DISCONTIGMEM */ #ifdef CONFIG_NEED_MULTIPLE_NODES -- cgit v1.2.3-70-g09d2 From b49c78d4827be8d7e67e5b94adac6b30a4a9ad14 Mon Sep 17 00:00:00 2001 From: Peter Chubb Date: Wed, 6 Jul 2011 10:56:30 +1000 Subject: x86, reboot: Acer Aspire One A110 reboot quirk Since git commit 660e34cebf0a11d54f2d5dd8838607452355f321 x86: reorder reboot method preferences, my Acer Aspire One hangs on reboot. It appears that its ACPI method for rebooting is broken. The attached patch adds a quirk so that the machine will reboot via the BIOS. [ hpa: verified that the ACPI control on this machine is just plain broken. ] Signed-off-by: Peter Chubb Link: http://lkml.kernel.org/r/w439iki5vl.wl%25peter@chubb.wattle.id.au Signed-off-by: H. Peter Anvin --- arch/x86/kernel/reboot.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 0c016f72769..4f0d46fefa7 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -294,6 +294,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { DMI_MATCH(DMI_BOARD_NAME, "VersaLogic Menlow board"), }, }, + { /* Handle reboot issue on Acer Aspire one */ + .callback = set_bios_reboot, + .ident = "Acer Aspire One A110", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Acer"), + DMI_MATCH(DMI_PRODUCT_NAME, "AOA110"), + }, + }, { } }; -- cgit v1.2.3-70-g09d2 From 7a3136666bc0f0419f7aaa7b1fabb4b0e0a7fb76 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 6 Jul 2011 18:10:34 -0700 Subject: x86, suspend: Restore MISC_ENABLE MSR in realmode wakeup Some BIOSes will reset the Intel MISC_ENABLE MSR (specifically the XD_DISABLE bit) when resuming from S3, which can interact poorly with ebba638ae723d8a8fc2f7abce5ec18b688b791d7. In 32bit PAE mode, this can lead to a fault when EFER is restored by the kernel wakeup routines, due to it setting the NX bit for a CPU that (thanks to the BIOS reset) now incorrectly thinks it lacks the NX feature. (64bit is not affected because it uses a common CPU bring-up that specifically handles the XD_DISABLE bit.) The need for MISC_ENABLE being restored so early is specific to the S3 resume path. Normally, MISC_ENABLE is saved in save_processor_state(), but this happens after the resume header is created, so just reproduce the logic here. (acpi_suspend_lowlevel() creates the header, calls do_suspend_lowlevel, which calls save_processor_state(), so the saved processor context isn't available during resume header creation.) [ hpa: Consider for stable if OK in mainline ] Signed-off-by: Kees Cook Link: http://lkml.kernel.org/r/20110707011034.GA8523@outflux.net Signed-off-by: H. Peter Anvin Cc: Rafael J. Wysocki Cc: 2.6.38+ --- arch/x86/kernel/acpi/realmode/wakeup.S | 14 ++++++++++++++ arch/x86/kernel/acpi/realmode/wakeup.h | 6 ++++++ arch/x86/kernel/acpi/sleep.c | 6 ++++++ 3 files changed, 26 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/acpi/realmode/wakeup.S b/arch/x86/kernel/acpi/realmode/wakeup.S index ead21b66311..b4fd836e405 100644 --- a/arch/x86/kernel/acpi/realmode/wakeup.S +++ b/arch/x86/kernel/acpi/realmode/wakeup.S @@ -28,6 +28,8 @@ pmode_cr3: .long 0 /* Saved %cr3 */ pmode_cr4: .long 0 /* Saved %cr4 */ pmode_efer: .quad 0 /* Saved EFER */ pmode_gdt: .quad 0 +pmode_misc_en: .quad 0 /* Saved MISC_ENABLE MSR */ +pmode_behavior: .long 0 /* Wakeup behavior flags */ realmode_flags: .long 0 real_magic: .long 0 trampoline_segment: .word 0 @@ -91,6 +93,18 @@ wakeup_code: /* Call the C code */ calll main + /* Restore MISC_ENABLE before entering protected mode, in case + BIOS decided to clear XD_DISABLE during S3. */ + movl pmode_behavior, %eax + btl $WAKEUP_BEHAVIOR_RESTORE_MISC_ENABLE, %eax + jnc 1f + + movl pmode_misc_en, %eax + movl pmode_misc_en + 4, %edx + movl $MSR_IA32_MISC_ENABLE, %ecx + wrmsr +1: + /* Do any other stuff... */ #ifndef CONFIG_64BIT diff --git a/arch/x86/kernel/acpi/realmode/wakeup.h b/arch/x86/kernel/acpi/realmode/wakeup.h index e1828c07e79..97a29e1430e 100644 --- a/arch/x86/kernel/acpi/realmode/wakeup.h +++ b/arch/x86/kernel/acpi/realmode/wakeup.h @@ -21,6 +21,9 @@ struct wakeup_header { u32 pmode_efer_low; /* Protected mode EFER */ u32 pmode_efer_high; u64 pmode_gdt; + u32 pmode_misc_en_low; /* Protected mode MISC_ENABLE */ + u32 pmode_misc_en_high; + u32 pmode_behavior; /* Wakeup routine behavior flags */ u32 realmode_flags; u32 real_magic; u16 trampoline_segment; /* segment with trampoline code, 64-bit only */ @@ -39,4 +42,7 @@ extern struct wakeup_header wakeup_header; #define WAKEUP_HEADER_SIGNATURE 0x51ee1111 #define WAKEUP_END_SIGNATURE 0x65a22c82 +/* Wakeup behavior bits */ +#define WAKEUP_BEHAVIOR_RESTORE_MISC_ENABLE 0 + #endif /* ARCH_X86_KERNEL_ACPI_RM_WAKEUP_H */ diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index 18a857ba7a2..103b6ab368d 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -77,6 +77,12 @@ int acpi_suspend_lowlevel(void) header->pmode_cr0 = read_cr0(); header->pmode_cr4 = read_cr4_safe(); + header->pmode_behavior = 0; + if (!rdmsr_safe(MSR_IA32_MISC_ENABLE, + &header->pmode_misc_en_low, + &header->pmode_misc_en_high)) + header->pmode_behavior |= + (1 << WAKEUP_BEHAVIOR_RESTORE_MISC_ENABLE); header->realmode_flags = acpi_realmode_flags; header->real_magic = 0x12345678; -- cgit v1.2.3-70-g09d2 From f70e957cda22d309c769805cbb932407a5232219 Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Wed, 6 Jul 2011 16:52:37 -0400 Subject: x86: Don't use the EFI reboot method by default Testing suggests that at least some Lenovos and some Intels will fail to reboot via EFI, attempting to jump to an unmapped physical address. In the long run we could handle this by providing a page table with a 1:1 mapping of physical addresses, but for now it's probably just easier to assume that ACPI or legacy methods will be present and reboot via those. Signed-off-by: Matthew Garrett Cc: Linus Torvalds Cc: Andrew Morton Cc: Alan Cox Link: http://lkml.kernel.org/r/1309985557-15350-1-git-send-email-mjg@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/platform/efi/efi.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 474356b98ed..899e393d8e7 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -504,9 +504,6 @@ void __init efi_init(void) x86_platform.set_wallclock = efi_set_rtc_mmss; #endif - /* Setup for EFI runtime service */ - reboot_type = BOOT_EFI; - #if EFI_DEBUG print_efi_memmap(); #endif -- cgit v1.2.3-70-g09d2 From ee339fe63ac408e4604c1c88b1f9a428f2511b70 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 6 Jul 2011 09:43:16 -0400 Subject: xen/pci: Move check for acpi_sci_override_gsi to xen_setup_acpi_sci. Previously we would check for acpi_sci_override_gsi == gsi every time a PCI device was enabled. That works during early bootup, but later on it could lead to triggering unnecessarily the acpi_gsi_to_irq(..) lookup. The reason is that acpi_sci_override_gsi was declared in __initdata and after early bootup could contain bogus values. This patch moves the check for acpi_sci_override_gsi to the site where the ACPI SCI is preset. CC: stable@kernel.org Reported-by: Raghavendra D Prabhu Tested-by: Raghavendra D Prabhu [http://lists.xensource.com/archives/html/xen-devel/2011-07/msg00154.html] Suggested-by: Ian Campbell Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/pci/xen.c | 56 +++++++++++++++++++++++++++--------------------------- 1 file changed, 28 insertions(+), 28 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index fe008309ffe..f567965c062 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c @@ -327,13 +327,12 @@ int __init pci_xen_hvm_init(void) } #ifdef CONFIG_XEN_DOM0 -static int xen_register_pirq(u32 gsi, int triggering) +static int xen_register_pirq(u32 gsi, int gsi_override, int triggering) { int rc, pirq, irq = -1; struct physdev_map_pirq map_irq; int shareable = 0; char *name; - bool gsi_override = false; if (!xen_pv_domain()) return -1; @@ -345,31 +344,12 @@ static int xen_register_pirq(u32 gsi, int triggering) shareable = 1; name = "ioapic-level"; } - pirq = xen_allocate_pirq_gsi(gsi); if (pirq < 0) goto out; - /* Before we bind the GSI to a Linux IRQ, check whether - * we need to override it with bus_irq (IRQ) value. Usually for - * IRQs below IRQ_LEGACY_IRQ this holds IRQ == GSI, as so: - * ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 9 low level) - * but there are oddballs where the IRQ != GSI: - * ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 20 low level) - * which ends up being: gsi_to_irq[9] == 20 - * (which is what acpi_gsi_to_irq ends up calling when starting the - * the ACPI interpreter and keels over since IRQ 9 has not been - * setup as we had setup IRQ 20 for it). - */ - if (gsi == acpi_sci_override_gsi) { - /* Check whether the GSI != IRQ */ - acpi_gsi_to_irq(gsi, &irq); - if (irq != gsi) - /* Bugger, we MUST have that IRQ. */ - gsi_override = true; - } - if (gsi_override) - irq = xen_bind_pirq_gsi_to_irq(irq, pirq, shareable, name); + if (gsi_override >= 0) + irq = xen_bind_pirq_gsi_to_irq(gsi_override, pirq, shareable, name); else irq = xen_bind_pirq_gsi_to_irq(gsi, pirq, shareable, name); if (irq < 0) @@ -392,7 +372,7 @@ out: return irq; } -static int xen_register_gsi(u32 gsi, int triggering, int polarity) +static int xen_register_gsi(u32 gsi, int gsi_override, int triggering, int polarity) { int rc, irq; struct physdev_setup_gsi setup_gsi; @@ -403,7 +383,7 @@ static int xen_register_gsi(u32 gsi, int triggering, int polarity) printk(KERN_DEBUG "xen: registering gsi %u triggering %d polarity %d\n", gsi, triggering, polarity); - irq = xen_register_pirq(gsi, triggering); + irq = xen_register_pirq(gsi, gsi_override, triggering); setup_gsi.gsi = gsi; setup_gsi.triggering = (triggering == ACPI_EDGE_SENSITIVE ? 0 : 1); @@ -425,6 +405,8 @@ static __init void xen_setup_acpi_sci(void) int rc; int trigger, polarity; int gsi = acpi_sci_override_gsi; + int irq = -1; + int gsi_override = -1; if (!gsi) return; @@ -441,7 +423,25 @@ static __init void xen_setup_acpi_sci(void) printk(KERN_INFO "xen: sci override: global_irq=%d trigger=%d " "polarity=%d\n", gsi, trigger, polarity); - gsi = xen_register_gsi(gsi, trigger, polarity); + /* Before we bind the GSI to a Linux IRQ, check whether + * we need to override it with bus_irq (IRQ) value. Usually for + * IRQs below IRQ_LEGACY_IRQ this holds IRQ == GSI, as so: + * ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 9 low level) + * but there are oddballs where the IRQ != GSI: + * ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 20 low level) + * which ends up being: gsi_to_irq[9] == 20 + * (which is what acpi_gsi_to_irq ends up calling when starting the + * the ACPI interpreter and keels over since IRQ 9 has not been + * setup as we had setup IRQ 20 for it). + */ + /* Check whether the GSI != IRQ */ + if (acpi_gsi_to_irq(gsi, &irq) == 0) { + if (irq >= 0 && irq != gsi) + /* Bugger, we MUST have that IRQ. */ + gsi_override = irq; + } + + gsi = xen_register_gsi(gsi, gsi_override, trigger, polarity); printk(KERN_INFO "xen: acpi sci %d\n", gsi); return; @@ -450,7 +450,7 @@ static __init void xen_setup_acpi_sci(void) static int acpi_register_gsi_xen(struct device *dev, u32 gsi, int trigger, int polarity) { - return xen_register_gsi(gsi, trigger, polarity); + return xen_register_gsi(gsi, -1 /* no GSI override */, trigger, polarity); } static int __init pci_xen_initial_domain(void) @@ -489,7 +489,7 @@ void __init xen_setup_pirqs(void) if (acpi_get_override_irq(irq, &trigger, &polarity) == -1) continue; - xen_register_pirq(irq, + xen_register_pirq(irq, -1 /* no GSI override */, trigger ? ACPI_LEVEL_SENSITIVE : ACPI_EDGE_SENSITIVE); } } -- cgit v1.2.3-70-g09d2