From 94777fc51b3ad85ff9f705ddf7cdd0eb3bbad5a6 Mon Sep 17 00:00:00 2001 From: Dimitri Sivanich Date: Tue, 16 Oct 2012 07:50:21 -0500 Subject: x86/irq/ioapic: Check for valid irq_cfg pointer in smp_irq_move_cleanup_interrupt Posting this patch to fix an issue concerning sparse irq's that I raised a while back. There was discussion about adding refcounting to sparse irqs (to fix other potential race conditions), but that does not appear to have been addressed yet. This covers the only issue of this type that I've encountered in this area. A NULL pointer dereference can occur in smp_irq_move_cleanup_interrupt() if we haven't yet setup the irq_cfg pointer in the irq_desc.irq_data.chip_data. In create_irq_nr() there is a window where we have set vector_irq in __assign_irq_vector(), but not yet called irq_set_chip_data() to set the irq_cfg pointer. Should an IRQ_MOVE_CLEANUP_VECTOR hit the cpu in question during this time, smp_irq_move_cleanup_interrupt() will attempt to process the aforementioned irq, but panic when accessing irq_cfg. Only continue processing the irq if irq_cfg is non-NULL. Signed-off-by: Dimitri Sivanich Cc: Suresh Siddha Cc: Joerg Roedel Cc: Yinghai Lu Cc: Alexander Gordeev Link: http://lkml.kernel.org/r/20121016125021.GA22935@sgi.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/io_apic.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index c265593ec2c..1817fa91102 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2257,6 +2257,9 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void) continue; cfg = irq_cfg(irq); + if (!cfg) + continue; + raw_spin_lock(&desc->lock); /* -- cgit v1.2.3-70-g09d2 From 6ede1fd3cb404c0016de6ac529df46d561bd558b Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 22 Oct 2012 16:35:18 -0700 Subject: x86, mm: Trim memory in memblock to be page aligned We will not map partial pages, so need to make sure memblock allocation will not allocate those bytes out. Also we will use for_each_mem_pfn_range() to loop to map memory range to keep them consistent. Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/CAE9FiQVZirvaBMFYRfXMmWEcHbKSicQEHz4VAwUv0xFCk51ZNw@mail.gmail.com Acked-by: Jacob Shin Signed-off-by: H. Peter Anvin Cc: --- arch/x86/kernel/e820.c | 3 +++ include/linux/memblock.h | 1 + mm/memblock.c | 24 ++++++++++++++++++++++++ 3 files changed, 28 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index ed858e9e9a7..df06ade26be 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -1077,6 +1077,9 @@ void __init memblock_x86_fill(void) memblock_add(ei->addr, ei->size); } + /* throw away partial pages */ + memblock_trim_memory(PAGE_SIZE); + memblock_dump_all(); } diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 569d67d4243..d452ee19106 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -57,6 +57,7 @@ int memblock_add(phys_addr_t base, phys_addr_t size); int memblock_remove(phys_addr_t base, phys_addr_t size); int memblock_free(phys_addr_t base, phys_addr_t size); int memblock_reserve(phys_addr_t base, phys_addr_t size); +void memblock_trim_memory(phys_addr_t align); #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn, diff --git a/mm/memblock.c b/mm/memblock.c index 931eef145af..625905523c2 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -930,6 +930,30 @@ int __init_memblock memblock_is_region_reserved(phys_addr_t base, phys_addr_t si return memblock_overlaps_region(&memblock.reserved, base, size) >= 0; } +void __init_memblock memblock_trim_memory(phys_addr_t align) +{ + int i; + phys_addr_t start, end, orig_start, orig_end; + struct memblock_type *mem = &memblock.memory; + + for (i = 0; i < mem->cnt; i++) { + orig_start = mem->regions[i].base; + orig_end = mem->regions[i].base + mem->regions[i].size; + start = round_up(orig_start, align); + end = round_down(orig_end, align); + + if (start == orig_start && end == orig_end) + continue; + + if (start < end) { + mem->regions[i].base = start; + mem->regions[i].size = end - start; + } else { + memblock_remove_region(mem, i); + i--; + } + } +} void __init_memblock memblock_set_current_limit(phys_addr_t limit) { -- cgit v1.2.3-70-g09d2 From 1f2ff682ac951ed82cc043cf140d2851084512df Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 22 Oct 2012 16:35:18 -0700 Subject: x86, mm: Use memblock memory loop instead of e820_RAM We need to handle E820_RAM and E820_RESERVED_KERNEL at the same time. Also memblock has page aligned range for ram, so we could avoid mapping partial pages. Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/CAE9FiQVZirvaBMFYRfXMmWEcHbKSicQEHz4VAwUv0xFCk51ZNw@mail.gmail.com Acked-by: Jacob Shin Signed-off-by: H. Peter Anvin Cc: --- arch/x86/kernel/setup.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 468e98dfd44..5d888af8682 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -921,18 +921,19 @@ void __init setup_arch(char **cmdline_p) #ifdef CONFIG_X86_64 if (max_pfn > max_low_pfn) { int i; - for (i = 0; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; + unsigned long start, end; + unsigned long start_pfn, end_pfn; - if (ei->addr + ei->size <= 1UL << 32) - continue; + for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, + NULL) { - if (ei->type == E820_RESERVED) + end = PFN_PHYS(end_pfn); + if (end <= (1UL<<32)) continue; + start = PFN_PHYS(start_pfn); max_pfn_mapped = init_memory_mapping( - ei->addr < 1UL << 32 ? 1UL << 32 : ei->addr, - ei->addr + ei->size); + max((1UL<<32), start), end); } /* can we preseve max_low_pfn ?*/ -- cgit v1.2.3-70-g09d2 From 5189c2a7c7769ee9d037d76c1a7b8550ccf3481c Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Wed, 24 Oct 2012 10:00:44 -0700 Subject: x86: efi: Turn off efi_enabled after setup on mixed fw/kernel When 32-bit EFI is used with 64-bit kernel (or vice versa), turn off efi_enabled once setup is done. Beyond setup, it is normally used to determine if runtime services are available and we will have none. This will resolve issues stemming from efivars modprobe panicking on a 32/64-bit setup, as well as some reboot issues on similar setups. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=45991 Reported-by: Marko Kohtala Reported-by: Maxim Kammerer Signed-off-by: Olof Johansson Acked-by: Maarten Lankhorst Cc: stable@kernel.org # 3.4 - 3.6 Cc: Matthew Garrett Signed-off-by: Matt Fleming --- arch/x86/include/asm/efi.h | 1 + arch/x86/kernel/setup.c | 12 ++++++++++++ arch/x86/platform/efi/efi.c | 18 ++++++++++-------- 3 files changed, 23 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index c9dcc181d4d..029189db84d 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -98,6 +98,7 @@ extern void efi_set_executable(efi_memory_desc_t *md, bool executable); extern int efi_memblock_x86_reserve_range(void); extern void efi_call_phys_prelog(void); extern void efi_call_phys_epilog(void); +extern void efi_unmap_memmap(void); #ifndef CONFIG_EFI /* diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index a2bb18e0283..6fd7752cee9 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1035,6 +1035,18 @@ void __init setup_arch(char **cmdline_p) arch_init_ideal_nops(); register_refined_jiffies(CLOCK_TICK_RATE); + +#ifdef CONFIG_EFI + /* Once setup is done above, disable efi_enabled on mismatched + * firmware/kernel archtectures since there is no support for + * runtime services. + */ + if (efi_enabled && IS_ENABLED(CONFIG_X86_64) != efi_64bit) { + pr_info("efi: Setup done, disabling due to 32/64-bit mismatch\n"); + efi_unmap_memmap(); + efi_enabled = 0; + } +#endif } #ifdef CONFIG_X86_32 diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index aded2a91162..9bae3b73fcc 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -70,11 +70,15 @@ EXPORT_SYMBOL(efi); struct efi_memory_map memmap; bool efi_64bit; -static bool efi_native; static struct efi efi_phys __initdata; static efi_system_table_t efi_systab __initdata; +static inline bool efi_is_native(void) +{ + return IS_ENABLED(CONFIG_X86_64) == efi_64bit; +} + static int __init setup_noefi(char *arg) { efi_enabled = 0; @@ -420,7 +424,7 @@ void __init efi_reserve_boot_services(void) } } -static void __init efi_unmap_memmap(void) +void __init efi_unmap_memmap(void) { if (memmap.map) { early_iounmap(memmap.map, memmap.nr_map * memmap.desc_size); @@ -432,7 +436,7 @@ void __init efi_free_boot_services(void) { void *p; - if (!efi_native) + if (!efi_is_native()) return; for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { @@ -684,12 +688,10 @@ void __init efi_init(void) return; } efi_phys.systab = (efi_system_table_t *)boot_params.efi_info.efi_systab; - efi_native = !efi_64bit; #else efi_phys.systab = (efi_system_table_t *) (boot_params.efi_info.efi_systab | ((__u64)boot_params.efi_info.efi_systab_hi<<32)); - efi_native = efi_64bit; #endif if (efi_systab_init(efi_phys.systab)) { @@ -723,7 +725,7 @@ void __init efi_init(void) * that doesn't match the kernel 32/64-bit mode. */ - if (!efi_native) + if (!efi_is_native()) pr_info("No EFI runtime due to 32/64-bit mismatch with kernel\n"); else if (efi_runtime_init()) { efi_enabled = 0; @@ -735,7 +737,7 @@ void __init efi_init(void) return; } #ifdef CONFIG_X86_32 - if (efi_native) { + if (efi_is_native()) { x86_platform.get_wallclock = efi_get_time; x86_platform.set_wallclock = efi_set_rtc_mmss; } @@ -834,7 +836,7 @@ void __init efi_enter_virtual_mode(void) * non-native EFI */ - if (!efi_native) { + if (!efi_is_native()) { efi_unmap_memmap(); return; } -- cgit v1.2.3-70-g09d2