From 8fbac214e5c594a0c2fe78c14adf2cdbb1febc92 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 18 Jul 2013 17:20:33 +0100 Subject: ARM: 7787/1: virt: ensure visibility of __boot_cpu_mode Secondary CPUs write to __boot_cpu_mode with caches disabled, and thus a cached value of __boot_cpu_mode may be incoherent with that in memory. This could lead to a failure to detect mismatched boot modes. This patch adds flushing to ensure that writes by secondaries to __boot_cpu_mode are made visible before we test against it. Signed-off-by: Mark Rutland Acked-by: Dave Martin Acked-by: Marc Zyngier Cc: Christoffer Dall Signed-off-by: Russell King --- arch/arm/include/asm/virt.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'arch/arm/include') diff --git a/arch/arm/include/asm/virt.h b/arch/arm/include/asm/virt.h index 50af92bac73..4371f45c578 100644 --- a/arch/arm/include/asm/virt.h +++ b/arch/arm/include/asm/virt.h @@ -29,6 +29,7 @@ #define BOOT_CPU_MODE_MISMATCH PSR_N_BIT #ifndef __ASSEMBLY__ +#include #ifdef CONFIG_ARM_VIRT_EXT /* @@ -41,10 +42,21 @@ */ extern int __boot_cpu_mode; +static inline void sync_boot_mode(void) +{ + /* + * As secondaries write to __boot_cpu_mode with caches disabled, we + * must flush the corresponding cache entries to ensure the visibility + * of their writes. + */ + sync_cache_r(&__boot_cpu_mode); +} + void __hyp_set_vectors(unsigned long phys_vector_base); unsigned long __hyp_get_vectors(void); #else #define __boot_cpu_mode (SVC_MODE) +#define sync_boot_mode() #endif #ifndef ZIMAGE -- cgit v1.2.3-70-g09d2 From 1f49856bb029779d8f1b63517a3a3b34ffe672c7 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Tue, 23 Jul 2013 15:13:06 +0100 Subject: ARM: 7789/1: Do not run dummy_flush_tlb_a15_erratum() on non-Cortex-A15 Commit 93dc688 (ARM: 7684/1: errata: Workaround for Cortex-A15 erratum 798181 (TLBI/DSB operations)) causes the following undefined instruction error on a mx53 (Cortex-A8): Internal error: Oops - undefined instruction: 0 [#1] SMP ARM CPU: 0 PID: 275 Comm: modprobe Not tainted 3.11.0-rc2-next-20130722-00009-g9b0f371 #881 task: df46cc00 ti: df48e000 task.ti: df48e000 PC is at check_and_switch_context+0x17c/0x4d0 LR is at check_and_switch_context+0xdc/0x4d0 This problem happens because check_and_switch_context() calls dummy_flush_tlb_a15_erratum() without checking if we are really running on a Cortex-A15 or not. To avoid this issue, only call dummy_flush_tlb_a15_erratum() inside check_and_switch_context() if erratum_a15_798181() returns true, which means that we are really running on a Cortex-A15. 
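For illustration, a minimal sketch (not part of the patch) of how the MIDR comparison in erratum_a15_798181() maps onto the r0p0..r3p2 range, assuming the standard ARM MIDR field layout; the example MIDR values below are illustrative:

/*
 * Sketch only: the fields behind the erratum_a15_798181() test.
 * MIDR layout: implementer [31:24], variant [23:20], architecture [19:16],
 * primary part number [15:4], revision [3:0].
 * 0x410fc0f0 = implementer 0x41 (ARM), part 0xc0f (Cortex-A15), r0p0.
 */
#include <stdio.h>

static int is_a15_r0p0_to_r3p2(unsigned int midr)
{
	/* mask keeps implementer, architecture and part number; drops rNpM */
	if ((midr & 0xff0ffff0) != 0x410fc0f0)
		return 0;			/* not a Cortex-A15 at all */
	return midr <= 0x413fc0f2;		/* at most variant 3, revision 2 (r3p2) */
}

int main(void)
{
	/* 0x412fc0f1 is an illustrative MIDR for a Cortex-A15 r2p1 */
	printf("%d\n", is_a15_r0p0_to_r3p2(0x412fc0f1));	/* prints 1: affected */
	printf("%d\n", is_a15_r0p0_to_r3p2(0x410fc080));	/* Cortex-A8: prints 0 */
	return 0;
}
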
Signed-off-by: Fabio Estevam Acked-by: Catalin Marinas Reviewed-by: Roger Quadros Signed-off-by: Russell King --- arch/arm/include/asm/tlbflush.h | 16 ++++++++++++++++ arch/arm/kernel/smp_tlb.c | 17 ----------------- arch/arm/mm/context.c | 3 ++- 3 files changed, 18 insertions(+), 18 deletions(-) (limited to 'arch/arm/include') diff --git a/arch/arm/include/asm/tlbflush.h b/arch/arm/include/asm/tlbflush.h index fdbb9e36974..f467e9b3f8d 100644 --- a/arch/arm/include/asm/tlbflush.h +++ b/arch/arm/include/asm/tlbflush.h @@ -443,7 +443,18 @@ static inline void local_flush_bp_all(void) isb(); } +#include #ifdef CONFIG_ARM_ERRATA_798181 +static inline int erratum_a15_798181(void) +{ + unsigned int midr = read_cpuid_id(); + + /* Cortex-A15 r0p0..r3p2 affected */ + if ((midr & 0xff0ffff0) != 0x410fc0f0 || midr > 0x413fc0f2) + return 0; + return 1; +} + static inline void dummy_flush_tlb_a15_erratum(void) { /* @@ -453,6 +464,11 @@ static inline void dummy_flush_tlb_a15_erratum(void) dsb(); } #else +static inline int erratum_a15_798181(void) +{ + return 0; +} + static inline void dummy_flush_tlb_a15_erratum(void) { } diff --git a/arch/arm/kernel/smp_tlb.c b/arch/arm/kernel/smp_tlb.c index a98b62dca2f..c2edfff573c 100644 --- a/arch/arm/kernel/smp_tlb.c +++ b/arch/arm/kernel/smp_tlb.c @@ -70,23 +70,6 @@ static inline void ipi_flush_bp_all(void *ignored) local_flush_bp_all(); } -#ifdef CONFIG_ARM_ERRATA_798181 -static int erratum_a15_798181(void) -{ - unsigned int midr = read_cpuid_id(); - - /* Cortex-A15 r0p0..r3p2 affected */ - if ((midr & 0xff0ffff0) != 0x410fc0f0 || midr > 0x413fc0f2) - return 0; - return 1; -} -#else -static int erratum_a15_798181(void) -{ - return 0; -} -#endif - static void ipi_flush_tlb_a15_erratum(void *arg) { dmb(); diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c index b55b1015724..4a0544492f1 100644 --- a/arch/arm/mm/context.c +++ b/arch/arm/mm/context.c @@ -245,7 +245,8 @@ void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk) if (cpumask_test_and_clear_cpu(cpu, &tlb_flush_pending)) { local_flush_bp_all(); local_flush_tlb_all(); - dummy_flush_tlb_a15_erratum(); + if (erratum_a15_798181()) + dummy_flush_tlb_a15_erratum(); } atomic64_set(&per_cpu(active_asids, cpu), asid); -- cgit v1.2.3-70-g09d2 From bdae73cd374e28db544fdd9b77de689a36e3c129 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 23 Jul 2013 16:15:36 +0100 Subject: ARM: 7790/1: Fix deferred mm switch on VIVT processors As of commit b9d4d42ad9 (ARM: Remove __ARCH_WANT_INTERRUPTS_ON_CTXSW on pre-ARMv6 CPUs), the mm switching on VIVT processors is done in the finish_arch_post_lock_switch() function to avoid whole cache flushing with interrupts disabled. The need for deferred mm switch is stored as a thread flag (TIF_SWITCH_MM). However, with preemption enabled, we can have another thread switch before finish_arch_post_lock_switch(). If the new thread has the same mm as the previous 'next' thread, the scheduler will not call switch_mm() and the TIF_SWITCH_MM flag won't be set for the new thread. This patch moves the switch pending flag to the mm_context_t structure since this is specific to the mm rather than thread. 
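To make the race concrete, here is an illustrative interleaving (not from the patch) showing why a thread flag loses the deferred switch while a per-mm flag does not:

/*
 * Illustrative only: CONFIG_PREEMPT on a VIVT CPU, threads B and C
 * sharing the same mm.
 *
 *   switch to B:  check_and_switch_context() defers, records "switch pending"
 *   ...B is preempted before finish_arch_post_lock_switch() runs...
 *   switch to C:  same mm as B, so the scheduler never calls switch_mm()
 *   finish_arch_post_lock_switch():
 *       thread flag (old code):  C's TIF_SWITCH_MM was never set -> switch lost
 *       per-mm flag (this patch): mm->context.switch_pending still set -> done
 */

The re-check of switch_pending under preempt_disable() in the new finish_arch_post_lock_switch() (see the diff below) handles the converse case, where the pending switch has already been performed by the time the thread gets back onto the CPU.
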
Signed-off-by: Catalin Marinas Reported-by: Marc Kleine-Budde Tested-by: Marc Kleine-Budde Cc: # 3.5+ Signed-off-by: Russell King --- arch/arm/include/asm/mmu.h | 2 ++ arch/arm/include/asm/mmu_context.h | 20 ++++++++++++++++---- arch/arm/include/asm/thread_info.h | 1 - 3 files changed, 18 insertions(+), 5 deletions(-) (limited to 'arch/arm/include') diff --git a/arch/arm/include/asm/mmu.h b/arch/arm/include/asm/mmu.h index e3d55547e75..d1b4998e4f4 100644 --- a/arch/arm/include/asm/mmu.h +++ b/arch/arm/include/asm/mmu.h @@ -6,6 +6,8 @@ typedef struct { #ifdef CONFIG_CPU_HAS_ASID atomic64_t id; +#else + int switch_pending; #endif unsigned int vmalloc_seq; } mm_context_t; diff --git a/arch/arm/include/asm/mmu_context.h b/arch/arm/include/asm/mmu_context.h index b5792b7fd8d..9b32f76bb0d 100644 --- a/arch/arm/include/asm/mmu_context.h +++ b/arch/arm/include/asm/mmu_context.h @@ -56,7 +56,7 @@ static inline void check_and_switch_context(struct mm_struct *mm, * on non-ASID CPUs, the old mm will remain valid until the * finish_arch_post_lock_switch() call. */ - set_ti_thread_flag(task_thread_info(tsk), TIF_SWITCH_MM); + mm->context.switch_pending = 1; else cpu_switch_mm(mm->pgd, mm); } @@ -65,9 +65,21 @@ static inline void check_and_switch_context(struct mm_struct *mm, finish_arch_post_lock_switch static inline void finish_arch_post_lock_switch(void) { - if (test_and_clear_thread_flag(TIF_SWITCH_MM)) { - struct mm_struct *mm = current->mm; - cpu_switch_mm(mm->pgd, mm); + struct mm_struct *mm = current->mm; + + if (mm && mm->context.switch_pending) { + /* + * Preemption must be disabled during cpu_switch_mm() as we + * have some stateful cache flush implementations. Check + * switch_pending again in case we were preempted and the + * switch to this mm was already done. + */ + preempt_disable(); + if (mm->context.switch_pending) { + mm->context.switch_pending = 0; + cpu_switch_mm(mm->pgd, mm); + } + preempt_enable_no_resched(); } } diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h index 214d4158089..2b8114fcba0 100644 --- a/arch/arm/include/asm/thread_info.h +++ b/arch/arm/include/asm/thread_info.h @@ -156,7 +156,6 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user *, #define TIF_USING_IWMMXT 17 #define TIF_MEMDIE 18 /* is terminating due to OOM killer */ #define TIF_RESTORE_SIGMASK 20 -#define TIF_SWITCH_MM 22 /* deferred switch_mm */ #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) -- cgit v1.2.3-70-g09d2 From acfdd4b1f7590d02e9bae3b73bdbbc4a31b05d38 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 25 Jul 2013 11:44:48 +0100 Subject: ARM: 7791/1: a.out: remove partial a.out support a.out support on ARM requires that argc, argv and envp are passed in r0-r2 respectively, which requires hacking load_aout_binary to prevent argc being clobbered by the return code. Whilst mainline kernels do set the registers up in start_thread, the aout loader has never carried the hack in mainline. Initialising the registers in this way actually goes against the libc expectations for ELF binaries, where argc, argv and envp are passed on the stack, with r0 being used to hold a pointer to an exit function for cleaning up after the dynamic linker if required. If the pointer is NULL, then it is ignored. 
When execing an ELF binary, Linux currently zeroes r0, then sets it to argc and then finally clobbers it with the return value of the execve syscall, so we actually end up with: r0 = 0 stack[0] = argc r1 = stack[1] = argv r2 = stack[2] = envp libc treats r1 and r2 as undefined. The clobbering of r0 by sys_execve works for user-spawned threads, but when executing an ELF binary from a kernel thread (via call_usermodehelper), the execve is performed on the ret_from_fork path, which restores r0 from the saved pt_regs, resulting in argc being presented to the C library. This has horrible consequences when the application exits, since we have an exit function registered using argc, resulting in a jump to hyperspace. This patch solves the problem by removing the partial a.out support from arch/arm/ altogether. Cc: Cc: Ashish Sangwan Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/Kconfig | 1 - arch/arm/include/asm/a.out-core.h | 45 --------------------------------------- arch/arm/include/asm/processor.h | 4 ---- arch/arm/include/uapi/asm/Kbuild | 1 - arch/arm/include/uapi/asm/a.out.h | 34 ----------------------------- 5 files changed, 85 deletions(-) delete mode 100644 arch/arm/include/asm/a.out-core.h delete mode 100644 arch/arm/include/uapi/asm/a.out.h (limited to 'arch/arm/include') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index ba412e02ec0..82f069829ac 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -20,7 +20,6 @@ config ARM select GENERIC_STRNCPY_FROM_USER select GENERIC_STRNLEN_USER select HARDIRQS_SW_RESEND - select HAVE_AOUT select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL select HAVE_ARCH_KGDB select HAVE_ARCH_SECCOMP_FILTER diff --git a/arch/arm/include/asm/a.out-core.h b/arch/arm/include/asm/a.out-core.h deleted file mode 100644 index 92f10cb5c70..00000000000 --- a/arch/arm/include/asm/a.out-core.h +++ /dev/null @@ -1,45 +0,0 @@ -/* a.out coredump register dumper - * - * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. - * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public Licence - * as published by the Free Software Foundation; either version - * 2 of the Licence, or (at your option) any later version. 
- */ - -#ifndef _ASM_A_OUT_CORE_H -#define _ASM_A_OUT_CORE_H - -#ifdef __KERNEL__ - -#include -#include - -/* - * fill in the user structure for an a.out core dump - */ -static inline void aout_dump_thread(struct pt_regs *regs, struct user *dump) -{ - struct task_struct *tsk = current; - - dump->magic = CMAGIC; - dump->start_code = tsk->mm->start_code; - dump->start_stack = regs->ARM_sp & ~(PAGE_SIZE - 1); - - dump->u_tsize = (tsk->mm->end_code - tsk->mm->start_code) >> PAGE_SHIFT; - dump->u_dsize = (tsk->mm->brk - tsk->mm->start_data + PAGE_SIZE - 1) >> PAGE_SHIFT; - dump->u_ssize = 0; - - memset(dump->u_debugreg, 0, sizeof(dump->u_debugreg)); - - if (dump->start_stack < 0x04000000) - dump->u_ssize = (0x04000000 - dump->start_stack) >> PAGE_SHIFT; - - dump->regs = *regs; - dump->u_fpvalid = dump_fpu (regs, &dump->u_fp); -} - -#endif /* __KERNEL__ */ -#endif /* _ASM_A_OUT_CORE_H */ diff --git a/arch/arm/include/asm/processor.h b/arch/arm/include/asm/processor.h index 06e7d509eaa..413f3876341 100644 --- a/arch/arm/include/asm/processor.h +++ b/arch/arm/include/asm/processor.h @@ -54,7 +54,6 @@ struct thread_struct { #define start_thread(regs,pc,sp) \ ({ \ - unsigned long *stack = (unsigned long *)sp; \ memset(regs->uregs, 0, sizeof(regs->uregs)); \ if (current->personality & ADDR_LIMIT_32BIT) \ regs->ARM_cpsr = USR_MODE; \ @@ -65,9 +64,6 @@ struct thread_struct { regs->ARM_cpsr |= PSR_ENDSTATE; \ regs->ARM_pc = pc & ~1; /* pc */ \ regs->ARM_sp = sp; /* sp */ \ - regs->ARM_r2 = stack[2]; /* r2 (envp) */ \ - regs->ARM_r1 = stack[1]; /* r1 (argv) */ \ - regs->ARM_r0 = stack[0]; /* r0 (argc) */ \ nommu_start_thread(regs); \ }) diff --git a/arch/arm/include/uapi/asm/Kbuild b/arch/arm/include/uapi/asm/Kbuild index 47bcb2d254a..18d76fd5a2a 100644 --- a/arch/arm/include/uapi/asm/Kbuild +++ b/arch/arm/include/uapi/asm/Kbuild @@ -1,7 +1,6 @@ # UAPI Header export list include include/uapi/asm-generic/Kbuild.asm -header-y += a.out.h header-y += byteorder.h header-y += fcntl.h header-y += hwcap.h diff --git a/arch/arm/include/uapi/asm/a.out.h b/arch/arm/include/uapi/asm/a.out.h deleted file mode 100644 index 083894b2e3b..00000000000 --- a/arch/arm/include/uapi/asm/a.out.h +++ /dev/null @@ -1,34 +0,0 @@ -#ifndef __ARM_A_OUT_H__ -#define __ARM_A_OUT_H__ - -#include -#include - -struct exec -{ - __u32 a_info; /* Use macros N_MAGIC, etc for access */ - __u32 a_text; /* length of text, in bytes */ - __u32 a_data; /* length of data, in bytes */ - __u32 a_bss; /* length of uninitialized data area for file, in bytes */ - __u32 a_syms; /* length of symbol table data in file, in bytes */ - __u32 a_entry; /* start address */ - __u32 a_trsize; /* length of relocation info for text, in bytes */ - __u32 a_drsize; /* length of relocation info for data, in bytes */ -}; - -/* - * This is always the same - */ -#define N_TXTADDR(a) (0x00008000) - -#define N_TRSIZE(a) ((a).a_trsize) -#define N_DRSIZE(a) ((a).a_drsize) -#define N_SYMSIZE(a) ((a).a_syms) - -#define M_ARM 103 - -#ifndef LIBRARY_START_TEXT -#define LIBRARY_START_TEXT (0x00c00000) -#endif - -#endif /* __A_OUT_GNU_H__ */ -- cgit v1.2.3-70-g09d2 From 067e710b9a982a92cc8294d7fa0f1e924c65bba1 Mon Sep 17 00:00:00 2001 From: Paul Walmsley Date: Tue, 30 Jul 2013 12:38:45 +0100 Subject: ARM: 7801/1: v6: prevent gcc 4.5 from reordering extended CP15 reads above is_smp() test Commit 621a0147d5c921f4cc33636ccd0602ad5d7cbfbc ("ARM: 7757/1: mm: don't flush icache in switch_mm with hardware broadcasting") breaks the boot on OMAP2430SDP with omap2plus_defconfig. 
Tracked to an undefined instruction abort from the CP15 read in cache_ops_need_broadcast(). It turns out that gcc 4.5 reorders the extended CP15 read above the is_smp() test. This breaks ARM1136 r0 cores, since they don't support several CP15 registers that later ARM cores do. ARM1136JF-S TRM section 3.2.1 "Register allocation" has the details. So mark the extended CP15 read as clobbering memory, which prevents the compiler from reordering it before the is_smp() test. Russell states that the code generated from this approach is preferable to marking the inline asm as volatile. Remove the existing condition code clobber as it's obsolete, per Nico's post: http://www.spinics.net/lists/arm-kernel/msg261208.html This patch is a collaboration with Will Deacon and Russell King. Comments from Paul Walmsley: Russell, if you accept this one, might you also add Will's ack from the lists: Comments from Paul Walmsley: I'd also be obliged if you could add a Cc: line for Jonathan Austin, since he helped test: Signed-off-by: Paul Walmsley Cc: Will Deacon Cc: Nicolas Pitre Cc: Tony Lindgren Acked-by: Will Deacon Cc: Jonathan Austin Signed-off-by: Russell King --- arch/arm/include/asm/cputype.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'arch/arm/include') diff --git a/arch/arm/include/asm/cputype.h b/arch/arm/include/asm/cputype.h index 8c25dc4e985..9672e978d50 100644 --- a/arch/arm/include/asm/cputype.h +++ b/arch/arm/include/asm/cputype.h @@ -89,13 +89,18 @@ extern unsigned int processor_id; __val; \ }) +/* + * The memory clobber prevents gcc 4.5 from reordering the mrc before + * any is_smp() tests, which can cause undefined instruction aborts on + * ARM1136 r0 due to the missing extended CP15 registers. + */ #define read_cpuid_ext(ext_reg) \ ({ \ unsigned int __val; \ asm("mrc p15, 0, %0, c0, " ext_reg \ : "=r" (__val) \ : \ - : "cc"); \ + : "memory"); \ __val; \ }) -- cgit v1.2.3-70-g09d2 From 48be69a026b2c17350a5ef18a1959a919f60be7d Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 24 Jul 2013 00:29:18 +0100 Subject: ARM: move signal handlers into a vdso-like page Move the signal handlers into a VDSO page rather than keeping them in the vectors page. This allows us to place them randomly within this page, and also map the page at a random location within userspace further protecting these code fragments from ROP attacks. The new VDSO page is also poisoned in the same way as the vector page. 
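For illustration, the in-page randomisation used by get_signal_page() further down boils down to the following sketch (restating the arithmetic, not adding behaviour):

/*
 * Sketch (not part of the patch): pick a word-aligned offset in
 * [0x200, 0x9fc], leaving the 28-byte sigreturn_codes[] inside the 4K page.
 */
unsigned int pick_sigpage_offset(unsigned int rnd)	/* rnd: any 32-bit random value */
{
	unsigned int offset = 0x200 + (rnd & 0x7fc);	/* 0x7fc clears bits 1:0 */

	/* 0x200 <= offset <= 0x9fc, offset % 4 == 0, offset + 28 = 0xa18 < 0x1000 */
	return offset;
}
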
Signed-off-by: Russell King --- arch/arm/include/asm/elf.h | 4 ++++ arch/arm/include/asm/mmu.h | 1 + arch/arm/kernel/process.c | 40 ++++++++++++++++++++++++++++++++--- arch/arm/kernel/signal.c | 52 +++++++++++++++++++++++++++++++++++++++------- arch/arm/kernel/signal.h | 12 ----------- arch/arm/kernel/traps.c | 9 -------- 6 files changed, 87 insertions(+), 31 deletions(-) delete mode 100644 arch/arm/kernel/signal.h (limited to 'arch/arm/include') diff --git a/arch/arm/include/asm/elf.h b/arch/arm/include/asm/elf.h index 38050b1c480..9c9b30717fd 100644 --- a/arch/arm/include/asm/elf.h +++ b/arch/arm/include/asm/elf.h @@ -130,4 +130,8 @@ struct mm_struct; extern unsigned long arch_randomize_brk(struct mm_struct *mm); #define arch_randomize_brk arch_randomize_brk +#define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1 +struct linux_binprm; +int arch_setup_additional_pages(struct linux_binprm *, int); + #endif diff --git a/arch/arm/include/asm/mmu.h b/arch/arm/include/asm/mmu.h index e3d55547e75..7345e37155d 100644 --- a/arch/arm/include/asm/mmu.h +++ b/arch/arm/include/asm/mmu.h @@ -8,6 +8,7 @@ typedef struct { atomic64_t id; #endif unsigned int vmalloc_seq; + unsigned long sigpage; } mm_context_t; #ifdef CONFIG_CPU_HAS_ASID diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index d3ca4f6915a..566d0d71a1e 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -428,8 +428,8 @@ unsigned long arch_randomize_brk(struct mm_struct *mm) #ifdef CONFIG_MMU /* * The vectors page is always readable from user space for the - * atomic helpers and the signal restart code. Insert it into the - * gate_vma so that it is visible through ptrace and /proc//mem. + * atomic helpers. Insert it into the gate_vma so that it is visible + * through ptrace and /proc//mem. */ static struct vm_area_struct gate_vma = { .vm_start = 0xffff0000, @@ -461,6 +461,40 @@ int in_gate_area_no_mm(unsigned long addr) const char *arch_vma_name(struct vm_area_struct *vma) { - return (vma == &gate_vma) ? "[vectors]" : NULL; + return (vma == &gate_vma) ? "[vectors]" : + (vma->vm_mm && vma->vm_start == vma->vm_mm->context.sigpage) ? + "[sigpage]" : NULL; +} + +extern struct page *get_signal_page(void); + +int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) +{ + struct mm_struct *mm = current->mm; + struct page *page; + unsigned long addr; + int ret; + + page = get_signal_page(); + if (!page) + return -ENOMEM; + + down_write(&mm->mmap_sem); + addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0); + if (IS_ERR_VALUE(addr)) { + ret = addr; + goto up_fail; + } + + ret = install_special_mapping(mm, addr, PAGE_SIZE, + VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC, + &page); + + if (ret == 0) + mm->context.sigpage = addr; + + up_fail: + up_write(&mm->mmap_sem); + return ret; } #endif diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index 1c16c35c271..0f17e06d51e 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -8,6 +8,7 @@ * published by the Free Software Foundation. */ #include +#include #include #include #include @@ -15,12 +16,11 @@ #include #include +#include #include #include #include -#include "signal.h" - /* * For ARM syscalls, we encode the syscall number into the instruction. 
*/ @@ -40,11 +40,13 @@ #define SWI_THUMB_SIGRETURN (0xdf00 << 16 | 0x2700 | (__NR_sigreturn - __NR_SYSCALL_BASE)) #define SWI_THUMB_RT_SIGRETURN (0xdf00 << 16 | 0x2700 | (__NR_rt_sigreturn - __NR_SYSCALL_BASE)) -const unsigned long sigreturn_codes[7] = { +static const unsigned long sigreturn_codes[7] = { MOV_R7_NR_SIGRETURN, SWI_SYS_SIGRETURN, SWI_THUMB_SIGRETURN, MOV_R7_NR_RT_SIGRETURN, SWI_SYS_RT_SIGRETURN, SWI_THUMB_RT_SIGRETURN, }; +static unsigned long signal_return_offset; + #ifdef CONFIG_CRUNCH static int preserve_crunch_context(struct crunch_sigframe __user *frame) { @@ -401,12 +403,15 @@ setup_return(struct pt_regs *regs, struct ksignal *ksig, return 1; if ((cpsr & MODE32_BIT) && !IS_ENABLED(CONFIG_ARM_MPU)) { + struct mm_struct *mm = current->mm; + /* - * 32-bit code can use the new high-page - * signal return code support except when the MPU has - * protected the vectors page from PL0 + * 32-bit code can use the signal return page + * except when the MPU has protected the vectors + * page from PL0 */ - retcode = KERN_SIGRETURN_CODE + (idx << 2) + thumb; + retcode = mm->context.sigpage + signal_return_offset + + (idx << 2) + thumb; } else { /* * Ensure that the instruction cache sees @@ -608,3 +613,36 @@ do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall) } while (thread_flags & _TIF_WORK_MASK); return 0; } + +static struct page *signal_page; + +struct page *get_signal_page(void) +{ + if (!signal_page) { + unsigned long ptr; + unsigned offset; + void *addr; + + signal_page = alloc_pages(GFP_KERNEL, 0); + + if (!signal_page) + return NULL; + + addr = page_address(signal_page); + + /* Give the signal return code some randomness */ + offset = 0x200 + (get_random_int() & 0x7fc); + signal_return_offset = offset; + + /* + * Copy signal return handlers into the vector page, and + * set sigreturn to be a pointer to these. + */ + memcpy(addr + offset, sigreturn_codes, sizeof(sigreturn_codes)); + + ptr = (unsigned long)addr + offset; + flush_icache_range(ptr, ptr + sizeof(sigreturn_codes)); + } + + return signal_page; +} diff --git a/arch/arm/kernel/signal.h b/arch/arm/kernel/signal.h deleted file mode 100644 index 5ff067b7c75..00000000000 --- a/arch/arm/kernel/signal.h +++ /dev/null @@ -1,12 +0,0 @@ -/* - * linux/arch/arm/kernel/signal.h - * - * Copyright (C) 2005-2009 Russell King. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#define KERN_SIGRETURN_CODE (CONFIG_VECTORS_BASE + 0x00000500) - -extern const unsigned long sigreturn_codes[7]; diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index e3ca35ccd38..ab517fcce21 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -35,8 +35,6 @@ #include #include -#include "signal.h" - static const char *handler[]= { "prefetch abort", "data abort", "address exception", "interrupt" }; void *vectors_page; @@ -850,13 +848,6 @@ void __init early_trap_init(void *vectors_base) kuser_init(vectors_base); - /* - * Copy signal return handlers into the vector page, and - * set sigreturn to be a pointer to these. 
- */ - memcpy((void *)(vectors + KERN_SIGRETURN_CODE - CONFIG_VECTORS_BASE), - sigreturn_codes, sizeof(sigreturn_codes)); - flush_icache_range(vectors, vectors + PAGE_SIZE * 2); modify_domain(DOMAIN_USER, DOMAIN_CLIENT); #else /* ifndef CONFIG_CPU_V7M */ -- cgit v1.2.3-70-g09d2 From a5463cd3435475386cbbe7b06e01292ac169d36f Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 31 Jul 2013 21:58:56 +0100 Subject: ARM: make vectors page inaccessible from userspace If kuser helpers are not provided by the kernel, disable user access to the vectors page. With the kuser helpers gone, there is no reason for this page to be visible to userspace. Signed-off-by: Russell King --- arch/arm/include/asm/page.h | 2 ++ arch/arm/kernel/process.c | 7 ++++++- arch/arm/mm/mmu.c | 4 ++++ 3 files changed, 12 insertions(+), 1 deletion(-) (limited to 'arch/arm/include') diff --git a/arch/arm/include/asm/page.h b/arch/arm/include/asm/page.h index 6363f3d1d50..4355f0ec44d 100644 --- a/arch/arm/include/asm/page.h +++ b/arch/arm/include/asm/page.h @@ -142,7 +142,9 @@ extern void __cpu_copy_user_highpage(struct page *to, struct page *from, #define clear_page(page) memset((void *)(page), 0, PAGE_SIZE) extern void copy_page(void *to, const void *from); +#ifdef CONFIG_KUSER_HELPERS #define __HAVE_ARCH_GATE_AREA 1 +#endif #ifdef CONFIG_ARM_LPAE #include diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index 566d0d71a1e..1e6c33d01c0 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -426,6 +426,7 @@ unsigned long arch_randomize_brk(struct mm_struct *mm) } #ifdef CONFIG_MMU +#ifdef CONFIG_KUSER_HELPERS /* * The vectors page is always readable from user space for the * atomic helpers. Insert it into the gate_vma so that it is visible @@ -458,10 +459,14 @@ int in_gate_area_no_mm(unsigned long addr) { return in_gate_area(NULL, addr); } +#define is_gate_vma(vma) ((vma) = &gate_vma) +#else +#define is_gate_vma(vma) 0 +#endif const char *arch_vma_name(struct vm_area_struct *vma) { - return (vma == &gate_vma) ? "[vectors]" : + return is_gate_vma(vma) ? "[vectors]" : (vma->vm_mm && vma->vm_start == vma->vm_mm->context.sigpage) ? "[sigpage]" : NULL; } diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 9ea274d1af6..ca46f413d86 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -1205,7 +1205,11 @@ static void __init devicemaps_init(struct machine_desc *mdesc) map.pfn = __phys_to_pfn(virt_to_phys(vectors)); map.virtual = 0xffff0000; map.length = PAGE_SIZE; +#ifdef CONFIG_KUSER_HELPERS map.type = MT_HIGH_VECTORS; +#else + map.type = MT_LOW_VECTORS; +#endif create_mapping(&map); if (!vectors_high()) { -- cgit v1.2.3-70-g09d2 From 8c0cc8a5d90bc7373a7a9e7f7a40eb41f51e03fc Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 3 Aug 2013 10:39:51 +0100 Subject: ARM: fix nommu builds with 48be69a02 (ARM: move signal handlers into a vdso-like page) Olof reports that noMMU builds error out with: arch/arm/kernel/signal.c: In function 'setup_return': arch/arm/kernel/signal.c:413:25: error: 'mm_context_t' has no member named 'sigpage' This shows one of the evilnesses of IS_ENABLED(). Get rid of it here and replace it with #ifdef's - and as no noMMU platform can make use of sigpage, depend on CONIFG_MMU not CONFIG_ARM_MPU. 
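The underlying issue is general: IS_ENABLED() relies on dead-code elimination, so the guarded code must still parse and type-check even when the option is off. A minimal sketch (illustrative, not kernel code):

/*
 * With IS_ENABLED() both branches must still compile, so referencing a
 * member that only exists under CONFIG_MMU breaks the !MMU build; an
 * #ifdef removes the code entirely.
 */
struct ctx {
#ifdef CONFIG_MMU
	unsigned long sigpage;
#endif
};

static void use(struct ctx *c)
{
	if (IS_ENABLED(CONFIG_MMU))
		c->sigpage = 1;	/* parsed even when !CONFIG_MMU: build error */
#ifdef CONFIG_MMU
	c->sigpage = 1;		/* dropped by the preprocessor when !CONFIG_MMU */
#endif
}
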
Reported-by: Olof Johansson Signed-off-by: Russell King --- arch/arm/include/asm/elf.h | 2 ++ arch/arm/kernel/signal.c | 7 +++++-- 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'arch/arm/include') diff --git a/arch/arm/include/asm/elf.h b/arch/arm/include/asm/elf.h index 9c9b30717fd..56211f2084e 100644 --- a/arch/arm/include/asm/elf.h +++ b/arch/arm/include/asm/elf.h @@ -130,8 +130,10 @@ struct mm_struct; extern unsigned long arch_randomize_brk(struct mm_struct *mm); #define arch_randomize_brk arch_randomize_brk +#ifdef CONFIG_MMU #define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1 struct linux_binprm; int arch_setup_additional_pages(struct linux_binprm *, int); +#endif #endif diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index 39e7105a9b7..ab330422527 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -402,7 +402,8 @@ setup_return(struct pt_regs *regs, struct ksignal *ksig, __put_user(sigreturn_codes[idx+1], rc+1)) return 1; - if ((cpsr & MODE32_BIT) && !IS_ENABLED(CONFIG_ARM_MPU)) { +#ifdef CONFIG_MMU + if (cpsr & MODE32_BIT) { struct mm_struct *mm = current->mm; /* @@ -412,7 +413,9 @@ setup_return(struct pt_regs *regs, struct ksignal *ksig, */ retcode = mm->context.sigpage + signal_return_offset + (idx << 2) + thumb; - } else { + } else +#endif + { /* * Ensure that the instruction cache sees * the return code written onto the stack. -- cgit v1.2.3-70-g09d2 From afa31d8eb86fc2f25083e675d57ac8173a98f999 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 12 Aug 2013 18:03:26 +0100 Subject: ARM: 7811/1: locks: use early clobber in arch_spin_trylock The res variable is written before we've finished with the input operands (namely the lock address), so ensure that we mark it as `early clobber' to avoid unintended register sharing. Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/include/asm/spinlock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/arm/include') diff --git a/arch/arm/include/asm/spinlock.h b/arch/arm/include/asm/spinlock.h index f8b8965666e..7ed43f68e04 100644 --- a/arch/arm/include/asm/spinlock.h +++ b/arch/arm/include/asm/spinlock.h @@ -107,7 +107,7 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock) " subs %1, %0, %0, ror #16\n" " addeq %0, %0, %4\n" " strexeq %2, %0, [%3]" - : "=&r" (slock), "=&r" (contended), "=r" (res) + : "=&r" (slock), "=&r" (contended), "=&r" (res) : "r" (&lock->slock), "I" (1 << TICKET_SHIFT) : "cc"); } while (res); -- cgit v1.2.3-70-g09d2 From 00efaa0250939dc148e2d3104fb3c18395d24a2d Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 12 Aug 2013 18:04:05 +0100 Subject: ARM: 7812/1: rwlocks: retry trylock operation if strex fails on free lock Commit 15e7e5c1ebf5 ("ARM: 7749/1: spinlock: retry trylock operation if strex fails on free lock") modifying our arch_spin_trylock to retry the acquisition if the lock appeared uncontended, but the strex failed. This patch does the same for rwlocks, which were missed by the original patch. 
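The point of the retry loop is that the lock attempt reports two separate things: whether the lock was actually contended, and whether the exclusive store itself failed, which can happen on a free lock (for example because the exclusive monitor was cleared by an interrupt). A sketch of the shape, where load_excl()/store_excl() are hypothetical stand-ins for ldrex/strex rather than real kernel APIs:

/*
 * Sketch of the retried trylock: retry only when strex failed spuriously,
 * give up only when the lock was genuinely held.
 */
static inline int write_trylock_sketch(unsigned long *lock)
{
	unsigned long contended, failed;

	do {
		contended = load_excl(lock);			/* ldrex */
		failed = 0;
		if (contended == 0)
			failed = store_excl(lock, 0x80000000);	/* strexeq */
	} while (failed);

	if (!contended) {
		smp_mb();
		return 1;
	}
	return 0;
}
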
Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/include/asm/spinlock.h | 49 +++++++++++++++++++++++++---------------- 1 file changed, 30 insertions(+), 19 deletions(-) (limited to 'arch/arm/include') diff --git a/arch/arm/include/asm/spinlock.h b/arch/arm/include/asm/spinlock.h index 7ed43f68e04..b07c09e5a0a 100644 --- a/arch/arm/include/asm/spinlock.h +++ b/arch/arm/include/asm/spinlock.h @@ -168,17 +168,20 @@ static inline void arch_write_lock(arch_rwlock_t *rw) static inline int arch_write_trylock(arch_rwlock_t *rw) { - unsigned long tmp; + unsigned long contended, res; - __asm__ __volatile__( -" ldrex %0, [%1]\n" -" teq %0, #0\n" -" strexeq %0, %2, [%1]" - : "=&r" (tmp) - : "r" (&rw->lock), "r" (0x80000000) - : "cc"); + do { + __asm__ __volatile__( + " ldrex %0, [%2]\n" + " mov %1, #0\n" + " teq %0, #0\n" + " strexeq %1, %3, [%2]" + : "=&r" (contended), "=&r" (res) + : "r" (&rw->lock), "r" (0x80000000) + : "cc"); + } while (res); - if (tmp == 0) { + if (!contended) { smp_mb(); return 1; } else { @@ -254,18 +257,26 @@ static inline void arch_read_unlock(arch_rwlock_t *rw) static inline int arch_read_trylock(arch_rwlock_t *rw) { - unsigned long tmp, tmp2 = 1; + unsigned long contended, res; - __asm__ __volatile__( -" ldrex %0, [%2]\n" -" adds %0, %0, #1\n" -" strexpl %1, %0, [%2]\n" - : "=&r" (tmp), "+r" (tmp2) - : "r" (&rw->lock) - : "cc"); + do { + __asm__ __volatile__( + " ldrex %0, [%2]\n" + " mov %1, #0\n" + " adds %0, %0, #1\n" + " strexpl %1, %0, [%2]" + : "=&r" (contended), "=&r" (res) + : "r" (&rw->lock) + : "cc"); + } while (res); - smp_mb(); - return tmp2 == 0; + /* If the lock is negative, then it is already held for write. */ + if (contended < 0x80000000) { + smp_mb(); + return 1; + } else { + return 0; + } } /* read_can_lock - would read_trylock() succeed? */ -- cgit v1.2.3-70-g09d2 From 2103f6cba61a8b8bea3fc1b63661d830a2125e76 Mon Sep 17 00:00:00 2001 From: Stephen Warren Date: Fri, 2 Aug 2013 20:52:49 +0100 Subject: ARM: 7807/1: kexec: validate CPU hotplug support Architectures should fully validate whether kexec is possible as part of machine_kexec_prepare(), so that user-space's kexec_load() operation can report any problems. Performing validation in machine_kexec() itself is too late, since it is not allowed to return. Prior to this patch, ARM's machine_kexec() was testing after-the-fact whether machine_kexec_prepare() was able to disable all but one CPU. Instead, modify machine_kexec_prepare() to validate all conditions necessary for machine_kexec_prepare()'s to succeed. BUG if the validation succeeded, yet disabling the CPUs didn't actually work. Signed-off-by: Stephen Warren Acked-by: "Eric W. 
Biederman" Signed-off-by: Russell King --- arch/arm/include/asm/smp_plat.h | 3 +++ arch/arm/kernel/machine_kexec.c | 20 ++++++++++++++++---- arch/arm/kernel/smp.c | 10 ++++++++++ 3 files changed, 29 insertions(+), 4 deletions(-) (limited to 'arch/arm/include') diff --git a/arch/arm/include/asm/smp_plat.h b/arch/arm/include/asm/smp_plat.h index 6462a721ebd..a252c0bfacf 100644 --- a/arch/arm/include/asm/smp_plat.h +++ b/arch/arm/include/asm/smp_plat.h @@ -88,4 +88,7 @@ static inline u32 mpidr_hash_size(void) { return 1 << mpidr_hash.bits; } + +extern int platform_can_cpu_hotplug(void); + #endif diff --git a/arch/arm/kernel/machine_kexec.c b/arch/arm/kernel/machine_kexec.c index 4fb074c446b..d7c82df6924 100644 --- a/arch/arm/kernel/machine_kexec.c +++ b/arch/arm/kernel/machine_kexec.c @@ -15,6 +15,7 @@ #include #include #include +#include #include extern const unsigned char relocate_new_kernel[]; @@ -38,6 +39,14 @@ int machine_kexec_prepare(struct kimage *image) __be32 header; int i, err; + /* + * Validate that if the current HW supports SMP, then the SW supports + * and implements CPU hotplug for the current HW. If not, we won't be + * able to kexec reliably, so fail the prepare operation. + */ + if (num_possible_cpus() > 1 && !platform_can_cpu_hotplug()) + return -EINVAL; + /* * No segment at default ATAGs address. try to locate * a dtb using magic. @@ -134,10 +143,13 @@ void machine_kexec(struct kimage *image) unsigned long reboot_code_buffer_phys; void *reboot_code_buffer; - if (num_online_cpus() > 1) { - pr_err("kexec: error: multiple CPUs still online\n"); - return; - } + /* + * This can only happen if machine_shutdown() failed to disable some + * CPU, and that can only happen if the checks in + * machine_kexec_prepare() were not correct. If this fails, we can't + * reliably kexec anyway, so BUG_ON is appropriate. + */ + BUG_ON(num_online_cpus() > 1); page_list = image->head & PAGE_MASK; diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index c2b4f8f0be9..2dc19349eb1 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -145,6 +145,16 @@ int boot_secondary(unsigned int cpu, struct task_struct *idle) return -ENOSYS; } +int platform_can_cpu_hotplug(void) +{ +#ifdef CONFIG_HOTPLUG_CPU + if (smp_ops.cpu_kill) + return 1; +#endif + + return 0; +} + #ifdef CONFIG_HOTPLUG_CPU static void percpu_timer_stop(void); -- cgit v1.2.3-70-g09d2 From 2b047252d087be7f2ba088b4933cd904f92e6fce Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 15 Aug 2013 11:42:25 -0700 Subject: Fix TLB gather virtual address range invalidation corner cases Ben Tebulin reported: "Since v3.7.2 on two independent machines a very specific Git repository fails in 9/10 cases on git-fsck due to an SHA1/memory failures. This only occurs on a very specific repository and can be reproduced stably on two independent laptops. Git mailing list ran out of ideas and for me this looks like some very exotic kernel issue" and bisected the failure to the backport of commit 53a59fc67f97 ("mm: limit mmu_gather batching to fix soft lockups on !CONFIG_PREEMPT"). That commit itself is not actually buggy, but what it does is to make it much more likely to hit the partial TLB invalidation case, since it introduces a new case in tlb_next_batch() that previously only ever happened when running out of memory. The real bug is that the TLB gather virtual memory range setup is subtly buggered. 
It was introduced in commit 597e1c3580b7 ("mm/mmu_gather: enable tlb flush range in generic mmu_gather"), and the range handling was already fixed at least once in commit e6c495a96ce0 ("mm: fix the TLB range flushed when __tlb_remove_page() runs out of slots"), but that fix was not complete. The problem with the TLB gather virtual address range is that it isn't set up by the initial tlb_gather_mmu() initialization (which didn't get the TLB range information), but it is set up ad-hoc later by the functions that actually flush the TLB. And so any such case that forgot to update the TLB range entries would potentially miss TLB invalidates. Rather than try to figure out exactly which particular ad-hoc range setup was missing (I personally suspect it's the hugetlb case in zap_huge_pmd(), which didn't have the same logic as zap_pte_range() did), this patch just gets rid of the problem at the source: make the TLB range information available to tlb_gather_mmu(), and initialize it when initializing all the other tlb gather fields. This makes the patch larger, but conceptually much simpler. And the end result is much more understandable; even if you want to play games with partial ranges when invalidating the TLB contents in chunks, now the range information is always there, and anybody who doesn't want to bother with it won't introduce subtle bugs. Ben verified that this fixes his problem. Reported-bisected-and-tested-by: Ben Tebulin Build-testing-by: Stephen Rothwell Build-testing-by: Richard Weinberger Reviewed-by: Michal Hocko Acked-by: Peter Zijlstra Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds --- arch/arm/include/asm/tlb.h | 7 +++++-- arch/arm64/include/asm/tlb.h | 7 +++++-- arch/ia64/include/asm/tlb.h | 9 ++++++--- arch/s390/include/asm/tlb.h | 8 ++++++-- arch/sh/include/asm/tlb.h | 6 ++++-- arch/um/include/asm/tlb.h | 6 ++++-- fs/exec.c | 4 ++-- include/asm-generic/tlb.h | 2 +- mm/hugetlb.c | 2 +- mm/memory.c | 36 +++++++++++++++++++++--------------- mm/mmap.c | 4 ++-- 11 files changed, 57 insertions(+), 34 deletions(-) (limited to 'arch/arm/include') diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h index 46e7cfb3e72..0baf7f0d939 100644 --- a/arch/arm/include/asm/tlb.h +++ b/arch/arm/include/asm/tlb.h @@ -43,6 +43,7 @@ struct mmu_gather { struct mm_struct *mm; unsigned int fullmm; struct vm_area_struct *vma; + unsigned long start, end; unsigned long range_start; unsigned long range_end; unsigned int nr; @@ -107,10 +108,12 @@ static inline void tlb_flush_mmu(struct mmu_gather *tlb) } static inline void -tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned int fullmm) +tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end) { tlb->mm = mm; - tlb->fullmm = fullmm; + tlb->fullmm = !(start | (end+1)); + tlb->start = start; + tlb->end = end; tlb->vma = NULL; tlb->max = ARRAY_SIZE(tlb->local); tlb->pages = tlb->local; diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h index 46b3beb4b77..717031a762c 100644 --- a/arch/arm64/include/asm/tlb.h +++ b/arch/arm64/include/asm/tlb.h @@ -35,6 +35,7 @@ struct mmu_gather { struct mm_struct *mm; unsigned int fullmm; struct vm_area_struct *vma; + unsigned long start, end; unsigned long range_start; unsigned long range_end; unsigned int nr; @@ -97,10 +98,12 @@ static inline void tlb_flush_mmu(struct mmu_gather *tlb) } static inline void -tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned int fullmm) +tlb_gather_mmu(struct 
mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end) { tlb->mm = mm; - tlb->fullmm = fullmm; + tlb->fullmm = !(start | (end+1)); + tlb->start = start; + tlb->end = end; tlb->vma = NULL; tlb->max = ARRAY_SIZE(tlb->local); tlb->pages = tlb->local; diff --git a/arch/ia64/include/asm/tlb.h b/arch/ia64/include/asm/tlb.h index ef3a9de0195..bc5efc7c3f3 100644 --- a/arch/ia64/include/asm/tlb.h +++ b/arch/ia64/include/asm/tlb.h @@ -22,7 +22,7 @@ * unmapping a portion of the virtual address space, these hooks are called according to * the following template: * - * tlb <- tlb_gather_mmu(mm, full_mm_flush); // start unmap for address space MM + * tlb <- tlb_gather_mmu(mm, start, end); // start unmap for address space MM * { * for each vma that needs a shootdown do { * tlb_start_vma(tlb, vma); @@ -58,6 +58,7 @@ struct mmu_gather { unsigned int max; unsigned char fullmm; /* non-zero means full mm flush */ unsigned char need_flush; /* really unmapped some PTEs? */ + unsigned long start, end; unsigned long start_addr; unsigned long end_addr; struct page **pages; @@ -155,13 +156,15 @@ static inline void __tlb_alloc_page(struct mmu_gather *tlb) static inline void -tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned int full_mm_flush) +tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end) { tlb->mm = mm; tlb->max = ARRAY_SIZE(tlb->local); tlb->pages = tlb->local; tlb->nr = 0; - tlb->fullmm = full_mm_flush; + tlb->fullmm = !(start | (end+1)); + tlb->start = start; + tlb->end = end; tlb->start_addr = ~0UL; } diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h index b75d7d68668..23a64d25f2b 100644 --- a/arch/s390/include/asm/tlb.h +++ b/arch/s390/include/asm/tlb.h @@ -32,6 +32,7 @@ struct mmu_gather { struct mm_struct *mm; struct mmu_table_batch *batch; unsigned int fullmm; + unsigned long start, unsigned long end; }; struct mmu_table_batch { @@ -48,10 +49,13 @@ extern void tlb_remove_table(struct mmu_gather *tlb, void *table); static inline void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, - unsigned int full_mm_flush) + unsigned long start, + unsigned long end) { tlb->mm = mm; - tlb->fullmm = full_mm_flush; + tlb->start = start; + tlb->end = end; + tlb->fullmm = !(start | (end+1)); tlb->batch = NULL; if (tlb->fullmm) __tlb_flush_mm(mm); diff --git a/arch/sh/include/asm/tlb.h b/arch/sh/include/asm/tlb.h index e61d43d9f68..362192ed12f 100644 --- a/arch/sh/include/asm/tlb.h +++ b/arch/sh/include/asm/tlb.h @@ -36,10 +36,12 @@ static inline void init_tlb_gather(struct mmu_gather *tlb) } static inline void -tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned int full_mm_flush) +tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end) { tlb->mm = mm; - tlb->fullmm = full_mm_flush; + tlb->start = start; + tlb->end = end; + tlb->fullmm = !(start | (end+1)); init_tlb_gather(tlb); } diff --git a/arch/um/include/asm/tlb.h b/arch/um/include/asm/tlb.h index 4febacd1a8a..29b0301c18a 100644 --- a/arch/um/include/asm/tlb.h +++ b/arch/um/include/asm/tlb.h @@ -45,10 +45,12 @@ static inline void init_tlb_gather(struct mmu_gather *tlb) } static inline void -tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned int full_mm_flush) +tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end) { tlb->mm = mm; - tlb->fullmm = full_mm_flush; + tlb->start = start; + tlb->end = end; + tlb->fullmm = 
!(start | (end+1)); init_tlb_gather(tlb); } diff --git a/fs/exec.c b/fs/exec.c index 9c73def8764..fd774c7cb48 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -608,7 +608,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift) return -ENOMEM; lru_add_drain(); - tlb_gather_mmu(&tlb, mm, 0); + tlb_gather_mmu(&tlb, mm, old_start, old_end); if (new_end > old_start) { /* * when the old and new regions overlap clear from new_end. @@ -625,7 +625,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift) free_pgd_range(&tlb, old_start, old_end, new_end, vma->vm_next ? vma->vm_next->vm_start : USER_PGTABLES_CEILING); } - tlb_finish_mmu(&tlb, new_end, old_end); + tlb_finish_mmu(&tlb, old_start, old_end); /* * Shrink the vma to just the new range. Always succeeds. diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index 13821c339a4..5672d7ea1fa 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -112,7 +112,7 @@ struct mmu_gather { #define HAVE_GENERIC_MMU_GATHER -void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, bool fullmm); +void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end); void tlb_flush_mmu(struct mmu_gather *tlb); void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 83aff0a4d09..b60f33080a2 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2490,7 +2490,7 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, mm = vma->vm_mm; - tlb_gather_mmu(&tlb, mm, 0); + tlb_gather_mmu(&tlb, mm, start, end); __unmap_hugepage_range(&tlb, vma, start, end, ref_page); tlb_finish_mmu(&tlb, start, end); } diff --git a/mm/memory.c b/mm/memory.c index 40268410732..af84bc0ec17 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -209,14 +209,15 @@ static int tlb_next_batch(struct mmu_gather *tlb) * tear-down from @mm. The @fullmm argument is used when @mm is without * users and we're going to destroy the full address space (exit/execve). */ -void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, bool fullmm) +void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end) { tlb->mm = mm; - tlb->fullmm = fullmm; + /* Is it from 0 to ~0? */ + tlb->fullmm = !(start | (end+1)); tlb->need_flush_all = 0; - tlb->start = -1UL; - tlb->end = 0; + tlb->start = start; + tlb->end = end; tlb->need_flush = 0; tlb->local.next = NULL; tlb->local.nr = 0; @@ -256,8 +257,6 @@ void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long e { struct mmu_gather_batch *batch, *next; - tlb->start = start; - tlb->end = end; tlb_flush_mmu(tlb); /* keep the page table cache within bounds */ @@ -1099,7 +1098,6 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb, spinlock_t *ptl; pte_t *start_pte; pte_t *pte; - unsigned long range_start = addr; again: init_rss_vec(rss); @@ -1205,17 +1203,25 @@ again: * and page-free while holding it. */ if (force_flush) { + unsigned long old_end; + force_flush = 0; -#ifdef HAVE_GENERIC_MMU_GATHER - tlb->start = range_start; + /* + * Flush the TLB just for the previous segment, + * then update the range to be the remaining + * TLB range. 
+ */ + old_end = tlb->end; tlb->end = addr; -#endif + tlb_flush_mmu(tlb); - if (addr != end) { - range_start = addr; + + tlb->start = addr; + tlb->end = old_end; + + if (addr != end) goto again; - } } return addr; @@ -1400,7 +1406,7 @@ void zap_page_range(struct vm_area_struct *vma, unsigned long start, unsigned long end = start + size; lru_add_drain(); - tlb_gather_mmu(&tlb, mm, 0); + tlb_gather_mmu(&tlb, mm, start, end); update_hiwater_rss(mm); mmu_notifier_invalidate_range_start(mm, start, end); for ( ; vma && vma->vm_start < end; vma = vma->vm_next) @@ -1426,7 +1432,7 @@ static void zap_page_range_single(struct vm_area_struct *vma, unsigned long addr unsigned long end = address + size; lru_add_drain(); - tlb_gather_mmu(&tlb, mm, 0); + tlb_gather_mmu(&tlb, mm, address, end); update_hiwater_rss(mm); mmu_notifier_invalidate_range_start(mm, address, end); unmap_single_vma(&tlb, vma, address, end, details); diff --git a/mm/mmap.c b/mm/mmap.c index 1edbaa3136c..f9c97d10b87 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2336,7 +2336,7 @@ static void unmap_region(struct mm_struct *mm, struct mmu_gather tlb; lru_add_drain(); - tlb_gather_mmu(&tlb, mm, 0); + tlb_gather_mmu(&tlb, mm, start, end); update_hiwater_rss(mm); unmap_vmas(&tlb, vma, start, end); free_pgtables(&tlb, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS, @@ -2709,7 +2709,7 @@ void exit_mmap(struct mm_struct *mm) lru_add_drain(); flush_cache_mm(mm); - tlb_gather_mmu(&tlb, mm, 1); + tlb_gather_mmu(&tlb, mm, 0, -1); /* update_hiwater_rss(mm) here? but nobody should be looking */ /* Use -1 here to ensure all VMAs in the mm are unmapped */ unmap_vmas(&tlb, vma, 0, -1); -- cgit v1.2.3-70-g09d2
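One detail worth spelling out in the tlb_gather_mmu() change above is the fullmm test, sketched here for reference:

/*
 * fullmm = !(start | (end + 1)):
 *
 *   whole address space (exit/execve): start == 0, end == ~0UL
 *       0 | (~0UL + 1) == 0 | 0 == 0          -> fullmm == 1
 *   any partial range: start != 0 or end + 1 != 0
 *       the OR is non-zero                    -> fullmm == 0
 *
 * which is why exit_mmap() now passes tlb_gather_mmu(&tlb, mm, 0, -1).
 */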