From 8fbac214e5c594a0c2fe78c14adf2cdbb1febc92 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 18 Jul 2013 17:20:33 +0100 Subject: ARM: 7787/1: virt: ensure visibility of __boot_cpu_mode Secondary CPUs write to __boot_cpu_mode with caches disabled, and thus a cached value of __boot_cpu_mode may be incoherent with that in memory. This could lead to a failure to detect mismatched boot modes. This patch adds flushing to ensure that writes by secondaries to __boot_cpu_mode are made visible before we test against it. Signed-off-by: Mark Rutland Acked-by: Dave Martin Acked-by: Marc Zyngier Cc: Christoffer Dall Signed-off-by: Russell King --- arch/arm/include/asm/virt.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'arch/arm/include') diff --git a/arch/arm/include/asm/virt.h b/arch/arm/include/asm/virt.h index 50af92bac73..4371f45c578 100644 --- a/arch/arm/include/asm/virt.h +++ b/arch/arm/include/asm/virt.h @@ -29,6 +29,7 @@ #define BOOT_CPU_MODE_MISMATCH PSR_N_BIT #ifndef __ASSEMBLY__ +#include #ifdef CONFIG_ARM_VIRT_EXT /* @@ -41,10 +42,21 @@ */ extern int __boot_cpu_mode; +static inline void sync_boot_mode(void) +{ + /* + * As secondaries write to __boot_cpu_mode with caches disabled, we + * must flush the corresponding cache entries to ensure the visibility + * of their writes. + */ + sync_cache_r(&__boot_cpu_mode); +} + void __hyp_set_vectors(unsigned long phys_vector_base); unsigned long __hyp_get_vectors(void); #else #define __boot_cpu_mode (SVC_MODE) +#define sync_boot_mode() #endif #ifndef ZIMAGE -- cgit v1.2.3-70-g09d2 From 1f49856bb029779d8f1b63517a3a3b34ffe672c7 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Tue, 23 Jul 2013 15:13:06 +0100 Subject: ARM: 7789/1: Do not run dummy_flush_tlb_a15_erratum() on non-Cortex-A15 Commit 93dc688 (ARM: 7684/1: errata: Workaround for Cortex-A15 erratum 798181 (TLBI/DSB operations)) causes the following undefined instruction error on a mx53 (Cortex-A8): Internal error: Oops - undefined instruction: 0 [#1] SMP ARM CPU: 0 PID: 275 Comm: modprobe Not tainted 3.11.0-rc2-next-20130722-00009-g9b0f371 #881 task: df46cc00 ti: df48e000 task.ti: df48e000 PC is at check_and_switch_context+0x17c/0x4d0 LR is at check_and_switch_context+0xdc/0x4d0 This problem happens because check_and_switch_context() calls dummy_flush_tlb_a15_erratum() without checking if we are really running on a Cortex-A15 or not. To avoid this issue, only call dummy_flush_tlb_a15_erratum() inside check_and_switch_context() if erratum_a15_798181() returns true, which means that we are really running on a Cortex-A15. 
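For illustration, a minimal sketch (not part of the patch) of how the MIDR comparison in erratum_a15_798181() maps onto the r0p0..r3p2 range, assuming the standard ARM MIDR field layout; the example MIDR values below are illustrative:

/*
 * Sketch only: the fields behind the erratum_a15_798181() test.
 * MIDR layout: implementer [31:24], variant [23:20], architecture [19:16],
 * primary part number [15:4], revision [3:0].
 * 0x410fc0f0 = implementer 0x41 (ARM), part 0xc0f (Cortex-A15), r0p0.
 */
#include <stdio.h>

static int is_a15_r0p0_to_r3p2(unsigned int midr)
{
	/* mask keeps implementer, architecture and part number; drops rNpM */
	if ((midr & 0xff0ffff0) != 0x410fc0f0)
		return 0;			/* not a Cortex-A15 at all */
	return midr <= 0x413fc0f2;		/* at most variant 3, revision 2 (r3p2) */
}

int main(void)
{
	/* 0x412fc0f1 is an illustrative MIDR for a Cortex-A15 r2p1 */
	printf("%d\n", is_a15_r0p0_to_r3p2(0x412fc0f1));	/* prints 1: affected */
	printf("%d\n", is_a15_r0p0_to_r3p2(0x410fc080));	/* Cortex-A8: prints 0 */
	return 0;
}
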
Signed-off-by: Fabio Estevam Acked-by: Catalin Marinas Reviewed-by: Roger Quadros Signed-off-by: Russell King --- arch/arm/include/asm/tlbflush.h | 16 ++++++++++++++++ arch/arm/kernel/smp_tlb.c | 17 ----------------- arch/arm/mm/context.c | 3 ++- 3 files changed, 18 insertions(+), 18 deletions(-) (limited to 'arch/arm/include') diff --git a/arch/arm/include/asm/tlbflush.h b/arch/arm/include/asm/tlbflush.h index fdbb9e36974..f467e9b3f8d 100644 --- a/arch/arm/include/asm/tlbflush.h +++ b/arch/arm/include/asm/tlbflush.h @@ -443,7 +443,18 @@ static inline void local_flush_bp_all(void) isb(); } +#include #ifdef CONFIG_ARM_ERRATA_798181 +static inline int erratum_a15_798181(void) +{ + unsigned int midr = read_cpuid_id(); + + /* Cortex-A15 r0p0..r3p2 affected */ + if ((midr & 0xff0ffff0) != 0x410fc0f0 || midr > 0x413fc0f2) + return 0; + return 1; +} + static inline void dummy_flush_tlb_a15_erratum(void) { /* @@ -453,6 +464,11 @@ static inline void dummy_flush_tlb_a15_erratum(void) dsb(); } #else +static inline int erratum_a15_798181(void) +{ + return 0; +} + static inline void dummy_flush_tlb_a15_erratum(void) { } diff --git a/arch/arm/kernel/smp_tlb.c b/arch/arm/kernel/smp_tlb.c index a98b62dca2f..c2edfff573c 100644 --- a/arch/arm/kernel/smp_tlb.c +++ b/arch/arm/kernel/smp_tlb.c @@ -70,23 +70,6 @@ static inline void ipi_flush_bp_all(void *ignored) local_flush_bp_all(); } -#ifdef CONFIG_ARM_ERRATA_798181 -static int erratum_a15_798181(void) -{ - unsigned int midr = read_cpuid_id(); - - /* Cortex-A15 r0p0..r3p2 affected */ - if ((midr & 0xff0ffff0) != 0x410fc0f0 || midr > 0x413fc0f2) - return 0; - return 1; -} -#else -static int erratum_a15_798181(void) -{ - return 0; -} -#endif - static void ipi_flush_tlb_a15_erratum(void *arg) { dmb(); diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c index b55b1015724..4a0544492f1 100644 --- a/arch/arm/mm/context.c +++ b/arch/arm/mm/context.c @@ -245,7 +245,8 @@ void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk) if (cpumask_test_and_clear_cpu(cpu, &tlb_flush_pending)) { local_flush_bp_all(); local_flush_tlb_all(); - dummy_flush_tlb_a15_erratum(); + if (erratum_a15_798181()) + dummy_flush_tlb_a15_erratum(); } atomic64_set(&per_cpu(active_asids, cpu), asid); -- cgit v1.2.3-70-g09d2 From bdae73cd374e28db544fdd9b77de689a36e3c129 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 23 Jul 2013 16:15:36 +0100 Subject: ARM: 7790/1: Fix deferred mm switch on VIVT processors As of commit b9d4d42ad9 (ARM: Remove __ARCH_WANT_INTERRUPTS_ON_CTXSW on pre-ARMv6 CPUs), the mm switching on VIVT processors is done in the finish_arch_post_lock_switch() function to avoid whole cache flushing with interrupts disabled. The need for deferred mm switch is stored as a thread flag (TIF_SWITCH_MM). However, with preemption enabled, we can have another thread switch before finish_arch_post_lock_switch(). If the new thread has the same mm as the previous 'next' thread, the scheduler will not call switch_mm() and the TIF_SWITCH_MM flag won't be set for the new thread. This patch moves the switch pending flag to the mm_context_t structure since this is specific to the mm rather than thread. 
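To make the race concrete, here is an illustrative interleaving (not from the patch) showing why a thread flag loses the deferred switch while a per-mm flag does not:

/*
 * Illustrative only: CONFIG_PREEMPT on a VIVT CPU, threads B and C
 * sharing the same mm.
 *
 *   switch to B:  check_and_switch_context() defers, records "switch pending"
 *   ...B is preempted before finish_arch_post_lock_switch() runs...
 *   switch to C:  same mm as B, so the scheduler never calls switch_mm()
 *   finish_arch_post_lock_switch():
 *       thread flag (old code):  C's TIF_SWITCH_MM was never set -> switch lost
 *       per-mm flag (this patch): mm->context.switch_pending still set -> done
 */

The re-check of switch_pending under preempt_disable() in the new finish_arch_post_lock_switch() (see the diff below) handles the converse case, where the pending switch has already been performed by the time the thread gets back onto the CPU.
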
Signed-off-by: Catalin Marinas Reported-by: Marc Kleine-Budde Tested-by: Marc Kleine-Budde Cc: # 3.5+ Signed-off-by: Russell King --- arch/arm/include/asm/mmu.h | 2 ++ arch/arm/include/asm/mmu_context.h | 20 ++++++++++++++++---- arch/arm/include/asm/thread_info.h | 1 - 3 files changed, 18 insertions(+), 5 deletions(-) (limited to 'arch/arm/include') diff --git a/arch/arm/include/asm/mmu.h b/arch/arm/include/asm/mmu.h index e3d55547e75..d1b4998e4f4 100644 --- a/arch/arm/include/asm/mmu.h +++ b/arch/arm/include/asm/mmu.h @@ -6,6 +6,8 @@ typedef struct { #ifdef CONFIG_CPU_HAS_ASID atomic64_t id; +#else + int switch_pending; #endif unsigned int vmalloc_seq; } mm_context_t; diff --git a/arch/arm/include/asm/mmu_context.h b/arch/arm/include/asm/mmu_context.h index b5792b7fd8d..9b32f76bb0d 100644 --- a/arch/arm/include/asm/mmu_context.h +++ b/arch/arm/include/asm/mmu_context.h @@ -56,7 +56,7 @@ static inline void check_and_switch_context(struct mm_struct *mm, * on non-ASID CPUs, the old mm will remain valid until the * finish_arch_post_lock_switch() call. */ - set_ti_thread_flag(task_thread_info(tsk), TIF_SWITCH_MM); + mm->context.switch_pending = 1; else cpu_switch_mm(mm->pgd, mm); } @@ -65,9 +65,21 @@ static inline void check_and_switch_context(struct mm_struct *mm, finish_arch_post_lock_switch static inline void finish_arch_post_lock_switch(void) { - if (test_and_clear_thread_flag(TIF_SWITCH_MM)) { - struct mm_struct *mm = current->mm; - cpu_switch_mm(mm->pgd, mm); + struct mm_struct *mm = current->mm; + + if (mm && mm->context.switch_pending) { + /* + * Preemption must be disabled during cpu_switch_mm() as we + * have some stateful cache flush implementations. Check + * switch_pending again in case we were preempted and the + * switch to this mm was already done. + */ + preempt_disable(); + if (mm->context.switch_pending) { + mm->context.switch_pending = 0; + cpu_switch_mm(mm->pgd, mm); + } + preempt_enable_no_resched(); } } diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h index 214d4158089..2b8114fcba0 100644 --- a/arch/arm/include/asm/thread_info.h +++ b/arch/arm/include/asm/thread_info.h @@ -156,7 +156,6 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user *, #define TIF_USING_IWMMXT 17 #define TIF_MEMDIE 18 /* is terminating due to OOM killer */ #define TIF_RESTORE_SIGMASK 20 -#define TIF_SWITCH_MM 22 /* deferred switch_mm */ #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) -- cgit v1.2.3-70-g09d2 From acfdd4b1f7590d02e9bae3b73bdbbc4a31b05d38 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 25 Jul 2013 11:44:48 +0100 Subject: ARM: 7791/1: a.out: remove partial a.out support a.out support on ARM requires that argc, argv and envp are passed in r0-r2 respectively, which requires hacking load_aout_binary to prevent argc being clobbered by the return code. Whilst mainline kernels do set the registers up in start_thread, the aout loader has never carried the hack in mainline. Initialising the registers in this way actually goes against the libc expectations for ELF binaries, where argc, argv and envp are passed on the stack, with r0 being used to hold a pointer to an exit function for cleaning up after the dynamic linker if required. If the pointer is NULL, then it is ignored. 
When execing an ELF binary, Linux currently zeroes r0, then sets it to argc and then finally clobbers it with the return value of the execve syscall, so we actually end up with: r0 = 0 stack[0] = argc r1 = stack[1] = argv r2 = stack[2] = envp libc treats r1 and r2 as undefined. The clobbering of r0 by sys_execve works for user-spawned threads, but when executing an ELF binary from a kernel thread (via call_usermodehelper), the execve is performed on the ret_from_fork path, which restores r0 from the saved pt_regs, resulting in argc being presented to the C library. This has horrible consequences when the application exits, since we have an exit function registered using argc, resulting in a jump to hyperspace. This patch solves the problem by removing the partial a.out support from arch/arm/ altogether. Cc: Cc: Ashish Sangwan Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/Kconfig | 1 - arch/arm/include/asm/a.out-core.h | 45 --------------------------------------- arch/arm/include/asm/processor.h | 4 ---- arch/arm/include/uapi/asm/Kbuild | 1 - arch/arm/include/uapi/asm/a.out.h | 34 ----------------------------- 5 files changed, 85 deletions(-) delete mode 100644 arch/arm/include/asm/a.out-core.h delete mode 100644 arch/arm/include/uapi/asm/a.out.h (limited to 'arch/arm/include') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index ba412e02ec0..82f069829ac 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -20,7 +20,6 @@ config ARM select GENERIC_STRNCPY_FROM_USER select GENERIC_STRNLEN_USER select HARDIRQS_SW_RESEND - select HAVE_AOUT select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL select HAVE_ARCH_KGDB select HAVE_ARCH_SECCOMP_FILTER diff --git a/arch/arm/include/asm/a.out-core.h b/arch/arm/include/asm/a.out-core.h deleted file mode 100644 index 92f10cb5c70..00000000000 --- a/arch/arm/include/asm/a.out-core.h +++ /dev/null @@ -1,45 +0,0 @@ -/* a.out coredump register dumper - * - * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. - * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public Licence - * as published by the Free Software Foundation; either version - * 2 of the Licence, or (at your option) any later version. 
- */ - -#ifndef _ASM_A_OUT_CORE_H -#define _ASM_A_OUT_CORE_H - -#ifdef __KERNEL__ - -#include -#include - -/* - * fill in the user structure for an a.out core dump - */ -static inline void aout_dump_thread(struct pt_regs *regs, struct user *dump) -{ - struct task_struct *tsk = current; - - dump->magic = CMAGIC; - dump->start_code = tsk->mm->start_code; - dump->start_stack = regs->ARM_sp & ~(PAGE_SIZE - 1); - - dump->u_tsize = (tsk->mm->end_code - tsk->mm->start_code) >> PAGE_SHIFT; - dump->u_dsize = (tsk->mm->brk - tsk->mm->start_data + PAGE_SIZE - 1) >> PAGE_SHIFT; - dump->u_ssize = 0; - - memset(dump->u_debugreg, 0, sizeof(dump->u_debugreg)); - - if (dump->start_stack < 0x04000000) - dump->u_ssize = (0x04000000 - dump->start_stack) >> PAGE_SHIFT; - - dump->regs = *regs; - dump->u_fpvalid = dump_fpu (regs, &dump->u_fp); -} - -#endif /* __KERNEL__ */ -#endif /* _ASM_A_OUT_CORE_H */ diff --git a/arch/arm/include/asm/processor.h b/arch/arm/include/asm/processor.h index 06e7d509eaa..413f3876341 100644 --- a/arch/arm/include/asm/processor.h +++ b/arch/arm/include/asm/processor.h @@ -54,7 +54,6 @@ struct thread_struct { #define start_thread(regs,pc,sp) \ ({ \ - unsigned long *stack = (unsigned long *)sp; \ memset(regs->uregs, 0, sizeof(regs->uregs)); \ if (current->personality & ADDR_LIMIT_32BIT) \ regs->ARM_cpsr = USR_MODE; \ @@ -65,9 +64,6 @@ struct thread_struct { regs->ARM_cpsr |= PSR_ENDSTATE; \ regs->ARM_pc = pc & ~1; /* pc */ \ regs->ARM_sp = sp; /* sp */ \ - regs->ARM_r2 = stack[2]; /* r2 (envp) */ \ - regs->ARM_r1 = stack[1]; /* r1 (argv) */ \ - regs->ARM_r0 = stack[0]; /* r0 (argc) */ \ nommu_start_thread(regs); \ }) diff --git a/arch/arm/include/uapi/asm/Kbuild b/arch/arm/include/uapi/asm/Kbuild index 47bcb2d254a..18d76fd5a2a 100644 --- a/arch/arm/include/uapi/asm/Kbuild +++ b/arch/arm/include/uapi/asm/Kbuild @@ -1,7 +1,6 @@ # UAPI Header export list include include/uapi/asm-generic/Kbuild.asm -header-y += a.out.h header-y += byteorder.h header-y += fcntl.h header-y += hwcap.h diff --git a/arch/arm/include/uapi/asm/a.out.h b/arch/arm/include/uapi/asm/a.out.h deleted file mode 100644 index 083894b2e3b..00000000000 --- a/arch/arm/include/uapi/asm/a.out.h +++ /dev/null @@ -1,34 +0,0 @@ -#ifndef __ARM_A_OUT_H__ -#define __ARM_A_OUT_H__ - -#include -#include - -struct exec -{ - __u32 a_info; /* Use macros N_MAGIC, etc for access */ - __u32 a_text; /* length of text, in bytes */ - __u32 a_data; /* length of data, in bytes */ - __u32 a_bss; /* length of uninitialized data area for file, in bytes */ - __u32 a_syms; /* length of symbol table data in file, in bytes */ - __u32 a_entry; /* start address */ - __u32 a_trsize; /* length of relocation info for text, in bytes */ - __u32 a_drsize; /* length of relocation info for data, in bytes */ -}; - -/* - * This is always the same - */ -#define N_TXTADDR(a) (0x00008000) - -#define N_TRSIZE(a) ((a).a_trsize) -#define N_DRSIZE(a) ((a).a_drsize) -#define N_SYMSIZE(a) ((a).a_syms) - -#define M_ARM 103 - -#ifndef LIBRARY_START_TEXT -#define LIBRARY_START_TEXT (0x00c00000) -#endif - -#endif /* __A_OUT_GNU_H__ */ -- cgit v1.2.3-70-g09d2 From 067e710b9a982a92cc8294d7fa0f1e924c65bba1 Mon Sep 17 00:00:00 2001 From: Paul Walmsley Date: Tue, 30 Jul 2013 12:38:45 +0100 Subject: ARM: 7801/1: v6: prevent gcc 4.5 from reordering extended CP15 reads above is_smp() test Commit 621a0147d5c921f4cc33636ccd0602ad5d7cbfbc ("ARM: 7757/1: mm: don't flush icache in switch_mm with hardware broadcasting") breaks the boot on OMAP2430SDP with omap2plus_defconfig. 
Tracked to an undefined instruction abort from the CP15 read in cache_ops_need_broadcast(). It turns out that gcc 4.5 reorders the extended CP15 read above the is_smp() test. This breaks ARM1136 r0 cores, since they don't support several CP15 registers that later ARM cores do. ARM1136JF-S TRM section 3.2.1 "Register allocation" has the details. So mark the extended CP15 read as clobbering memory, which prevents the compiler from reordering it before the is_smp() test. Russell states that the code generated from this approach is preferable to marking the inline asm as volatile. Remove the existing condition code clobber as it's obsolete, per Nico's post: http://www.spinics.net/lists/arm-kernel/msg261208.html This patch is a collaboration with Will Deacon and Russell King. Comments from Paul Walmsley: Russell, if you accept this one, might you also add Will's ack from the lists: Comments from Paul Walmsley: I'd also be obliged if you could add a Cc: line for Jonathan Austin, since he helped test: Signed-off-by: Paul Walmsley Cc: Will Deacon Cc: Nicolas Pitre Cc: Tony Lindgren Acked-by: Will Deacon Cc: Jonathan Austin Signed-off-by: Russell King --- arch/arm/include/asm/cputype.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'arch/arm/include') diff --git a/arch/arm/include/asm/cputype.h b/arch/arm/include/asm/cputype.h index 8c25dc4e985..9672e978d50 100644 --- a/arch/arm/include/asm/cputype.h +++ b/arch/arm/include/asm/cputype.h @@ -89,13 +89,18 @@ extern unsigned int processor_id; __val; \ }) +/* + * The memory clobber prevents gcc 4.5 from reordering the mrc before + * any is_smp() tests, which can cause undefined instruction aborts on + * ARM1136 r0 due to the missing extended CP15 registers. + */ #define read_cpuid_ext(ext_reg) \ ({ \ unsigned int __val; \ asm("mrc p15, 0, %0, c0, " ext_reg \ : "=r" (__val) \ : \ - : "cc"); \ + : "memory"); \ __val; \ }) -- cgit v1.2.3-70-g09d2 From 48be69a026b2c17350a5ef18a1959a919f60be7d Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 24 Jul 2013 00:29:18 +0100 Subject: ARM: move signal handlers into a vdso-like page Move the signal handlers into a VDSO page rather than keeping them in the vectors page. This allows us to place them randomly within this page, and also map the page at a random location within userspace further protecting these code fragments from ROP attacks. The new VDSO page is also poisoned in the same way as the vector page. 
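For illustration, the in-page randomisation used by get_signal_page() further down boils down to the following sketch (restating the arithmetic, not adding behaviour):

/*
 * Sketch (not part of the patch): pick a word-aligned offset in
 * [0x200, 0x9fc], leaving the 28-byte sigreturn_codes[] inside the 4K page.
 */
unsigned int pick_sigpage_offset(unsigned int rnd)	/* rnd: any 32-bit random value */
{
	unsigned int offset = 0x200 + (rnd & 0x7fc);	/* 0x7fc clears bits 1:0 */

	/* 0x200 <= offset <= 0x9fc, offset % 4 == 0, offset + 28 = 0xa18 < 0x1000 */
	return offset;
}
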
Signed-off-by: Russell King --- arch/arm/include/asm/elf.h | 4 ++++ arch/arm/include/asm/mmu.h | 1 + arch/arm/kernel/process.c | 40 ++++++++++++++++++++++++++++++++--- arch/arm/kernel/signal.c | 52 +++++++++++++++++++++++++++++++++++++++------- arch/arm/kernel/signal.h | 12 ----------- arch/arm/kernel/traps.c | 9 -------- 6 files changed, 87 insertions(+), 31 deletions(-) delete mode 100644 arch/arm/kernel/signal.h (limited to 'arch/arm/include') diff --git a/arch/arm/include/asm/elf.h b/arch/arm/include/asm/elf.h index 38050b1c480..9c9b30717fd 100644 --- a/arch/arm/include/asm/elf.h +++ b/arch/arm/include/asm/elf.h @@ -130,4 +130,8 @@ struct mm_struct; extern unsigned long arch_randomize_brk(struct mm_struct *mm); #define arch_randomize_brk arch_randomize_brk +#define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1 +struct linux_binprm; +int arch_setup_additional_pages(struct linux_binprm *, int); + #endif diff --git a/arch/arm/include/asm/mmu.h b/arch/arm/include/asm/mmu.h index e3d55547e75..7345e37155d 100644 --- a/arch/arm/include/asm/mmu.h +++ b/arch/arm/include/asm/mmu.h @@ -8,6 +8,7 @@ typedef struct { atomic64_t id; #endif unsigned int vmalloc_seq; + unsigned long sigpage; } mm_context_t; #ifdef CONFIG_CPU_HAS_ASID diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index d3ca4f6915a..566d0d71a1e 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -428,8 +428,8 @@ unsigned long arch_randomize_brk(struct mm_struct *mm) #ifdef CONFIG_MMU /* * The vectors page is always readable from user space for the - * atomic helpers and the signal restart code. Insert it into the - * gate_vma so that it is visible through ptrace and /proc//mem. + * atomic helpers. Insert it into the gate_vma so that it is visible + * through ptrace and /proc//mem. */ static struct vm_area_struct gate_vma = { .vm_start = 0xffff0000, @@ -461,6 +461,40 @@ int in_gate_area_no_mm(unsigned long addr) const char *arch_vma_name(struct vm_area_struct *vma) { - return (vma == &gate_vma) ? "[vectors]" : NULL; + return (vma == &gate_vma) ? "[vectors]" : + (vma->vm_mm && vma->vm_start == vma->vm_mm->context.sigpage) ? + "[sigpage]" : NULL; +} + +extern struct page *get_signal_page(void); + +int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) +{ + struct mm_struct *mm = current->mm; + struct page *page; + unsigned long addr; + int ret; + + page = get_signal_page(); + if (!page) + return -ENOMEM; + + down_write(&mm->mmap_sem); + addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0); + if (IS_ERR_VALUE(addr)) { + ret = addr; + goto up_fail; + } + + ret = install_special_mapping(mm, addr, PAGE_SIZE, + VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC, + &page); + + if (ret == 0) + mm->context.sigpage = addr; + + up_fail: + up_write(&mm->mmap_sem); + return ret; } #endif diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index 1c16c35c271..0f17e06d51e 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -8,6 +8,7 @@ * published by the Free Software Foundation. */ #include +#include #include #include #include @@ -15,12 +16,11 @@ #include #include +#include #include #include #include -#include "signal.h" - /* * For ARM syscalls, we encode the syscall number into the instruction. 
*/ @@ -40,11 +40,13 @@ #define SWI_THUMB_SIGRETURN (0xdf00 << 16 | 0x2700 | (__NR_sigreturn - __NR_SYSCALL_BASE)) #define SWI_THUMB_RT_SIGRETURN (0xdf00 << 16 | 0x2700 | (__NR_rt_sigreturn - __NR_SYSCALL_BASE)) -const unsigned long sigreturn_codes[7] = { +static const unsigned long sigreturn_codes[7] = { MOV_R7_NR_SIGRETURN, SWI_SYS_SIGRETURN, SWI_THUMB_SIGRETURN, MOV_R7_NR_RT_SIGRETURN, SWI_SYS_RT_SIGRETURN, SWI_THUMB_RT_SIGRETURN, }; +static unsigned long signal_return_offset; + #ifdef CONFIG_CRUNCH static int preserve_crunch_context(struct crunch_sigframe __user *frame) { @@ -401,12 +403,15 @@ setup_return(struct pt_regs *regs, struct ksignal *ksig, return 1; if ((cpsr & MODE32_BIT) && !IS_ENABLED(CONFIG_ARM_MPU)) { + struct mm_struct *mm = current->mm; + /* - * 32-bit code can use the new high-page - * signal return code support except when the MPU has - * protected the vectors page from PL0 + * 32-bit code can use the signal return page + * except when the MPU has protected the vectors + * page from PL0 */ - retcode = KERN_SIGRETURN_CODE + (idx << 2) + thumb; + retcode = mm->context.sigpage + signal_return_offset + + (idx << 2) + thumb; } else { /* * Ensure that the instruction cache sees @@ -608,3 +613,36 @@ do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall) } while (thread_flags & _TIF_WORK_MASK); return 0; } + +static struct page *signal_page; + +struct page *get_signal_page(void) +{ + if (!signal_page) { + unsigned long ptr; + unsigned offset; + void *addr; + + signal_page = alloc_pages(GFP_KERNEL, 0); + + if (!signal_page) + return NULL; + + addr = page_address(signal_page); + + /* Give the signal return code some randomness */ + offset = 0x200 + (get_random_int() & 0x7fc); + signal_return_offset = offset; + + /* + * Copy signal return handlers into the vector page, and + * set sigreturn to be a pointer to these. + */ + memcpy(addr + offset, sigreturn_codes, sizeof(sigreturn_codes)); + + ptr = (unsigned long)addr + offset; + flush_icache_range(ptr, ptr + sizeof(sigreturn_codes)); + } + + return signal_page; +} diff --git a/arch/arm/kernel/signal.h b/arch/arm/kernel/signal.h deleted file mode 100644 index 5ff067b7c75..00000000000 --- a/arch/arm/kernel/signal.h +++ /dev/null @@ -1,12 +0,0 @@ -/* - * linux/arch/arm/kernel/signal.h - * - * Copyright (C) 2005-2009 Russell King. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#define KERN_SIGRETURN_CODE (CONFIG_VECTORS_BASE + 0x00000500) - -extern const unsigned long sigreturn_codes[7]; diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index e3ca35ccd38..ab517fcce21 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -35,8 +35,6 @@ #include #include -#include "signal.h" - static const char *handler[]= { "prefetch abort", "data abort", "address exception", "interrupt" }; void *vectors_page; @@ -850,13 +848,6 @@ void __init early_trap_init(void *vectors_base) kuser_init(vectors_base); - /* - * Copy signal return handlers into the vector page, and - * set sigreturn to be a pointer to these. 
- */ - memcpy((void *)(vectors + KERN_SIGRETURN_CODE - CONFIG_VECTORS_BASE), - sigreturn_codes, sizeof(sigreturn_codes)); - flush_icache_range(vectors, vectors + PAGE_SIZE * 2); modify_domain(DOMAIN_USER, DOMAIN_CLIENT); #else /* ifndef CONFIG_CPU_V7M */ -- cgit v1.2.3-70-g09d2 From a5463cd3435475386cbbe7b06e01292ac169d36f Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 31 Jul 2013 21:58:56 +0100 Subject: ARM: make vectors page inaccessible from userspace If kuser helpers are not provided by the kernel, disable user access to the vectors page. With the kuser helpers gone, there is no reason for this page to be visible to userspace. Signed-off-by: Russell King --- arch/arm/include/asm/page.h | 2 ++ arch/arm/kernel/process.c | 7 ++++++- arch/arm/mm/mmu.c | 4 ++++ 3 files changed, 12 insertions(+), 1 deletion(-) (limited to 'arch/arm/include') diff --git a/arch/arm/include/asm/page.h b/arch/arm/include/asm/page.h index 6363f3d1d50..4355f0ec44d 100644 --- a/arch/arm/include/asm/page.h +++ b/arch/arm/include/asm/page.h @@ -142,7 +142,9 @@ extern void __cpu_copy_user_highpage(struct page *to, struct page *from, #define clear_page(page) memset((void *)(page), 0, PAGE_SIZE) extern void copy_page(void *to, const void *from); +#ifdef CONFIG_KUSER_HELPERS #define __HAVE_ARCH_GATE_AREA 1 +#endif #ifdef CONFIG_ARM_LPAE #include diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index 566d0d71a1e..1e6c33d01c0 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -426,6 +426,7 @@ unsigned long arch_randomize_brk(struct mm_struct *mm) } #ifdef CONFIG_MMU +#ifdef CONFIG_KUSER_HELPERS /* * The vectors page is always readable from user space for the * atomic helpers. Insert it into the gate_vma so that it is visible @@ -458,10 +459,14 @@ int in_gate_area_no_mm(unsigned long addr) { return in_gate_area(NULL, addr); } +#define is_gate_vma(vma) ((vma) = &gate_vma) +#else +#define is_gate_vma(vma) 0 +#endif const char *arch_vma_name(struct vm_area_struct *vma) { - return (vma == &gate_vma) ? "[vectors]" : + return is_gate_vma(vma) ? "[vectors]" : (vma->vm_mm && vma->vm_start == vma->vm_mm->context.sigpage) ? "[sigpage]" : NULL; } diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 9ea274d1af6..ca46f413d86 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -1205,7 +1205,11 @@ static void __init devicemaps_init(struct machine_desc *mdesc) map.pfn = __phys_to_pfn(virt_to_phys(vectors)); map.virtual = 0xffff0000; map.length = PAGE_SIZE; +#ifdef CONFIG_KUSER_HELPERS map.type = MT_HIGH_VECTORS; +#else + map.type = MT_LOW_VECTORS; +#endif create_mapping(&map); if (!vectors_high()) { -- cgit v1.2.3-70-g09d2 From 8c0cc8a5d90bc7373a7a9e7f7a40eb41f51e03fc Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 3 Aug 2013 10:39:51 +0100 Subject: ARM: fix nommu builds with 48be69a02 (ARM: move signal handlers into a vdso-like page) Olof reports that noMMU builds error out with: arch/arm/kernel/signal.c: In function 'setup_return': arch/arm/kernel/signal.c:413:25: error: 'mm_context_t' has no member named 'sigpage' This shows one of the evilnesses of IS_ENABLED(). Get rid of it here and replace it with #ifdef's - and as no noMMU platform can make use of sigpage, depend on CONIFG_MMU not CONFIG_ARM_MPU. 
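The underlying issue is general: IS_ENABLED() relies on dead-code elimination, so the guarded code must still parse and type-check even when the option is off. A minimal sketch (illustrative, not kernel code):

/*
 * With IS_ENABLED() both branches must still compile, so referencing a
 * member that only exists under CONFIG_MMU breaks the !MMU build; an
 * #ifdef removes the code entirely.
 */
struct ctx {
#ifdef CONFIG_MMU
	unsigned long sigpage;
#endif
};

static void use(struct ctx *c)
{
	if (IS_ENABLED(CONFIG_MMU))
		c->sigpage = 1;	/* parsed even when !CONFIG_MMU: build error */
#ifdef CONFIG_MMU
	c->sigpage = 1;		/* dropped by the preprocessor when !CONFIG_MMU */
#endif
}
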
Reported-by: Olof Johansson Signed-off-by: Russell King --- arch/arm/include/asm/elf.h | 2 ++ arch/arm/kernel/signal.c | 7 +++++-- 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'arch/arm/include') diff --git a/arch/arm/include/asm/elf.h b/arch/arm/include/asm/elf.h index 9c9b30717fd..56211f2084e 100644 --- a/arch/arm/include/asm/elf.h +++ b/arch/arm/include/asm/elf.h @@ -130,8 +130,10 @@ struct mm_struct; extern unsigned long arch_randomize_brk(struct mm_struct *mm); #define arch_randomize_brk arch_randomize_brk +#ifdef CONFIG_MMU #define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1 struct linux_binprm; int arch_setup_additional_pages(struct linux_binprm *, int); +#endif #endif diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index 39e7105a9b7..ab330422527 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -402,7 +402,8 @@ setup_return(struct pt_regs *regs, struct ksignal *ksig, __put_user(sigreturn_codes[idx+1], rc+1)) return 1; - if ((cpsr & MODE32_BIT) && !IS_ENABLED(CONFIG_ARM_MPU)) { +#ifdef CONFIG_MMU + if (cpsr & MODE32_BIT) { struct mm_struct *mm = current->mm; /* @@ -412,7 +413,9 @@ setup_return(struct pt_regs *regs, struct ksignal *ksig, */ retcode = mm->context.sigpage + signal_return_offset + (idx << 2) + thumb; - } else { + } else +#endif + { /* * Ensure that the instruction cache sees * the return code written onto the stack. -- cgit v1.2.3-70-g09d2 From afa31d8eb86fc2f25083e675d57ac8173a98f999 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 12 Aug 2013 18:03:26 +0100 Subject: ARM: 7811/1: locks: use early clobber in arch_spin_trylock The res variable is written before we've finished with the input operands (namely the lock address), so ensure that we mark it as `early clobber' to avoid unintended register sharing. Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/include/asm/spinlock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/arm/include') diff --git a/arch/arm/include/asm/spinlock.h b/arch/arm/include/asm/spinlock.h index f8b8965666e..7ed43f68e04 100644 --- a/arch/arm/include/asm/spinlock.h +++ b/arch/arm/include/asm/spinlock.h @@ -107,7 +107,7 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock) " subs %1, %0, %0, ror #16\n" " addeq %0, %0, %4\n" " strexeq %2, %0, [%3]" - : "=&r" (slock), "=&r" (contended), "=r" (res) + : "=&r" (slock), "=&r" (contended), "=&r" (res) : "r" (&lock->slock), "I" (1 << TICKET_SHIFT) : "cc"); } while (res); -- cgit v1.2.3-70-g09d2 From 00efaa0250939dc148e2d3104fb3c18395d24a2d Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 12 Aug 2013 18:04:05 +0100 Subject: ARM: 7812/1: rwlocks: retry trylock operation if strex fails on free lock Commit 15e7e5c1ebf5 ("ARM: 7749/1: spinlock: retry trylock operation if strex fails on free lock") modifying our arch_spin_trylock to retry the acquisition if the lock appeared uncontended, but the strex failed. This patch does the same for rwlocks, which were missed by the original patch. 
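The point of the retry loop is that the lock attempt reports two separate things: whether the lock was actually contended, and whether the exclusive store itself failed, which can happen on a free lock (for example because the exclusive monitor was cleared by an interrupt). A sketch of the shape, where load_excl()/store_excl() are hypothetical stand-ins for ldrex/strex rather than real kernel APIs:

/*
 * Sketch of the retried trylock: retry only when strex failed spuriously,
 * give up only when the lock was genuinely held.
 */
static inline int write_trylock_sketch(unsigned long *lock)
{
	unsigned long contended, failed;

	do {
		contended = load_excl(lock);			/* ldrex */
		failed = 0;
		if (contended == 0)
			failed = store_excl(lock, 0x80000000);	/* strexeq */
	} while (failed);

	if (!contended) {
		smp_mb();
		return 1;
	}
	return 0;
}
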
Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/include/asm/spinlock.h | 49 +++++++++++++++++++++++++---------------- 1 file changed, 30 insertions(+), 19 deletions(-) (limited to 'arch/arm/include') diff --git a/arch/arm/include/asm/spinlock.h b/arch/arm/include/asm/spinlock.h index 7ed43f68e04..b07c09e5a0a 100644 --- a/arch/arm/include/asm/spinlock.h +++ b/arch/arm/include/asm/spinlock.h @@ -168,17 +168,20 @@ static inline void arch_write_lock(arch_rwlock_t *rw) static inline int arch_write_trylock(arch_rwlock_t *rw) { - unsigned long tmp; + unsigned long contended, res; - __asm__ __volatile__( -" ldrex %0, [%1]\n" -" teq %0, #0\n" -" strexeq %0, %2, [%1]" - : "=&r" (tmp) - : "r" (&rw->lock), "r" (0x80000000) - : "cc"); + do { + __asm__ __volatile__( + " ldrex %0, [%2]\n" + " mov %1, #0\n" + " teq %0, #0\n" + " strexeq %1, %3, [%2]" + : "=&r" (contended), "=&r" (res) + : "r" (&rw->lock), "r" (0x80000000) + : "cc"); + } while (res); - if (tmp == 0) { + if (!contended) { smp_mb(); return 1; } else { @@ -254,18 +257,26 @@ static inline void arch_read_unlock(arch_rwlock_t *rw) static inline int arch_read_trylock(arch_rwlock_t *rw) { - unsigned long tmp, tmp2 = 1; + unsigned long contended, res; - __asm__ __volatile__( -" ldrex %0, [%2]\n" -" adds %0, %0, #1\n" -" strexpl %1, %0, [%2]\n" - : "=&r" (tmp), "+r" (tmp2) - : "r" (&rw->lock) - : "cc"); + do { + __asm__ __volatile__( + " ldrex %0, [%2]\n" + " mov %1, #0\n" + " adds %0, %0, #1\n" + " strexpl %1, %0, [%2]" + : "=&r" (contended), "=&r" (res) + : "r" (&rw->lock) + : "cc"); + } while (res); - smp_mb(); - return tmp2 == 0; + /* If the lock is negative, then it is already held for write. */ + if (contended < 0x80000000) { + smp_mb(); + return 1; + } else { + return 0; + } } /* read_can_lock - would read_trylock() succeed? */ -- cgit v1.2.3-70-g09d2 From 2103f6cba61a8b8bea3fc1b63661d830a2125e76 Mon Sep 17 00:00:00 2001 From: Stephen Warren Date: Fri, 2 Aug 2013 20:52:49 +0100 Subject: ARM: 7807/1: kexec: validate CPU hotplug support Architectures should fully validate whether kexec is possible as part of machine_kexec_prepare(), so that user-space's kexec_load() operation can report any problems. Performing validation in machine_kexec() itself is too late, since it is not allowed to return. Prior to this patch, ARM's machine_kexec() was testing after-the-fact whether machine_kexec_prepare() was able to disable all but one CPU. Instead, modify machine_kexec_prepare() to validate all conditions necessary for machine_kexec_prepare()'s to succeed. BUG if the validation succeeded, yet disabling the CPUs didn't actually work. Signed-off-by: Stephen Warren Acked-by: "Eric W. 
Biederman" Signed-off-by: Russell King --- arch/arm/include/asm/smp_plat.h | 3 +++ arch/arm/kernel/machine_kexec.c | 20 ++++++++++++++++---- arch/arm/kernel/smp.c | 10 ++++++++++ 3 files changed, 29 insertions(+), 4 deletions(-) (limited to 'arch/arm/include') diff --git a/arch/arm/include/asm/smp_plat.h b/arch/arm/include/asm/smp_plat.h index 6462a721ebd..a252c0bfacf 100644 --- a/arch/arm/include/asm/smp_plat.h +++ b/arch/arm/include/asm/smp_plat.h @@ -88,4 +88,7 @@ static inline u32 mpidr_hash_size(void) { return 1 << mpidr_hash.bits; } + +extern int platform_can_cpu_hotplug(void); + #endif diff --git a/arch/arm/kernel/machine_kexec.c b/arch/arm/kernel/machine_kexec.c index 4fb074c446b..d7c82df6924 100644 --- a/arch/arm/kernel/machine_kexec.c +++ b/arch/arm/kernel/machine_kexec.c @@ -15,6 +15,7 @@ #include #include #include +#include #include extern const unsigned char relocate_new_kernel[]; @@ -38,6 +39,14 @@ int machine_kexec_prepare(struct kimage *image) __be32 header; int i, err; + /* + * Validate that if the current HW supports SMP, then the SW supports + * and implements CPU hotplug for the current HW. If not, we won't be + * able to kexec reliably, so fail the prepare operation. + */ + if (num_possible_cpus() > 1 && !platform_can_cpu_hotplug()) + return -EINVAL; + /* * No segment at default ATAGs address. try to locate * a dtb using magic. @@ -134,10 +143,13 @@ void machine_kexec(struct kimage *image) unsigned long reboot_code_buffer_phys; void *reboot_code_buffer; - if (num_online_cpus() > 1) { - pr_err("kexec: error: multiple CPUs still online\n"); - return; - } + /* + * This can only happen if machine_shutdown() failed to disable some + * CPU, and that can only happen if the checks in + * machine_kexec_prepare() were not correct. If this fails, we can't + * reliably kexec anyway, so BUG_ON is appropriate. + */ + BUG_ON(num_online_cpus() > 1); page_list = image->head & PAGE_MASK; diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index c2b4f8f0be9..2dc19349eb1 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -145,6 +145,16 @@ int boot_secondary(unsigned int cpu, struct task_struct *idle) return -ENOSYS; } +int platform_can_cpu_hotplug(void) +{ +#ifdef CONFIG_HOTPLUG_CPU + if (smp_ops.cpu_kill) + return 1; +#endif + + return 0; +} + #ifdef CONFIG_HOTPLUG_CPU static void percpu_timer_stop(void); -- cgit v1.2.3-70-g09d2 From 2b047252d087be7f2ba088b4933cd904f92e6fce Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 15 Aug 2013 11:42:25 -0700 Subject: Fix TLB gather virtual address range invalidation corner cases Ben Tebulin reported: "Since v3.7.2 on two independent machines a very specific Git repository fails in 9/10 cases on git-fsck due to an SHA1/memory failures. This only occurs on a very specific repository and can be reproduced stably on two independent laptops. Git mailing list ran out of ideas and for me this looks like some very exotic kernel issue" and bisected the failure to the backport of commit 53a59fc67f97 ("mm: limit mmu_gather batching to fix soft lockups on !CONFIG_PREEMPT"). That commit itself is not actually buggy, but what it does is to make it much more likely to hit the partial TLB invalidation case, since it introduces a new case in tlb_next_batch() that previously only ever happened when running out of memory. The real bug is that the TLB gather virtual memory range setup is subtly buggered. 
It was introduced in commit 597e1c3580b7 ("mm/mmu_gather: enable tlb flush range in generic mmu_gather"), and the range handling was already fixed at least once in commit e6c495a96ce0 ("mm: fix the TLB range flushed when __tlb_remove_page() runs out of slots"), but that fix was not complete. The problem with the TLB gather virtual address range is that it isn't set up by the initial tlb_gather_mmu() initialization (which didn't get the TLB range information), but it is set up ad-hoc later by the functions that actually flush the TLB. And so any such case that forgot to update the TLB range entries would potentially miss TLB invalidates. Rather than try to figure out exactly which particular ad-hoc range setup was missing (I personally suspect it's the hugetlb case in zap_huge_pmd(), which didn't have the same logic as zap_pte_range() did), this patch just gets rid of the problem at the source: make the TLB range information available to tlb_gather_mmu(), and initialize it when initializing all the other tlb gather fields. This makes the patch larger, but conceptually much simpler. And the end result is much more understandable; even if you want to play games with partial ranges when invalidating the TLB contents in chunks, now the range information is always there, and anybody who doesn't want to bother with it won't introduce subtle bugs. Ben verified that this fixes his problem. Reported-bisected-and-tested-by: Ben Tebulin Build-testing-by: Stephen Rothwell Build-testing-by: Richard Weinberger Reviewed-by: Michal Hocko Acked-by: Peter Zijlstra Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds --- arch/arm/include/asm/tlb.h | 7 +++++-- arch/arm64/include/asm/tlb.h | 7 +++++-- arch/ia64/include/asm/tlb.h | 9 ++++++--- arch/s390/include/asm/tlb.h | 8 ++++++-- arch/sh/include/asm/tlb.h | 6 ++++-- arch/um/include/asm/tlb.h | 6 ++++-- fs/exec.c | 4 ++-- include/asm-generic/tlb.h | 2 +- mm/hugetlb.c | 2 +- mm/memory.c | 36 +++++++++++++++++++++--------------- mm/mmap.c | 4 ++-- 11 files changed, 57 insertions(+), 34 deletions(-) (limited to 'arch/arm/include') diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h index 46e7cfb3e72..0baf7f0d939 100644 --- a/arch/arm/include/asm/tlb.h +++ b/arch/arm/include/asm/tlb.h @@ -43,6 +43,7 @@ struct mmu_gather { struct mm_struct *mm; unsigned int fullmm; struct vm_area_struct *vma; + unsigned long start, end; unsigned long range_start; unsigned long range_end; unsigned int nr; @@ -107,10 +108,12 @@ static inline void tlb_flush_mmu(struct mmu_gather *tlb) } static inline void -tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned int fullmm) +tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end) { tlb->mm = mm; - tlb->fullmm = fullmm; + tlb->fullmm = !(start | (end+1)); + tlb->start = start; + tlb->end = end; tlb->vma = NULL; tlb->max = ARRAY_SIZE(tlb->local); tlb->pages = tlb->local; diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h index 46b3beb4b77..717031a762c 100644 --- a/arch/arm64/include/asm/tlb.h +++ b/arch/arm64/include/asm/tlb.h @@ -35,6 +35,7 @@ struct mmu_gather { struct mm_struct *mm; unsigned int fullmm; struct vm_area_struct *vma; + unsigned long start, end; unsigned long range_start; unsigned long range_end; unsigned int nr; @@ -97,10 +98,12 @@ static inline void tlb_flush_mmu(struct mmu_gather *tlb) } static inline void -tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned int fullmm) +tlb_gather_mmu(struct 
mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end) { tlb->mm = mm; - tlb->fullmm = fullmm; + tlb->fullmm = !(start | (end+1)); + tlb->start = start; + tlb->end = end; tlb->vma = NULL; tlb->max = ARRAY_SIZE(tlb->local); tlb->pages = tlb->local; diff --git a/arch/ia64/include/asm/tlb.h b/arch/ia64/include/asm/tlb.h index ef3a9de0195..bc5efc7c3f3 100644 --- a/arch/ia64/include/asm/tlb.h +++ b/arch/ia64/include/asm/tlb.h @@ -22,7 +22,7 @@ * unmapping a portion of the virtual address space, these hooks are called according to * the following template: * - * tlb <- tlb_gather_mmu(mm, full_mm_flush); // start unmap for address space MM + * tlb <- tlb_gather_mmu(mm, start, end); // start unmap for address space MM * { * for each vma that needs a shootdown do { * tlb_start_vma(tlb, vma); @@ -58,6 +58,7 @@ struct mmu_gather { unsigned int max; unsigned char fullmm; /* non-zero means full mm flush */ unsigned char need_flush; /* really unmapped some PTEs? */ + unsigned long start, end; unsigned long start_addr; unsigned long end_addr; struct page **pages; @@ -155,13 +156,15 @@ static inline void __tlb_alloc_page(struct mmu_gather *tlb) static inline void -tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned int full_mm_flush) +tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end) { tlb->mm = mm; tlb->max = ARRAY_SIZE(tlb->local); tlb->pages = tlb->local; tlb->nr = 0; - tlb->fullmm = full_mm_flush; + tlb->fullmm = !(start | (end+1)); + tlb->start = start; + tlb->end = end; tlb->start_addr = ~0UL; } diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h index b75d7d68668..23a64d25f2b 100644 --- a/arch/s390/include/asm/tlb.h +++ b/arch/s390/include/asm/tlb.h @@ -32,6 +32,7 @@ struct mmu_gather { struct mm_struct *mm; struct mmu_table_batch *batch; unsigned int fullmm; + unsigned long start, unsigned long end; }; struct mmu_table_batch { @@ -48,10 +49,13 @@ extern void tlb_remove_table(struct mmu_gather *tlb, void *table); static inline void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, - unsigned int full_mm_flush) + unsigned long start, + unsigned long end) { tlb->mm = mm; - tlb->fullmm = full_mm_flush; + tlb->start = start; + tlb->end = end; + tlb->fullmm = !(start | (end+1)); tlb->batch = NULL; if (tlb->fullmm) __tlb_flush_mm(mm); diff --git a/arch/sh/include/asm/tlb.h b/arch/sh/include/asm/tlb.h index e61d43d9f68..362192ed12f 100644 --- a/arch/sh/include/asm/tlb.h +++ b/arch/sh/include/asm/tlb.h @@ -36,10 +36,12 @@ static inline void init_tlb_gather(struct mmu_gather *tlb) } static inline void -tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned int full_mm_flush) +tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end) { tlb->mm = mm; - tlb->fullmm = full_mm_flush; + tlb->start = start; + tlb->end = end; + tlb->fullmm = !(start | (end+1)); init_tlb_gather(tlb); } diff --git a/arch/um/include/asm/tlb.h b/arch/um/include/asm/tlb.h index 4febacd1a8a..29b0301c18a 100644 --- a/arch/um/include/asm/tlb.h +++ b/arch/um/include/asm/tlb.h @@ -45,10 +45,12 @@ static inline void init_tlb_gather(struct mmu_gather *tlb) } static inline void -tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned int full_mm_flush) +tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end) { tlb->mm = mm; - tlb->fullmm = full_mm_flush; + tlb->start = start; + tlb->end = end; + tlb->fullmm = 
!(start | (end+1)); init_tlb_gather(tlb); } diff --git a/fs/exec.c b/fs/exec.c index 9c73def8764..fd774c7cb48 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -608,7 +608,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift) return -ENOMEM; lru_add_drain(); - tlb_gather_mmu(&tlb, mm, 0); + tlb_gather_mmu(&tlb, mm, old_start, old_end); if (new_end > old_start) { /* * when the old and new regions overlap clear from new_end. @@ -625,7 +625,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift) free_pgd_range(&tlb, old_start, old_end, new_end, vma->vm_next ? vma->vm_next->vm_start : USER_PGTABLES_CEILING); } - tlb_finish_mmu(&tlb, new_end, old_end); + tlb_finish_mmu(&tlb, old_start, old_end); /* * Shrink the vma to just the new range. Always succeeds. diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index 13821c339a4..5672d7ea1fa 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -112,7 +112,7 @@ struct mmu_gather { #define HAVE_GENERIC_MMU_GATHER -void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, bool fullmm); +void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end); void tlb_flush_mmu(struct mmu_gather *tlb); void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 83aff0a4d09..b60f33080a2 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2490,7 +2490,7 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, mm = vma->vm_mm; - tlb_gather_mmu(&tlb, mm, 0); + tlb_gather_mmu(&tlb, mm, start, end); __unmap_hugepage_range(&tlb, vma, start, end, ref_page); tlb_finish_mmu(&tlb, start, end); } diff --git a/mm/memory.c b/mm/memory.c index 40268410732..af84bc0ec17 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -209,14 +209,15 @@ static int tlb_next_batch(struct mmu_gather *tlb) * tear-down from @mm. The @fullmm argument is used when @mm is without * users and we're going to destroy the full address space (exit/execve). */ -void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, bool fullmm) +void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end) { tlb->mm = mm; - tlb->fullmm = fullmm; + /* Is it from 0 to ~0? */ + tlb->fullmm = !(start | (end+1)); tlb->need_flush_all = 0; - tlb->start = -1UL; - tlb->end = 0; + tlb->start = start; + tlb->end = end; tlb->need_flush = 0; tlb->local.next = NULL; tlb->local.nr = 0; @@ -256,8 +257,6 @@ void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long e { struct mmu_gather_batch *batch, *next; - tlb->start = start; - tlb->end = end; tlb_flush_mmu(tlb); /* keep the page table cache within bounds */ @@ -1099,7 +1098,6 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb, spinlock_t *ptl; pte_t *start_pte; pte_t *pte; - unsigned long range_start = addr; again: init_rss_vec(rss); @@ -1205,17 +1203,25 @@ again: * and page-free while holding it. */ if (force_flush) { + unsigned long old_end; + force_flush = 0; -#ifdef HAVE_GENERIC_MMU_GATHER - tlb->start = range_start; + /* + * Flush the TLB just for the previous segment, + * then update the range to be the remaining + * TLB range. 
+ */ + old_end = tlb->end; tlb->end = addr; -#endif + tlb_flush_mmu(tlb); - if (addr != end) { - range_start = addr; + + tlb->start = addr; + tlb->end = old_end; + + if (addr != end) goto again; - } } return addr; @@ -1400,7 +1406,7 @@ void zap_page_range(struct vm_area_struct *vma, unsigned long start, unsigned long end = start + size; lru_add_drain(); - tlb_gather_mmu(&tlb, mm, 0); + tlb_gather_mmu(&tlb, mm, start, end); update_hiwater_rss(mm); mmu_notifier_invalidate_range_start(mm, start, end); for ( ; vma && vma->vm_start < end; vma = vma->vm_next) @@ -1426,7 +1432,7 @@ static void zap_page_range_single(struct vm_area_struct *vma, unsigned long addr unsigned long end = address + size; lru_add_drain(); - tlb_gather_mmu(&tlb, mm, 0); + tlb_gather_mmu(&tlb, mm, address, end); update_hiwater_rss(mm); mmu_notifier_invalidate_range_start(mm, address, end); unmap_single_vma(&tlb, vma, address, end, details); diff --git a/mm/mmap.c b/mm/mmap.c index 1edbaa3136c..f9c97d10b87 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2336,7 +2336,7 @@ static void unmap_region(struct mm_struct *mm, struct mmu_gather tlb; lru_add_drain(); - tlb_gather_mmu(&tlb, mm, 0); + tlb_gather_mmu(&tlb, mm, start, end); update_hiwater_rss(mm); unmap_vmas(&tlb, vma, start, end); free_pgtables(&tlb, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS, @@ -2709,7 +2709,7 @@ void exit_mmap(struct mm_struct *mm) lru_add_drain(); flush_cache_mm(mm); - tlb_gather_mmu(&tlb, mm, 1); + tlb_gather_mmu(&tlb, mm, 0, -1); /* update_hiwater_rss(mm) here? but nobody should be looking */ /* Use -1 here to ensure all VMAs in the mm are unmapped */ unmap_vmas(&tlb, vma, 0, -1); -- cgit v1.2.3-70-g09d2
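One detail worth spelling out in the tlb_gather_mmu() change above is the fullmm test, sketched here for reference:

/*
 * fullmm = !(start | (end + 1)):
 *
 *   whole address space (exit/execve): start == 0, end == ~0UL
 *       0 | (~0UL + 1) == 0 | 0 == 0          -> fullmm == 1
 *   any partial range: start != 0 or end + 1 != 0
 *       the OR is non-zero                    -> fullmm == 0
 *
 * which is why exit_mmap() now passes tlb_gather_mmu(&tlb, mm, 0, -1).
 */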