From 82b2f495fba338d1e3098dde1df54944a9c19751 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Tue, 9 Jul 2013 15:16:06 +0100
Subject: arm64: virt: ensure visibility of __boot_cpu_mode

Secondary CPUs write to __boot_cpu_mode with caches disabled, and thus a
cached value of __boot_cpu_mode may be incoherent with that in memory.
This could lead to a failure to detect mismatched boot modes.

This patch adds flushing to ensure that writes by secondaries to
__boot_cpu_mode are made visible before we test against it.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: Christoffer Dall <cdall@cs.columbia.edu>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/virt.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'arch/arm64')

diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h
index 439827271e3..26e310c5434 100644
--- a/arch/arm64/include/asm/virt.h
+++ b/arch/arm64/include/asm/virt.h
@@ -21,6 +21,7 @@
 #define BOOT_CPU_MODE_EL2	(0x0e12b007)
 
 #ifndef __ASSEMBLY__
+#include <asm/cacheflush.h>
 
 /*
  * __boot_cpu_mode records what mode CPUs were booted in.
@@ -36,9 +37,20 @@ extern u32 __boot_cpu_mode[2];
 void __hyp_set_vectors(phys_addr_t phys_vector_base);
 phys_addr_t __hyp_get_vectors(void);
 
+static inline void sync_boot_mode(void)
+{
+	/*
+	 * As secondaries write to __boot_cpu_mode with caches disabled, we
+	 * must flush the corresponding cache entries to ensure the visibility
+	 * of their writes.
+	 */
+	__flush_dcache_area(__boot_cpu_mode, sizeof(__boot_cpu_mode));
+}
+
 /* Reports the availability of HYP mode */
 static inline bool is_hyp_mode_available(void)
 {
+	sync_boot_mode();
 	return (__boot_cpu_mode[0] == BOOT_CPU_MODE_EL2 &&
 		__boot_cpu_mode[1] == BOOT_CPU_MODE_EL2);
 }
@@ -46,6 +58,7 @@ static inline bool is_hyp_mode_available(void)
 /* Check if the bootloader has booted CPUs in different modes */
 static inline bool is_hyp_mode_mismatched(void)
 {
+	sync_boot_mode();
 	return __boot_cpu_mode[0] != __boot_cpu_mode[1];
 }
 
-- 
cgit v1.2.3-70-g09d2


From b0946fc84628b8d60e7a2034b48d1aff7da9d1df Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Tue, 23 Jul 2013 11:05:10 +0100
Subject: arm64: Fix definition of arm_pm_restart to match the declaration

Commit ff70130 (arm64: use common reboot infrastructure) converted the
arm_pm_restart declaration to the new reboot infrastructure but missed
the actual definition.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/kernel/process.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/arm64')

diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 1788bf6b471..57fb55c44c9 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -81,7 +81,7 @@ void soft_restart(unsigned long addr)
 void (*pm_power_off)(void);
 EXPORT_SYMBOL_GPL(pm_power_off);
 
-void (*arm_pm_restart)(char str, const char *cmd);
+void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd);
 EXPORT_SYMBOL_GPL(arm_pm_restart);
 
 void arch_cpu_idle_prepare(void)
-- 
cgit v1.2.3-70-g09d2


From 845ad05ec31e0f3872a321e10dbeaf872022632c Mon Sep 17 00:00:00 2001
From: Feng Kan <fkan@apm.com>
Date: Tue, 23 Jul 2013 18:52:31 +0100
Subject: arm64: Change kernel stack size to 16K

Written by Catalin Marinas, tested by APM on storm platform. This is needed
because of the failures encountered when running SpecWeb benchmark test.

Signed-off-by: Feng Kan <fkan@apm.com>
Acked-by: Kumar Sankaran <ksankaran@apm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/thread_info.h | 4 ++--
 arch/arm64/kernel/entry.S            | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch/arm64')

diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index 3659e460071..23a3c4791d8 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -24,10 +24,10 @@
 #include <linux/compiler.h>
 
 #ifndef CONFIG_ARM64_64K_PAGES
-#define THREAD_SIZE_ORDER	1
+#define THREAD_SIZE_ORDER	2
 #endif
 
-#define THREAD_SIZE		8192
+#define THREAD_SIZE		16384
 #define THREAD_START_SP		(THREAD_SIZE - 16)
 
 #ifndef __ASSEMBLY__
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 1d1314280a0..6ad781b21c0 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -121,7 +121,7 @@
 
 	.macro	get_thread_info, rd
 	mov	\rd, sp
-	and	\rd, \rd, #~((1 << 13) - 1)	// top of 8K stack
+	and	\rd, \rd, #~(THREAD_SIZE - 1)	// top of stack
 	.endm
 
 /*
-- 
cgit v1.2.3-70-g09d2


From 1bbd80549810637b7381ab0649ba7c7d62f1342a Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 7 Jun 2013 11:02:34 +0100
Subject: arm64: KVM: perform save/restore of PAR_EL1

Not saving PAR_EL1 is an unfortunate oversight. If the guest
performs an AT* operation and gets scheduled out before reading
the result of the translation from PAREL1, it could become
corrupted by another guest or the host.

Saving this register is made slightly more complicated as KVM also
uses it on the permission fault handling path, leading to an ugly
"stash and restore" sequence. Fortunately, this is already a slow
path so we don't really care. Also, Linux doesn't do any AT*
operation, so Linux guests are not impacted by this bug.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 arch/arm64/include/asm/kvm_asm.h | 17 ++++++++++-------
 arch/arm64/kvm/hyp.S             | 10 ++++++++++
 arch/arm64/kvm/sys_regs.c        |  3 +++
 3 files changed, 23 insertions(+), 7 deletions(-)

(limited to 'arch/arm64')

diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index c92de4163eb..b25763bc0ec 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -42,14 +42,15 @@
 #define	TPIDR_EL1	18	/* Thread ID, Privileged */
 #define	AMAIR_EL1	19	/* Aux Memory Attribute Indirection Register */
 #define	CNTKCTL_EL1	20	/* Timer Control Register (EL1) */
+#define	PAR_EL1		21	/* Physical Address Register */
 /* 32bit specific registers. Keep them at the end of the range */
-#define	DACR32_EL2	21	/* Domain Access Control Register */
-#define	IFSR32_EL2	22	/* Instruction Fault Status Register */
-#define	FPEXC32_EL2	23	/* Floating-Point Exception Control Register */
-#define	DBGVCR32_EL2	24	/* Debug Vector Catch Register */
-#define	TEECR32_EL1	25	/* ThumbEE Configuration Register */
-#define	TEEHBR32_EL1	26	/* ThumbEE Handler Base Register */
-#define	NR_SYS_REGS	27
+#define	DACR32_EL2	22	/* Domain Access Control Register */
+#define	IFSR32_EL2	23	/* Instruction Fault Status Register */
+#define	FPEXC32_EL2	24	/* Floating-Point Exception Control Register */
+#define	DBGVCR32_EL2	25	/* Debug Vector Catch Register */
+#define	TEECR32_EL1	26	/* ThumbEE Configuration Register */
+#define	TEEHBR32_EL1	27	/* ThumbEE Handler Base Register */
+#define	NR_SYS_REGS	28
 
 /* 32bit mapping */
 #define c0_MPIDR	(MPIDR_EL1 * 2)	/* MultiProcessor ID Register */
@@ -69,6 +70,8 @@
 #define c5_AIFSR	(AFSR1_EL1 * 2)	/* Auxiliary Instr Fault Status R */
 #define c6_DFAR		(FAR_EL1 * 2)	/* Data Fault Address Register */
 #define c6_IFAR		(c6_DFAR + 1)	/* Instruction Fault Address Register */
+#define c7_PAR		(PAR_EL1 * 2)	/* Physical Address Register */
+#define c7_PAR_high	(c7_PAR + 1)	/* PAR top 32 bits */
 #define c10_PRRR	(MAIR_EL1 * 2)	/* Primary Region Remap Register */
 #define c10_NMRR	(c10_PRRR + 1)	/* Normal Memory Remap Register */
 #define c12_VBAR	(VBAR_EL1 * 2)	/* Vector Base Address Register */
diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index ff985e3d8b7..218802f68b2 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -214,6 +214,7 @@ __kvm_hyp_code_start:
 	mrs	x21,	tpidr_el1
 	mrs	x22, 	amair_el1
 	mrs	x23, 	cntkctl_el1
+	mrs	x24,	par_el1
 
 	stp	x4, x5, [x3]
 	stp	x6, x7, [x3, #16]
@@ -225,6 +226,7 @@ __kvm_hyp_code_start:
 	stp	x18, x19, [x3, #112]
 	stp	x20, x21, [x3, #128]
 	stp	x22, x23, [x3, #144]
+	str	x24, [x3, #160]
 .endm
 
 .macro restore_sysregs
@@ -243,6 +245,7 @@ __kvm_hyp_code_start:
 	ldp	x18, x19, [x3, #112]
 	ldp	x20, x21, [x3, #128]
 	ldp	x22, x23, [x3, #144]
+	ldr	x24, [x3, #160]
 
 	msr	vmpidr_el2,	x4
 	msr	csselr_el1,	x5
@@ -264,6 +267,7 @@ __kvm_hyp_code_start:
 	msr	tpidr_el1,	x21
 	msr	amair_el1,	x22
 	msr	cntkctl_el1,	x23
+	msr	par_el1,	x24
 .endm
 
 .macro skip_32bit_state tmp, target
@@ -753,6 +757,10 @@ el1_trap:
 	 */
 	tbnz	x1, #7, 1f	// S1PTW is set
 
+	/* Preserve PAR_EL1 */
+	mrs	x3, par_el1
+	push	x3, xzr
+
 	/*
 	 * Permission fault, HPFAR_EL2 is invalid.
 	 * Resolve the IPA the hard way using the guest VA.
@@ -766,6 +774,8 @@ el1_trap:
 
 	/* Read result */
 	mrs	x3, par_el1
+	pop	x0, xzr			// Restore PAR_EL1 from the stack
+	msr	par_el1, x0
 	tbnz	x3, #0, 3f		// Bail out if we failed the translation
 	ubfx	x3, x3, #12, #36	// Extract IPA
 	lsl	x3, x3, #4		// and present it like HPFAR
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 94923609753..02e9d09e1d8 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -211,6 +211,9 @@ static const struct sys_reg_desc sys_reg_descs[] = {
 	/* FAR_EL1 */
 	{ Op0(0b11), Op1(0b000), CRn(0b0110), CRm(0b0000), Op2(0b000),
 	  NULL, reset_unknown, FAR_EL1 },
+	/* PAR_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b0111), CRm(0b0100), Op2(0b000),
+	  NULL, reset_unknown, PAR_EL1 },
 
 	/* PMINTENSET_EL1 */
 	{ Op0(0b11), Op1(0b000), CRn(0b1001), CRm(0b1110), Op2(0b001),
-- 
cgit v1.2.3-70-g09d2


From f142e5eeb724cfbedd203b32b3b542d78dbe2545 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 11 Jun 2013 18:05:25 +0100
Subject: arm64: KVM: add missing dsb before invalidating Stage-2 TLBs

When performing a Stage-2 TLB invalidation, it is necessary to
make sure the write to the page tables is observable by all CPUs.

For this purpose, add dsb instructions to __kvm_tlb_flush_vmid_ipa
and __kvm_flush_vm_context before doing the TLB invalidation itself.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 arch/arm64/kvm/hyp.S | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'arch/arm64')

diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index 218802f68b2..1ac0bbbdddb 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -604,6 +604,8 @@ END(__kvm_vcpu_run)
 
 // void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
 ENTRY(__kvm_tlb_flush_vmid_ipa)
+	dsb	ishst
+
 	kern_hyp_va	x0
 	ldr	x2, [x0, #KVM_VTTBR]
 	msr	vttbr_el2, x2
@@ -625,6 +627,7 @@ ENTRY(__kvm_tlb_flush_vmid_ipa)
 ENDPROC(__kvm_tlb_flush_vmid_ipa)
 
 ENTRY(__kvm_flush_vm_context)
+	dsb	ishst
 	tlbi	alle1is
 	ic	ialluis
 	dsb	sy
-- 
cgit v1.2.3-70-g09d2


From 6c8c0c4dc0e98ee2191211d66e9f876e95787073 Mon Sep 17 00:00:00 2001
From: Chen Gang <gang.chen@asianux.com>
Date: Mon, 22 Jul 2013 04:40:38 +0100
Subject: arm64: KVM: use 'int' instead of 'u32' for variable 'target' in
 kvm_host.h.

'target' will be set to '-1' in kvm_arch_vcpu_init(), and it need check
'target' whether less than zero or not in kvm_vcpu_initialized().

So need define target as 'int' instead of 'u32', just like ARM has done.

The related warning:

  arch/arm64/kvm/../../../arch/arm/kvm/arm.c:497:2: warning: comparison of unsigned expression >= 0 is always true [-Wtype-limits]

Signed-off-by: Chen Gang <gang.chen@asianux.com>
[Marc: reformated the Subject line to fit the series]
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 arch/arm64/include/asm/kvm_host.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/arm64')

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 644d7395686..0859a4ddd1e 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -129,7 +129,7 @@ struct kvm_vcpu_arch {
 	struct kvm_mmu_memory_cache mmu_page_cache;
 
 	/* Target CPU and feature flags */
-	u32 target;
+	int target;
 	DECLARE_BITMAP(features, KVM_VCPU_MAX_FEATURES);
 
 	/* Detect first run of a vcpu */
-- 
cgit v1.2.3-70-g09d2


From 2b047252d087be7f2ba088b4933cd904f92e6fce Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Thu, 15 Aug 2013 11:42:25 -0700
Subject: Fix TLB gather virtual address range invalidation corner cases

Ben Tebulin reported:

 "Since v3.7.2 on two independent machines a very specific Git
  repository fails in 9/10 cases on git-fsck due to an SHA1/memory
  failures.  This only occurs on a very specific repository and can be
  reproduced stably on two independent laptops.  Git mailing list ran
  out of ideas and for me this looks like some very exotic kernel issue"

and bisected the failure to the backport of commit 53a59fc67f97 ("mm:
limit mmu_gather batching to fix soft lockups on !CONFIG_PREEMPT").

That commit itself is not actually buggy, but what it does is to make it
much more likely to hit the partial TLB invalidation case, since it
introduces a new case in tlb_next_batch() that previously only ever
happened when running out of memory.

The real bug is that the TLB gather virtual memory range setup is subtly
buggered.  It was introduced in commit 597e1c3580b7 ("mm/mmu_gather:
enable tlb flush range in generic mmu_gather"), and the range handling
was already fixed at least once in commit e6c495a96ce0 ("mm: fix the TLB
range flushed when __tlb_remove_page() runs out of slots"), but that fix
was not complete.

The problem with the TLB gather virtual address range is that it isn't
set up by the initial tlb_gather_mmu() initialization (which didn't get
the TLB range information), but it is set up ad-hoc later by the
functions that actually flush the TLB.  And so any such case that forgot
to update the TLB range entries would potentially miss TLB invalidates.

Rather than try to figure out exactly which particular ad-hoc range
setup was missing (I personally suspect it's the hugetlb case in
zap_huge_pmd(), which didn't have the same logic as zap_pte_range()
did), this patch just gets rid of the problem at the source: make the
TLB range information available to tlb_gather_mmu(), and initialize it
when initializing all the other tlb gather fields.

This makes the patch larger, but conceptually much simpler.  And the end
result is much more understandable; even if you want to play games with
partial ranges when invalidating the TLB contents in chunks, now the
range information is always there, and anybody who doesn't want to
bother with it won't introduce subtle bugs.

Ben verified that this fixes his problem.

Reported-bisected-and-tested-by: Ben Tebulin <tebulin@googlemail.com>
Build-testing-by: Stephen Rothwell <sfr@canb.auug.org.au>
Build-testing-by: Richard Weinberger <richard.weinberger@gmail.com>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: stable@vger.kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm/include/asm/tlb.h   |  7 +++++--
 arch/arm64/include/asm/tlb.h |  7 +++++--
 arch/ia64/include/asm/tlb.h  |  9 ++++++---
 arch/s390/include/asm/tlb.h  |  8 ++++++--
 arch/sh/include/asm/tlb.h    |  6 ++++--
 arch/um/include/asm/tlb.h    |  6 ++++--
 fs/exec.c                    |  4 ++--
 include/asm-generic/tlb.h    |  2 +-
 mm/hugetlb.c                 |  2 +-
 mm/memory.c                  | 36 +++++++++++++++++++++---------------
 mm/mmap.c                    |  4 ++--
 11 files changed, 57 insertions(+), 34 deletions(-)

(limited to 'arch/arm64')

diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h
index 46e7cfb3e72..0baf7f0d939 100644
--- a/arch/arm/include/asm/tlb.h
+++ b/arch/arm/include/asm/tlb.h
@@ -43,6 +43,7 @@ struct mmu_gather {
 	struct mm_struct	*mm;
 	unsigned int		fullmm;
 	struct vm_area_struct	*vma;
+	unsigned long		start, end;
 	unsigned long		range_start;
 	unsigned long		range_end;
 	unsigned int		nr;
@@ -107,10 +108,12 @@ static inline void tlb_flush_mmu(struct mmu_gather *tlb)
 }
 
 static inline void
-tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned int fullmm)
+tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end)
 {
 	tlb->mm = mm;
-	tlb->fullmm = fullmm;
+	tlb->fullmm = !(start | (end+1));
+	tlb->start = start;
+	tlb->end = end;
 	tlb->vma = NULL;
 	tlb->max = ARRAY_SIZE(tlb->local);
 	tlb->pages = tlb->local;
diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h
index 46b3beb4b77..717031a762c 100644
--- a/arch/arm64/include/asm/tlb.h
+++ b/arch/arm64/include/asm/tlb.h
@@ -35,6 +35,7 @@ struct mmu_gather {
 	struct mm_struct	*mm;
 	unsigned int		fullmm;
 	struct vm_area_struct	*vma;
+	unsigned long		start, end;
 	unsigned long		range_start;
 	unsigned long		range_end;
 	unsigned int		nr;
@@ -97,10 +98,12 @@ static inline void tlb_flush_mmu(struct mmu_gather *tlb)
 }
 
 static inline void
-tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned int fullmm)
+tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end)
 {
 	tlb->mm = mm;
-	tlb->fullmm = fullmm;
+	tlb->fullmm = !(start | (end+1));
+	tlb->start = start;
+	tlb->end = end;
 	tlb->vma = NULL;
 	tlb->max = ARRAY_SIZE(tlb->local);
 	tlb->pages = tlb->local;
diff --git a/arch/ia64/include/asm/tlb.h b/arch/ia64/include/asm/tlb.h
index ef3a9de0195..bc5efc7c3f3 100644
--- a/arch/ia64/include/asm/tlb.h
+++ b/arch/ia64/include/asm/tlb.h
@@ -22,7 +22,7 @@
  * unmapping a portion of the virtual address space, these hooks are called according to
  * the following template:
  *
- *	tlb <- tlb_gather_mmu(mm, full_mm_flush);	// start unmap for address space MM
+ *	tlb <- tlb_gather_mmu(mm, start, end);		// start unmap for address space MM
  *	{
  *	  for each vma that needs a shootdown do {
  *	    tlb_start_vma(tlb, vma);
@@ -58,6 +58,7 @@ struct mmu_gather {
 	unsigned int		max;
 	unsigned char		fullmm;		/* non-zero means full mm flush */
 	unsigned char		need_flush;	/* really unmapped some PTEs? */
+	unsigned long		start, end;
 	unsigned long		start_addr;
 	unsigned long		end_addr;
 	struct page		**pages;
@@ -155,13 +156,15 @@ static inline void __tlb_alloc_page(struct mmu_gather *tlb)
 
 
 static inline void
-tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned int full_mm_flush)
+tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end)
 {
 	tlb->mm = mm;
 	tlb->max = ARRAY_SIZE(tlb->local);
 	tlb->pages = tlb->local;
 	tlb->nr = 0;
-	tlb->fullmm = full_mm_flush;
+	tlb->fullmm = !(start | (end+1));
+	tlb->start = start;
+	tlb->end = end;
 	tlb->start_addr = ~0UL;
 }
 
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index b75d7d68668..23a64d25f2b 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -32,6 +32,7 @@ struct mmu_gather {
 	struct mm_struct *mm;
 	struct mmu_table_batch *batch;
 	unsigned int fullmm;
+	unsigned long start, unsigned long end;
 };
 
 struct mmu_table_batch {
@@ -48,10 +49,13 @@ extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
 
 static inline void tlb_gather_mmu(struct mmu_gather *tlb,
 				  struct mm_struct *mm,
-				  unsigned int full_mm_flush)
+				  unsigned long start,
+				  unsigned long end)
 {
 	tlb->mm = mm;
-	tlb->fullmm = full_mm_flush;
+	tlb->start = start;
+	tlb->end = end;
+	tlb->fullmm = !(start | (end+1));
 	tlb->batch = NULL;
 	if (tlb->fullmm)
 		__tlb_flush_mm(mm);
diff --git a/arch/sh/include/asm/tlb.h b/arch/sh/include/asm/tlb.h
index e61d43d9f68..362192ed12f 100644
--- a/arch/sh/include/asm/tlb.h
+++ b/arch/sh/include/asm/tlb.h
@@ -36,10 +36,12 @@ static inline void init_tlb_gather(struct mmu_gather *tlb)
 }
 
 static inline void
-tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned int full_mm_flush)
+tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end)
 {
 	tlb->mm = mm;
-	tlb->fullmm = full_mm_flush;
+	tlb->start = start;
+	tlb->end = end;
+	tlb->fullmm = !(start | (end+1));
 
 	init_tlb_gather(tlb);
 }
diff --git a/arch/um/include/asm/tlb.h b/arch/um/include/asm/tlb.h
index 4febacd1a8a..29b0301c18a 100644
--- a/arch/um/include/asm/tlb.h
+++ b/arch/um/include/asm/tlb.h
@@ -45,10 +45,12 @@ static inline void init_tlb_gather(struct mmu_gather *tlb)
 }
 
 static inline void
-tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned int full_mm_flush)
+tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end)
 {
 	tlb->mm = mm;
-	tlb->fullmm = full_mm_flush;
+	tlb->start = start;
+	tlb->end = end;
+	tlb->fullmm = !(start | (end+1));
 
 	init_tlb_gather(tlb);
 }
diff --git a/fs/exec.c b/fs/exec.c
index 9c73def8764..fd774c7cb48 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -608,7 +608,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
 		return -ENOMEM;
 
 	lru_add_drain();
-	tlb_gather_mmu(&tlb, mm, 0);
+	tlb_gather_mmu(&tlb, mm, old_start, old_end);
 	if (new_end > old_start) {
 		/*
 		 * when the old and new regions overlap clear from new_end.
@@ -625,7 +625,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
 		free_pgd_range(&tlb, old_start, old_end, new_end,
 			vma->vm_next ? vma->vm_next->vm_start : USER_PGTABLES_CEILING);
 	}
-	tlb_finish_mmu(&tlb, new_end, old_end);
+	tlb_finish_mmu(&tlb, old_start, old_end);
 
 	/*
 	 * Shrink the vma to just the new range.  Always succeeds.
diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index 13821c339a4..5672d7ea1fa 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -112,7 +112,7 @@ struct mmu_gather {
 
 #define HAVE_GENERIC_MMU_GATHER
 
-void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, bool fullmm);
+void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end);
 void tlb_flush_mmu(struct mmu_gather *tlb);
 void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start,
 							unsigned long end);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 83aff0a4d09..b60f33080a2 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2490,7 +2490,7 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
 
 	mm = vma->vm_mm;
 
-	tlb_gather_mmu(&tlb, mm, 0);
+	tlb_gather_mmu(&tlb, mm, start, end);
 	__unmap_hugepage_range(&tlb, vma, start, end, ref_page);
 	tlb_finish_mmu(&tlb, start, end);
 }
diff --git a/mm/memory.c b/mm/memory.c
index 40268410732..af84bc0ec17 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -209,14 +209,15 @@ static int tlb_next_batch(struct mmu_gather *tlb)
  *	tear-down from @mm. The @fullmm argument is used when @mm is without
  *	users and we're going to destroy the full address space (exit/execve).
  */
-void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, bool fullmm)
+void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end)
 {
 	tlb->mm = mm;
 
-	tlb->fullmm     = fullmm;
+	/* Is it from 0 to ~0? */
+	tlb->fullmm     = !(start | (end+1));
 	tlb->need_flush_all = 0;
-	tlb->start	= -1UL;
-	tlb->end	= 0;
+	tlb->start	= start;
+	tlb->end	= end;
 	tlb->need_flush = 0;
 	tlb->local.next = NULL;
 	tlb->local.nr   = 0;
@@ -256,8 +257,6 @@ void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long e
 {
 	struct mmu_gather_batch *batch, *next;
 
-	tlb->start = start;
-	tlb->end   = end;
 	tlb_flush_mmu(tlb);
 
 	/* keep the page table cache within bounds */
@@ -1099,7 +1098,6 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 	spinlock_t *ptl;
 	pte_t *start_pte;
 	pte_t *pte;
-	unsigned long range_start = addr;
 
 again:
 	init_rss_vec(rss);
@@ -1205,17 +1203,25 @@ again:
 	 * and page-free while holding it.
 	 */
 	if (force_flush) {
+		unsigned long old_end;
+
 		force_flush = 0;
 
-#ifdef HAVE_GENERIC_MMU_GATHER
-		tlb->start = range_start;
+		/*
+		 * Flush the TLB just for the previous segment,
+		 * then update the range to be the remaining
+		 * TLB range.
+		 */
+		old_end = tlb->end;
 		tlb->end = addr;
-#endif
+
 		tlb_flush_mmu(tlb);
-		if (addr != end) {
-			range_start = addr;
+
+		tlb->start = addr;
+		tlb->end = old_end;
+
+		if (addr != end)
 			goto again;
-		}
 	}
 
 	return addr;
@@ -1400,7 +1406,7 @@ void zap_page_range(struct vm_area_struct *vma, unsigned long start,
 	unsigned long end = start + size;
 
 	lru_add_drain();
-	tlb_gather_mmu(&tlb, mm, 0);
+	tlb_gather_mmu(&tlb, mm, start, end);
 	update_hiwater_rss(mm);
 	mmu_notifier_invalidate_range_start(mm, start, end);
 	for ( ; vma && vma->vm_start < end; vma = vma->vm_next)
@@ -1426,7 +1432,7 @@ static void zap_page_range_single(struct vm_area_struct *vma, unsigned long addr
 	unsigned long end = address + size;
 
 	lru_add_drain();
-	tlb_gather_mmu(&tlb, mm, 0);
+	tlb_gather_mmu(&tlb, mm, address, end);
 	update_hiwater_rss(mm);
 	mmu_notifier_invalidate_range_start(mm, address, end);
 	unmap_single_vma(&tlb, vma, address, end, details);
diff --git a/mm/mmap.c b/mm/mmap.c
index 1edbaa3136c..f9c97d10b87 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2336,7 +2336,7 @@ static void unmap_region(struct mm_struct *mm,
 	struct mmu_gather tlb;
 
 	lru_add_drain();
-	tlb_gather_mmu(&tlb, mm, 0);
+	tlb_gather_mmu(&tlb, mm, start, end);
 	update_hiwater_rss(mm);
 	unmap_vmas(&tlb, vma, start, end);
 	free_pgtables(&tlb, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS,
@@ -2709,7 +2709,7 @@ void exit_mmap(struct mm_struct *mm)
 
 	lru_add_drain();
 	flush_cache_mm(mm);
-	tlb_gather_mmu(&tlb, mm, 1);
+	tlb_gather_mmu(&tlb, mm, 0, -1);
 	/* update_hiwater_rss(mm) here? but nobody should be looking */
 	/* Use -1 here to ensure all VMAs in the mm are unmapped */
 	unmap_vmas(&tlb, vma, 0, -1);
-- 
cgit v1.2.3-70-g09d2


From 868f6fea8fa63f09acbfa93256d0d2abdcabff79 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Tue, 20 Aug 2013 11:47:39 +0100
Subject: arm64: perf: fix array out of bounds access in armpmu_map_hw_event()

This is a port of d9f966357b14 ("ARM: 7810/1: perf: Fix array out of
bounds access in armpmu_map_hw_event()") to arm64, which fixes an oops
in the arm64 perf backend found as a result of Vince's fuzzing tool.

Cc: <stable@vger.kernel.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/kernel/perf_event.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'arch/arm64')

diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index 9ba33c40cdf..2012646fb46 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -107,7 +107,12 @@ armpmu_map_cache_event(const unsigned (*cache_map)
 static int
 armpmu_map_event(const unsigned (*event_map)[PERF_COUNT_HW_MAX], u64 config)
 {
-	int mapping = (*event_map)[config];
+	int mapping;
+
+	if (config >= PERF_COUNT_HW_MAX)
+		return -EINVAL;
+
+	mapping = (*event_map)[config];
 	return mapping == HW_OP_UNSUPPORTED ? -ENOENT : mapping;
 }
 
-- 
cgit v1.2.3-70-g09d2


From ee7538a008a45050c8f706d38b600f55953169f9 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Tue, 20 Aug 2013 11:47:40 +0100
Subject: arm64: perf: fix event validation for software group leaders

This is a port of c95eb3184ea1 ("ARM: 7809/1: perf: fix event validation
for software group leaders") to arm64, which fixes a panic in the arm64
perf backend found as a result of Vince's fuzzing tool.

Cc: <stable@vger.kernel.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/kernel/perf_event.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'arch/arm64')

diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index 2012646fb46..12e6ccb8869 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -322,6 +322,9 @@ validate_event(struct pmu_hw_events *hw_events,
 	struct hw_perf_event fake_event = event->hw;
 	struct pmu *leader_pmu = event->group_leader->pmu;
 
+	if (is_software_event(event))
+		return 1;
+
 	if (event->pmu != leader_pmu || event->state <= PERF_EVENT_STATE_OFF)
 		return 1;
 
-- 
cgit v1.2.3-70-g09d2