From 0b827537e339c084ac9384df588969d400be9e0d Mon Sep 17 00:00:00 2001
From: "venkatesh.pallipadi@intel.com" <venkatesh.pallipadi@intel.com>
Date: Fri, 22 May 2009 13:23:37 -0700
Subject: x86: bugfix wbinvd() model check instead of family check

wbinvd is supported on all CPUs of family 486 or later (x86 >= 4). But
pageattr.c was checking x86_model >= 4 before wbinvd(), which looks
like an oversight bug: x86_model is the model number within a family,
not the family itself. The check was first introduced in one place by
changeset d7c8f21a8cad0228c7c5ce2bb6dbd95d1ee49d13 and later copied to
a second place in the same file.

[ Impact: fix missing cache flush on early-model CPUs, potential data corruption ]
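
For illustration only (this is not part of the patch): a minimal
userspace sketch of how family and model are decoded from CPUID leaf 1,
the same values the kernel keeps in boot_cpu_data.x86 (family) and
boot_cpu_data.x86_model. It assumes gcc/clang's <cpuid.h> on an x86
host.

    #include <stdio.h>
    #include <cpuid.h>

    int main(void)
    {
            unsigned int eax, ebx, ecx, edx, family, model;

            if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
                    return 1;

            family = (eax >> 8) & 0xf;              /* base family */
            model  = (eax >> 4) & 0xf;              /* base model  */
            if (family == 0xf)                      /* extended family */
                    family += (eax >> 20) & 0xff;
            if (family == 0x6 || family == 0xf)     /* extended model */
                    model += ((eax >> 16) & 0xf) << 4;

            /* wbinvd has existed since the 486: a family test, not a
             * model test, is what gates its use. */
            printf("family %u, model %u: wbinvd %ssupported\n",
                   family, model, family >= 4 ? "" : "not ");
            return 0;
    }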

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/mm/pageattr.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 797f9f107cb..2cc019a3f71 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -153,7 +153,7 @@ static void __cpa_flush_all(void *arg)
 	 */
 	__flush_tlb_all();
 
-	if (cache && boot_cpu_data.x86_model >= 4)
+	if (cache && boot_cpu_data.x86 >= 4)
 		wbinvd();
 }
 
@@ -218,7 +218,7 @@ static void cpa_flush_array(unsigned long *start, int numpages, int cache,
 
 	/* 4M threshold */
 	if (numpages >= 1024) {
-		if (boot_cpu_data.x86_model >= 4)
+		if (boot_cpu_data.x86 >= 4)
 			wbinvd();
 		return;
 	}
-- 


From 0af48f42df15b97080b450d24219dd95db7b929a Mon Sep 17 00:00:00 2001
From: "venkatesh.pallipadi@intel.com" <venkatesh.pallipadi@intel.com>
Date: Fri, 22 May 2009 13:23:38 -0700
Subject: x86: cpa_flush_array wbinvd should be done on all CPUs

cpa_flush_array() prefers wbinvd() over clflush once the range crosses
the 4M threshold. clflush needs to be executed on only one CPU, since
by the instruction's definition it is MESI-coherent. wbinvd(), however,
flushes only the caches of the CPU executing it, so it must be run on
all CPUs.

[ Impact: fix missing flush which could cause data corruption ]
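
In kernel-flavoured pseudocode the contrast looks roughly like this (a
sketch, not verbatim pageattr.c; clflush_cache_range() is the
per-cache-line flush helper this file already uses):

    /* clflush is MESI-coherent: one CPU flushing a line evicts it
     * from every CPU's caches, so running locally is sufficient. */
    clflush_cache_range(addr, PAGE_SIZE);

    /* wbinvd flushes only the caches of the CPU executing it, so it
     * must be sent to every online CPU and waited on (wait == 1). */
    on_each_cpu(wbinvd_local, NULL, 1);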

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/mm/pageattr.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 2cc019a3f71..0f9052bcec4 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -204,6 +204,11 @@ static void cpa_flush_range(unsigned long start, int numpages, int cache)
 	}
 }
 
+static void wbinvd_local(void *unused)
+{
+	wbinvd();
+}
+
 static void cpa_flush_array(unsigned long *start, int numpages, int cache,
 			    int in_flags, struct page **pages)
 {
@@ -219,7 +224,8 @@ static void cpa_flush_array(unsigned long *start, int numpages, int cache,
 	/* 4M threshold */
 	if (numpages >= 1024) {
 		if (boot_cpu_data.x86 >= 4)
-			wbinvd();
+			on_each_cpu(wbinvd_local, NULL, 1);
+
 		return;
 	}
 	/*
-- 


From 2171787be2e71ff71159857bfeb21398b61eb615 Mon Sep 17 00:00:00 2001
From: "Pallipadi, Venkatesh" <venkatesh.pallipadi@intel.com>
Date: Tue, 26 May 2009 10:33:35 -0700
Subject: x86: avoid back to back on_each_cpu in cpa_flush_array

Clean up cpa_flush_array() to avoid back-to-back on_each_cpu() calls:
instead of one IPI round for the TLB flush and a second one for
wbinvd(), pass the wbinvd decision down to __cpa_flush_all() so that a
single IPI round does both.

[ Impact: optimizes fix 0af48f42df15b97080b450d24219dd95db7b929a ]
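
For reference, __cpa_flush_all() already takes the cache decision as
its argument (its tail is visible in the first hunk of the first patch
above), so a single IPI round now covers both the TLB flush and the
conditional wbinvd(). Reconstructed from those context lines, the
callee looks roughly like:

    static void __cpa_flush_all(void *arg)
    {
            unsigned long cache = (unsigned long)arg;

            /* Every CPU first invalidates its own TLB... */
            __flush_tlb_all();

            /* ...and, if asked to and on family >= 4, its caches. */
            if (cache && boot_cpu_data.x86 >= 4)
                    wbinvd();
    }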

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/mm/pageattr.c | 17 +++--------------
 1 file changed, 3 insertions(+), 14 deletions(-)

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 0f9052bcec4..e17efed088c 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -204,30 +204,19 @@ static void cpa_flush_range(unsigned long start, int numpages, int cache)
 	}
 }
 
-static void wbinvd_local(void *unused)
-{
-	wbinvd();
-}
-
 static void cpa_flush_array(unsigned long *start, int numpages, int cache,
 			    int in_flags, struct page **pages)
 {
 	unsigned int i, level;
+	unsigned long do_wbinvd = cache && numpages >= 1024; /* 4M threshold */
 
 	BUG_ON(irqs_disabled());
 
-	on_each_cpu(__cpa_flush_range, NULL, 1);
+	on_each_cpu(__cpa_flush_all, (void *) do_wbinvd, 1);
 
-	if (!cache)
+	if (!cache || do_wbinvd)
 		return;
 
-	/* 4M threshold */
-	if (numpages >= 1024) {
-		if (boot_cpu_data.x86 >= 4)
-			on_each_cpu(wbinvd_local, NULL, 1);
-
-		return;
-	}
 	/*
 	 * We only need to flush on one CPU,
 	 * clflush is a MESI-coherent instruction that
-- 


From 32b154c0b0bae2879bf4e549d861caf1759a3546 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mel@csn.ul.ie>
Date: Thu, 28 May 2009 14:34:37 -0700
Subject: x86: ignore VM_LOCKED when determining if hugetlb-backed page tables
 can be shared or not

Addresses http://bugzilla.kernel.org/show_bug.cgi?id=13302

On x86 and x86-64, it is possible that page tables are shared between
shared mappings backed by hugetlbfs.  As part of this,
page_table_shareable() compares a pair of vma->vm_flags, which must
match if the page tables are to be shared.  All VMA flags are taken
into account, including VM_LOCKED.

The problem is that VM_LOCKED is cleared on fork().  When a process
with a locked shared memory segment forks to exec a helper, the result
is shared VMAs with different flags.  The impact is that the shared
segment is sometimes considered shareable and other times not,
depending on which process is doing the check.

What happens is that the segment's page tables are shared, but the
reference count on them is inaccurate, depending on the ordering of
events.  As the page tables are freed with put_page(), bad pmds are
found when some of the children exit.  The hugepage counters also get
corrupted, and the Total and Free counts no longer match even when all
the hugepage-backed regions are freed.  This requires a reboot of the
machine to "fix".

This patch addresses the problem by comparing all flags except
VM_LOCKED when deciding whether page tables should be shared for a
hugetlbfs-backed mapping.
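
For illustration only (not part of the patch): a userspace sketch of
why the naive flag comparison breaks after fork() and how the mask
repairs it.  The flag values mirror include/linux/mm.h of this era but
should be treated as assumptions here.

    #include <stdio.h>

    #define VM_READ    0x00000001UL
    #define VM_WRITE   0x00000002UL
    #define VM_SHARED  0x00000008UL
    #define VM_LOCKED  0x00002000UL

    int main(void)
    {
            /* Parent maps a locked shared segment... */
            unsigned long parent = VM_READ | VM_WRITE | VM_SHARED |
                                   VM_LOCKED;
            /* ...but fork() clears VM_LOCKED in the child's copy. */
            unsigned long child = parent & ~VM_LOCKED;

            printf("naive compare:  %s\n",
                   parent == child ? "shareable" : "NOT shareable");
            printf("masked compare: %s\n",
                   (parent & ~VM_LOCKED) == (child & ~VM_LOCKED) ?
                   "shareable" : "NOT shareable");
            return 0;
    }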

Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Acked-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: <stable@kernel.org>
Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: <starlight@binnacle.cx>
Cc: Eric B Munson <ebmunson@us.ibm.com>
Cc: Adam Litke <agl@us.ibm.com>
Cc: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/mm/hugetlbpage.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index 8f307d914c2..f46c340727b 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -26,12 +26,16 @@ static unsigned long page_table_shareable(struct vm_area_struct *svma,
 	unsigned long sbase = saddr & PUD_MASK;
 	unsigned long s_end = sbase + PUD_SIZE;
 
+	/* Allow segments to share if only one is marked locked */
+	unsigned long vm_flags = vma->vm_flags & ~VM_LOCKED;
+	unsigned long svm_flags = svma->vm_flags & ~VM_LOCKED;
+
 	/*
 	 * match the virtual addresses, permission and the alignment of the
 	 * page table page.
 	 */
 	if (pmd_index(addr) != pmd_index(saddr) ||
-	    vma->vm_flags != svma->vm_flags ||
+	    vm_flags != svm_flags ||
 	    sbase < svma->vm_start || svma->vm_end < s_end)
 		return 0;
 
-- 