From 9594a4986192f99c01a7c0a1779b5ac0eff8e208 Mon Sep 17 00:00:00 2001
From: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
Date: Mon, 2 Jul 2012 17:53:25 +0900
Subject: KVM: MMU: Use __gfn_to_rmap() to clean up kvm_handle_hva()

We can treat every level uniformly.

Signed-off-by: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/mmu.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 28c8fbcc676..2beb95b57cb 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1281,14 +1281,14 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
 			gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
 			gfn_t gfn = memslot->base_gfn + gfn_offset;
 
-			ret = handler(kvm, &memslot->rmap[gfn_offset], data);
+			ret = 0;
 
-			for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) {
-				struct kvm_lpage_info *linfo;
+			for (j = PT_PAGE_TABLE_LEVEL;
+			     j < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++j) {
+				unsigned long *rmapp;
 
-				linfo = lpage_info_slot(gfn, memslot,
-							PT_DIRECTORY_LEVEL + j);
-				ret |= handler(kvm, &linfo->rmap_pde, data);
+				rmapp = __gfn_to_rmap(gfn, j, memslot);
+				ret |= handler(kvm, rmapp, data);
 			}
 			trace_kvm_age_page(hva, memslot, ret);
 			retval |= ret;
-- 
cgit v1.2.3-70-g09d2


From d19a748b1c42b133e9263e9023c1d162efa6f4ad Mon Sep 17 00:00:00 2001
From: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
Date: Mon, 2 Jul 2012 17:54:30 +0900
Subject: KVM: Introduce hva_to_gfn_memslot() for kvm_handle_hva()

This restricts hva handling in mmu code and makes it easier to extend
kvm_handle_hva() so that it can treat a range of addresses later in this
patch series.

Signed-off-by: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
Cc: Alexander Graf <agraf@suse.de>
Cc: Paul Mackerras <paulus@samba.org>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/powerpc/kvm/book3s_64_mmu_hv.c | 6 +++---
 arch/x86/kvm/mmu.c                  | 3 +--
 include/linux/kvm_host.h            | 8 ++++++++
 3 files changed, 12 insertions(+), 5 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index d03eb6f7b05..37037553fe6 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -772,10 +772,10 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
 
 		end = start + (memslot->npages << PAGE_SHIFT);
 		if (hva >= start && hva < end) {
-			gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
+			gfn_t gfn = hva_to_gfn_memslot(hva, memslot);
+			gfn_t gfn_offset = gfn - memslot->base_gfn;
 
-			ret = handler(kvm, &memslot->rmap[gfn_offset],
-				      memslot->base_gfn + gfn_offset);
+			ret = handler(kvm, &memslot->rmap[gfn_offset], gfn);
 			retval |= ret;
 		}
 	}
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 2beb95b57cb..170a632d9d3 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1278,8 +1278,7 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
 
 		end = start + (memslot->npages << PAGE_SHIFT);
 		if (hva >= start && hva < end) {
-			gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
-			gfn_t gfn = memslot->base_gfn + gfn_offset;
+			gfn_t gfn = hva_to_gfn_memslot(hva, memslot);
 
 			ret = 0;
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index e3c86f8c86c..6f6c18a03c5 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -740,6 +740,14 @@ static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
 		(base_gfn >> KVM_HPAGE_GFN_SHIFT(level));
 }
 
+static inline gfn_t
+hva_to_gfn_memslot(unsigned long hva, struct kvm_memory_slot *slot)
+{
+	gfn_t gfn_offset = (hva - slot->userspace_addr) >> PAGE_SHIFT;
+
+	return slot->base_gfn + gfn_offset;
+}
+
 static inline unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot,
 					       gfn_t gfn)
 {
-- 
cgit v1.2.3-70-g09d2


From 84504ef38673fa021b3d8f3da2b79cf878b33315 Mon Sep 17 00:00:00 2001
From: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
Date: Mon, 2 Jul 2012 17:55:48 +0900
Subject: KVM: MMU: Make kvm_handle_hva() handle range of addresses

When guest's memory is backed by THP pages, MMU notifier needs to call
kvm_unmap_hva(), which in turn leads to kvm_handle_hva(), in a loop to
invalidate a range of pages which constitute one huge page:

  for each page
    for each memslot
      if page is in memslot
        unmap using rmap

This means although every page in that range is expected to be found in
the same memslot, we are forced to check unrelated memslots many times.
If the guest has more memslots, the situation will become worse.

Furthermore, if the range does not include any pages in the guest's
memory, the loop over the pages will just consume extra time.

This patch, together with the following patches, solves this problem by
introducing kvm_handle_hva_range() which makes the loop look like this:

  for each memslot
    for each page in memslot
      unmap using rmap

In this new processing, the actual work is converted to a loop over rmap
which is much more cache friendly than before.

Signed-off-by: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
Cc: Alexander Graf <agraf@suse.de>
Cc: Paul Mackerras <paulus@samba.org>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/powerpc/kvm/book3s_64_mmu_hv.c | 36 ++++++++++++++++++++++++-------
 arch/x86/kvm/mmu.c                  | 42 ++++++++++++++++++++++++++++---------
 2 files changed, 60 insertions(+), 18 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 37037553fe6..1a470bc2876 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -756,9 +756,12 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	goto out_put;
 }
 
-static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
-			  int (*handler)(struct kvm *kvm, unsigned long *rmapp,
-					 unsigned long gfn))
+static int kvm_handle_hva_range(struct kvm *kvm,
+				unsigned long start,
+				unsigned long end,
+				int (*handler)(struct kvm *kvm,
+					       unsigned long *rmapp,
+					       unsigned long gfn))
 {
 	int ret;
 	int retval = 0;
@@ -767,12 +770,22 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
 
 	slots = kvm_memslots(kvm);
 	kvm_for_each_memslot(memslot, slots) {
-		unsigned long start = memslot->userspace_addr;
-		unsigned long end;
+		unsigned long hva_start, hva_end;
+		gfn_t gfn, gfn_end;
+
+		hva_start = max(start, memslot->userspace_addr);
+		hva_end = min(end, memslot->userspace_addr +
+					(memslot->npages << PAGE_SHIFT));
+		if (hva_start >= hva_end)
+			continue;
+		/*
+		 * {gfn(page) | page intersects with [hva_start, hva_end)} =
+		 * {gfn, gfn+1, ..., gfn_end-1}.
+		 */
+		gfn = hva_to_gfn_memslot(hva_start, memslot);
+		gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot);
 
-		end = start + (memslot->npages << PAGE_SHIFT);
-		if (hva >= start && hva < end) {
-			gfn_t gfn = hva_to_gfn_memslot(hva, memslot);
+		for (; gfn < gfn_end; ++gfn) {
 			gfn_t gfn_offset = gfn - memslot->base_gfn;
 
 			ret = handler(kvm, &memslot->rmap[gfn_offset], gfn);
@@ -783,6 +796,13 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
 	return retval;
 }
 
+static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
+			  int (*handler)(struct kvm *kvm, unsigned long *rmapp,
+					 unsigned long gfn))
+{
+	return kvm_handle_hva_range(kvm, hva, hva + 1, handler);
+}
+
 static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
 			   unsigned long gfn)
 {
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 170a632d9d3..7235b0c9587 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1259,10 +1259,13 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
 	return 0;
 }
 
-static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
-			  unsigned long data,
-			  int (*handler)(struct kvm *kvm, unsigned long *rmapp,
-					 unsigned long data))
+static int kvm_handle_hva_range(struct kvm *kvm,
+				unsigned long start,
+				unsigned long end,
+				unsigned long data,
+				int (*handler)(struct kvm *kvm,
+					       unsigned long *rmapp,
+					       unsigned long data))
 {
 	int j;
 	int ret;
@@ -1273,13 +1276,22 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
 	slots = kvm_memslots(kvm);
 
 	kvm_for_each_memslot(memslot, slots) {
-		unsigned long start = memslot->userspace_addr;
-		unsigned long end;
+		unsigned long hva_start, hva_end;
+		gfn_t gfn, gfn_end;
 
-		end = start + (memslot->npages << PAGE_SHIFT);
-		if (hva >= start && hva < end) {
-			gfn_t gfn = hva_to_gfn_memslot(hva, memslot);
+		hva_start = max(start, memslot->userspace_addr);
+		hva_end = min(end, memslot->userspace_addr +
+					(memslot->npages << PAGE_SHIFT));
+		if (hva_start >= hva_end)
+			continue;
+		/*
+		 * {gfn(page) | page intersects with [hva_start, hva_end)} =
+		 * {gfn, gfn+1, ..., gfn_end-1}.
+		 */
+		gfn = hva_to_gfn_memslot(hva_start, memslot);
+		gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot);
 
+		for (; gfn < gfn_end; ++gfn) {
 			ret = 0;
 
 			for (j = PT_PAGE_TABLE_LEVEL;
@@ -1289,7 +1301,9 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
 				rmapp = __gfn_to_rmap(gfn, j, memslot);
 				ret |= handler(kvm, rmapp, data);
 			}
-			trace_kvm_age_page(hva, memslot, ret);
+			trace_kvm_age_page(memslot->userspace_addr +
+					(gfn - memslot->base_gfn) * PAGE_SIZE,
+					memslot, ret);
 			retval |= ret;
 		}
 	}
@@ -1297,6 +1311,14 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
 	return retval;
 }
 
+static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
+			  unsigned long data,
+			  int (*handler)(struct kvm *kvm, unsigned long *rmapp,
+					 unsigned long data))
+{
+	return kvm_handle_hva_range(kvm, hva, hva + 1, data, handler);
+}
+
 int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
 {
 	return kvm_handle_hva(kvm, hva, 0, kvm_unmap_rmapp);
-- 
cgit v1.2.3-70-g09d2


From b3ae2096974b12c3af2ad1a4e7716b084949867f Mon Sep 17 00:00:00 2001
From: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
Date: Mon, 2 Jul 2012 17:56:33 +0900
Subject: KVM: Introduce kvm_unmap_hva_range() for
 kvm_mmu_notifier_invalidate_range_start()

When we tested KVM under memory pressure, with THP enabled on the host,
we noticed that MMU notifier took a long time to invalidate huge pages.

Since the invalidation was done with mmu_lock held, it not only wasted
the CPU but also made the host harder to respond.

This patch mitigates this by using kvm_handle_hva_range().

Signed-off-by: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
Cc: Alexander Graf <agraf@suse.de>
Cc: Paul Mackerras <paulus@samba.org>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/powerpc/include/asm/kvm_host.h | 2 ++
 arch/powerpc/kvm/book3s_64_mmu_hv.c | 7 +++++++
 arch/x86/include/asm/kvm_host.h     | 1 +
 arch/x86/kvm/mmu.c                  | 5 +++++
 virt/kvm/kvm_main.c                 | 3 +--
 5 files changed, 16 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 50ea12fd7bf..572ad014126 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -52,6 +52,8 @@
 
 struct kvm;
 extern int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
+extern int kvm_unmap_hva_range(struct kvm *kvm,
+			       unsigned long start, unsigned long end);
 extern int kvm_age_hva(struct kvm *kvm, unsigned long hva);
 extern int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
 extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 1a470bc2876..3c635c0616b 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -870,6 +870,13 @@ int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
 	return 0;
 }
 
+int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
+{
+	if (kvm->arch.using_mmu_notifiers)
+		kvm_handle_hva_range(kvm, start, end, kvm_unmap_rmapp);
+	return 0;
+}
+
 static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
 			 unsigned long gfn)
 {
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index a3e9409e90b..d4aab865606 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -944,6 +944,7 @@ extern bool kvm_rebooting;
 
 #define KVM_ARCH_WANT_MMU_NOTIFIER
 int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
+int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end);
 int kvm_age_hva(struct kvm *kvm, unsigned long hva);
 int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
 void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 7235b0c9587..d2855f895fd 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1324,6 +1324,11 @@ int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
 	return kvm_handle_hva(kvm, hva, 0, kvm_unmap_rmapp);
 }
 
+int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
+{
+	return kvm_handle_hva_range(kvm, start, end, 0, kvm_unmap_rmapp);
+}
+
 void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
 {
 	kvm_handle_hva(kvm, hva, (unsigned long)&pte, kvm_set_pte_rmapp);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index b3ce91c623e..e2b1a159e5d 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -332,8 +332,7 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
 	 * count is also read inside the mmu_lock critical section.
 	 */
 	kvm->mmu_notifier_count++;
-	for (; start < end; start += PAGE_SIZE)
-		need_tlb_flush |= kvm_unmap_hva(kvm, start);
+	need_tlb_flush = kvm_unmap_hva_range(kvm, start, end);
 	need_tlb_flush |= kvm->tlbs_dirty;
 	/* we've to flush the tlb before the pages can be freed */
 	if (need_tlb_flush)
-- 
cgit v1.2.3-70-g09d2


From 77d11309b3a10e1ce112058ec2c9b7b979bcf311 Mon Sep 17 00:00:00 2001
From: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
Date: Mon, 2 Jul 2012 17:57:17 +0900
Subject: KVM: Separate rmap_pde from kvm_lpage_info->write_count

This makes it possible to loop over rmap_pde arrays in the same way as
we do over rmap so that we can optimize kvm_handle_hva_range() easily in
the following patch.

Signed-off-by: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  2 +-
 arch/x86/kvm/mmu.c              |  6 +++---
 arch/x86/kvm/x86.c              | 11 +++++++++++
 3 files changed, 15 insertions(+), 4 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index d4aab865606..4f98da9243f 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -500,11 +500,11 @@ struct kvm_vcpu_arch {
 };
 
 struct kvm_lpage_info {
-	unsigned long rmap_pde;
 	int write_count;
 };
 
 struct kvm_arch_memory_slot {
+	unsigned long *rmap_pde[KVM_NR_PAGE_SIZES - 1];
 	struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1];
 };
 
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index d2855f895fd..3b3f5ae5da6 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -960,13 +960,13 @@ static void pte_list_walk(unsigned long *pte_list, pte_list_walk_fn fn)
 static unsigned long *__gfn_to_rmap(gfn_t gfn, int level,
 				    struct kvm_memory_slot *slot)
 {
-	struct kvm_lpage_info *linfo;
+	unsigned long idx;
 
 	if (likely(level == PT_PAGE_TABLE_LEVEL))
 		return &slot->rmap[gfn - slot->base_gfn];
 
-	linfo = lpage_info_slot(gfn, slot, level);
-	return &linfo->rmap_pde;
+	idx = gfn_to_index(gfn, slot->base_gfn, level);
+	return &slot->arch.rmap_pde[level - PT_DIRECTORY_LEVEL][idx];
 }
 
 /*
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 59b59508ff0..829b4e97255 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6314,6 +6314,10 @@ void kvm_arch_free_memslot(struct kvm_memory_slot *free,
 	int i;
 
 	for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) {
+		if (!dont || free->arch.rmap_pde[i] != dont->arch.rmap_pde[i]) {
+			kvm_kvfree(free->arch.rmap_pde[i]);
+			free->arch.rmap_pde[i] = NULL;
+		}
 		if (!dont || free->arch.lpage_info[i] != dont->arch.lpage_info[i]) {
 			kvm_kvfree(free->arch.lpage_info[i]);
 			free->arch.lpage_info[i] = NULL;
@@ -6333,6 +6337,11 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
 		lpages = gfn_to_index(slot->base_gfn + npages - 1,
 				      slot->base_gfn, level) + 1;
 
+		slot->arch.rmap_pde[i] =
+			kvm_kvzalloc(lpages * sizeof(*slot->arch.rmap_pde[i]));
+		if (!slot->arch.rmap_pde[i])
+			goto out_free;
+
 		slot->arch.lpage_info[i] =
 			kvm_kvzalloc(lpages * sizeof(*slot->arch.lpage_info[i]));
 		if (!slot->arch.lpage_info[i])
@@ -6361,7 +6370,9 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
 
 out_free:
 	for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) {
+		kvm_kvfree(slot->arch.rmap_pde[i]);
 		kvm_kvfree(slot->arch.lpage_info[i]);
+		slot->arch.rmap_pde[i] = NULL;
 		slot->arch.lpage_info[i] = NULL;
 	}
 	return -ENOMEM;
-- 
cgit v1.2.3-70-g09d2


From 048212d0bc0b1769a4bbecd7ace8c8d237577d1b Mon Sep 17 00:00:00 2001
From: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
Date: Mon, 2 Jul 2012 17:57:59 +0900
Subject: KVM: MMU: Add memslot parameter to hva handlers

This is needed to push trace_kvm_age_page() into kvm_age_rmapp() in the
following patch.

Signed-off-by: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/mmu.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 3b3f5ae5da6..dfd7a9a3115 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1200,7 +1200,7 @@ static bool rmap_write_protect(struct kvm *kvm, u64 gfn)
 }
 
 static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
-			   unsigned long data)
+			   struct kvm_memory_slot *slot, unsigned long data)
 {
 	u64 *sptep;
 	struct rmap_iterator iter;
@@ -1218,7 +1218,7 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
 }
 
 static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
-			     unsigned long data)
+			     struct kvm_memory_slot *slot, unsigned long data)
 {
 	u64 *sptep;
 	struct rmap_iterator iter;
@@ -1265,6 +1265,7 @@ static int kvm_handle_hva_range(struct kvm *kvm,
 				unsigned long data,
 				int (*handler)(struct kvm *kvm,
 					       unsigned long *rmapp,
+					       struct kvm_memory_slot *slot,
 					       unsigned long data))
 {
 	int j;
@@ -1299,7 +1300,7 @@ static int kvm_handle_hva_range(struct kvm *kvm,
 				unsigned long *rmapp;
 
 				rmapp = __gfn_to_rmap(gfn, j, memslot);
-				ret |= handler(kvm, rmapp, data);
+				ret |= handler(kvm, rmapp, memslot, data);
 			}
 			trace_kvm_age_page(memslot->userspace_addr +
 					(gfn - memslot->base_gfn) * PAGE_SIZE,
@@ -1314,6 +1315,7 @@ static int kvm_handle_hva_range(struct kvm *kvm,
 static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
 			  unsigned long data,
 			  int (*handler)(struct kvm *kvm, unsigned long *rmapp,
+					 struct kvm_memory_slot *slot,
 					 unsigned long data))
 {
 	return kvm_handle_hva_range(kvm, hva, hva + 1, data, handler);
@@ -1335,7 +1337,7 @@ void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
 }
 
 static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
-			 unsigned long data)
+			 struct kvm_memory_slot *slot, unsigned long data)
 {
 	u64 *sptep;
 	struct rmap_iterator uninitialized_var(iter);
@@ -1350,7 +1352,7 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
 	 * out actively used pages or breaking up actively used hugepages.
 	 */
 	if (!shadow_accessed_mask)
-		return kvm_unmap_rmapp(kvm, rmapp, data);
+		return kvm_unmap_rmapp(kvm, rmapp, slot, data);
 
 	for (sptep = rmap_get_first(*rmapp, &iter); sptep;
 	     sptep = rmap_get_next(&iter)) {
@@ -1367,7 +1369,7 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
 }
 
 static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
-			      unsigned long data)
+			      struct kvm_memory_slot *slot, unsigned long data)
 {
 	u64 *sptep;
 	struct rmap_iterator iter;
@@ -1405,7 +1407,7 @@ static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
 
 	rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level);
 
-	kvm_unmap_rmapp(vcpu->kvm, rmapp, 0);
+	kvm_unmap_rmapp(vcpu->kvm, rmapp, NULL, 0);
 	kvm_flush_remote_tlbs(vcpu->kvm);
 }
 
-- 
cgit v1.2.3-70-g09d2


From f395302e09ef783b8f82d1160510a95aa8c66dbc Mon Sep 17 00:00:00 2001
From: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
Date: Mon, 2 Jul 2012 17:58:48 +0900
Subject: KVM: MMU: Push trace_kvm_age_page() into kvm_age_rmapp()

This restricts the tracing to page aging and makes it possible to
optimize kvm_handle_hva_range() further in the following patch.

Signed-off-by: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/mmu.c | 23 ++++++++++-------------
 1 file changed, 10 insertions(+), 13 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index dfd7a9a3115..58adec38448 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1269,8 +1269,7 @@ static int kvm_handle_hva_range(struct kvm *kvm,
 					       unsigned long data))
 {
 	int j;
-	int ret;
-	int retval = 0;
+	int ret = 0;
 	struct kvm_memslots *slots;
 	struct kvm_memory_slot *memslot;
 
@@ -1293,8 +1292,6 @@ static int kvm_handle_hva_range(struct kvm *kvm,
 		gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot);
 
 		for (; gfn < gfn_end; ++gfn) {
-			ret = 0;
-
 			for (j = PT_PAGE_TABLE_LEVEL;
 			     j < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++j) {
 				unsigned long *rmapp;
@@ -1302,14 +1299,10 @@ static int kvm_handle_hva_range(struct kvm *kvm,
 				rmapp = __gfn_to_rmap(gfn, j, memslot);
 				ret |= handler(kvm, rmapp, memslot, data);
 			}
-			trace_kvm_age_page(memslot->userspace_addr +
-					(gfn - memslot->base_gfn) * PAGE_SIZE,
-					memslot, ret);
-			retval |= ret;
 		}
 	}
 
-	return retval;
+	return ret;
 }
 
 static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
@@ -1351,8 +1344,10 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
 	 * This has some overhead, but not as much as the cost of swapping
 	 * out actively used pages or breaking up actively used hugepages.
 	 */
-	if (!shadow_accessed_mask)
-		return kvm_unmap_rmapp(kvm, rmapp, slot, data);
+	if (!shadow_accessed_mask) {
+		young = kvm_unmap_rmapp(kvm, rmapp, slot, data);
+		goto out;
+	}
 
 	for (sptep = rmap_get_first(*rmapp, &iter); sptep;
 	     sptep = rmap_get_next(&iter)) {
@@ -1364,7 +1359,9 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
 				 (unsigned long *)sptep);
 		}
 	}
-
+out:
+	/* @data has hva passed to kvm_age_hva(). */
+	trace_kvm_age_page(data, slot, young);
 	return young;
 }
 
@@ -1413,7 +1410,7 @@ static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
 
 int kvm_age_hva(struct kvm *kvm, unsigned long hva)
 {
-	return kvm_handle_hva(kvm, hva, 0, kvm_age_rmapp);
+	return kvm_handle_hva(kvm, hva, hva, kvm_age_rmapp);
 }
 
 int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
-- 
cgit v1.2.3-70-g09d2


From bcd3ef58283a471d6b65855b83f78bd39eb55391 Mon Sep 17 00:00:00 2001
From: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
Date: Mon, 2 Jul 2012 17:59:33 +0900
Subject: KVM: MMU: Avoid handling same rmap_pde in kvm_handle_hva_range()

When we invalidate a THP page, we call the handler with the same
rmap_pde argument 512 times in the following loop:

  for each guest page in the range
    for each level
      unmap using rmap

This patch avoids these extra handler calls by changing the loop order
like this:

  for each level
    for each rmap in the range
      unmap using rmap

With the preceding patches in the patch series, this made THP page
invalidation more than 5 times faster on our x86 host: the host became
more responsive during swapping the guest's memory as a result.

Signed-off-by: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/mmu.c | 28 ++++++++++++++++++----------
 1 file changed, 18 insertions(+), 10 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 58adec38448..a5d6ef785b7 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1277,7 +1277,7 @@ static int kvm_handle_hva_range(struct kvm *kvm,
 
 	kvm_for_each_memslot(memslot, slots) {
 		unsigned long hva_start, hva_end;
-		gfn_t gfn, gfn_end;
+		gfn_t gfn_start, gfn_end;
 
 		hva_start = max(start, memslot->userspace_addr);
 		hva_end = min(end, memslot->userspace_addr +
@@ -1286,19 +1286,27 @@ static int kvm_handle_hva_range(struct kvm *kvm,
 			continue;
 		/*
 		 * {gfn(page) | page intersects with [hva_start, hva_end)} =
-		 * {gfn, gfn+1, ..., gfn_end-1}.
+		 * {gfn_start, gfn_start+1, ..., gfn_end-1}.
 		 */
-		gfn = hva_to_gfn_memslot(hva_start, memslot);
+		gfn_start = hva_to_gfn_memslot(hva_start, memslot);
 		gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot);
 
-		for (; gfn < gfn_end; ++gfn) {
-			for (j = PT_PAGE_TABLE_LEVEL;
-			     j < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++j) {
-				unsigned long *rmapp;
+		for (j = PT_PAGE_TABLE_LEVEL;
+		     j < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++j) {
+			unsigned long idx, idx_end;
+			unsigned long *rmapp;
 
-				rmapp = __gfn_to_rmap(gfn, j, memslot);
-				ret |= handler(kvm, rmapp, memslot, data);
-			}
+			/*
+			 * {idx(page_j) | page_j intersects with
+			 *  [hva_start, hva_end)} = {idx, idx+1, ..., idx_end}.
+			 */
+			idx = gfn_to_index(gfn_start, memslot->base_gfn, j);
+			idx_end = gfn_to_index(gfn_end - 1, memslot->base_gfn, j);
+
+			rmapp = __gfn_to_rmap(gfn_start, j, memslot);
+
+			for (; idx <= idx_end; ++idx)
+				ret |= handler(kvm, rmapp++, memslot, data);
 		}
 	}
 
-- 
cgit v1.2.3-70-g09d2


From 2f5f6ad9390c1ebbf738d130dbfe80b60eaa167e Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Mon, 8 Aug 2011 16:57:47 -0400
Subject: ftrace: Pass ftrace_ops as third parameter to function trace callback

Currently the function trace callback receives only the ip and parent_ip
of the function that it traced. It would be more powerful to also return
the ops that registered the function as well. This allows the same function
to act differently depending on what ftrace_ops registered it.

Link: http://lkml.kernel.org/r/20120612225424.267254552@goodmis.org

Reviewed-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 arch/x86/include/asm/ftrace.h     |   4 ++
 arch/x86/kernel/entry_64.S        |   1 +
 include/linux/ftrace.h            |  16 +++++-
 kernel/trace/ftrace.c             | 101 ++++++++++++++++++++++++++------------
 kernel/trace/trace_event_perf.c   |   3 +-
 kernel/trace/trace_events.c       |   3 +-
 kernel/trace/trace_functions.c    |   9 ++--
 kernel/trace/trace_irqsoff.c      |   3 +-
 kernel/trace/trace_sched_wakeup.c |   2 +-
 kernel/trace/trace_selftest.c     |  15 ++++--
 kernel/trace/trace_stack.c        |   2 +-
 11 files changed, 113 insertions(+), 46 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index b0767bc0874..783b107eacb 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -32,6 +32,10 @@
 #define MCOUNT_ADDR		((long)(mcount))
 #define MCOUNT_INSN_SIZE	5 /* sizeof mcount call */
 
+#if defined(CONFIG_DYNAMIC_FTRACE) && defined(CONFIG_X86_64)
+#define ARCH_SUPPORTS_FTRACE_OPS 1
+#endif
+
 #ifndef __ASSEMBLY__
 extern void mcount(void);
 extern atomic_t modifying_ftrace_code;
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 7d65133b51b..2b4f94c5dc6 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -79,6 +79,7 @@ ENTRY(ftrace_caller)
 
 	MCOUNT_SAVE_FRAME
 
+	leaq function_trace_op, %rdx
 	movq 0x38(%rsp), %rdi
 	movq 8(%rbp), %rsi
 	subq $MCOUNT_INSN_SIZE, %rdi
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 55e6d63d46d..2d596411988 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -18,6 +18,15 @@
 
 #include <asm/ftrace.h>
 
+/*
+ * If the arch supports passing the variable contents of
+ * function_trace_op as the third parameter back from the
+ * mcount call, then the arch should define this as 1.
+ */
+#ifndef ARCH_SUPPORTS_FTRACE_OPS
+#define ARCH_SUPPORTS_FTRACE_OPS 0
+#endif
+
 struct module;
 struct ftrace_hash;
 
@@ -29,7 +38,10 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
 		     void __user *buffer, size_t *lenp,
 		     loff_t *ppos);
 
-typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip);
+struct ftrace_ops;
+
+typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip,
+			      struct ftrace_ops *op);
 
 /*
  * FTRACE_OPS_FL_* bits denote the state of ftrace_ops struct and are
@@ -163,7 +175,7 @@ static inline int ftrace_function_local_disabled(struct ftrace_ops *ops)
 	return *this_cpu_ptr(ops->disabled);
 }
 
-extern void ftrace_stub(unsigned long a0, unsigned long a1);
+extern void ftrace_stub(unsigned long a0, unsigned long a1, struct ftrace_ops *op);
 
 #else /* !CONFIG_FUNCTION_TRACER */
 /*
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index b4f20fba09f..4f2ab9352a6 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -64,12 +64,19 @@
 
 #define FL_GLOBAL_CONTROL_MASK (FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_CONTROL)
 
+static struct ftrace_ops ftrace_list_end __read_mostly = {
+	.func		= ftrace_stub,
+};
+
 /* ftrace_enabled is a method to turn ftrace on or off */
 int ftrace_enabled __read_mostly;
 static int last_ftrace_enabled;
 
 /* Quick disabling of function tracer. */
-int function_trace_stop;
+int function_trace_stop __read_mostly;
+
+/* Current function tracing op */
+struct ftrace_ops *function_trace_op __read_mostly = &ftrace_list_end;
 
 /* List for set_ftrace_pid's pids. */
 LIST_HEAD(ftrace_pids);
@@ -86,10 +93,6 @@ static int ftrace_disabled __read_mostly;
 
 static DEFINE_MUTEX(ftrace_lock);
 
-static struct ftrace_ops ftrace_list_end __read_mostly = {
-	.func		= ftrace_stub,
-};
-
 static struct ftrace_ops *ftrace_global_list __read_mostly = &ftrace_list_end;
 static struct ftrace_ops *ftrace_control_list __read_mostly = &ftrace_list_end;
 static struct ftrace_ops *ftrace_ops_list __read_mostly = &ftrace_list_end;
@@ -100,8 +103,14 @@ ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub;
 static struct ftrace_ops global_ops;
 static struct ftrace_ops control_ops;
 
-static void
-ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip);
+#if ARCH_SUPPORTS_FTRACE_OPS
+static void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
+				 struct ftrace_ops *op);
+#else
+/* See comment below, where ftrace_ops_list_func is defined */
+static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip);
+#define ftrace_ops_list_func ((ftrace_func_t)ftrace_ops_no_ops)
+#endif
 
 /*
  * Traverse the ftrace_global_list, invoking all entries.  The reason that we
@@ -112,29 +121,29 @@ ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip);
  *
  * Silly Alpha and silly pointer-speculation compiler optimizations!
  */
-static void ftrace_global_list_func(unsigned long ip,
-				    unsigned long parent_ip)
+static void
+ftrace_global_list_func(unsigned long ip, unsigned long parent_ip,
+			struct ftrace_ops *op)
 {
-	struct ftrace_ops *op;
-
 	if (unlikely(trace_recursion_test(TRACE_GLOBAL_BIT)))
 		return;
 
 	trace_recursion_set(TRACE_GLOBAL_BIT);
 	op = rcu_dereference_raw(ftrace_global_list); /*see above*/
 	while (op != &ftrace_list_end) {
-		op->func(ip, parent_ip);
+		op->func(ip, parent_ip, op);
 		op = rcu_dereference_raw(op->next); /*see above*/
 	};
 	trace_recursion_clear(TRACE_GLOBAL_BIT);
 }
 
-static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip)
+static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip,
+			    struct ftrace_ops *op)
 {
 	if (!test_tsk_trace_trace(current))
 		return;
 
-	ftrace_pid_function(ip, parent_ip);
+	ftrace_pid_function(ip, parent_ip, op);
 }
 
 static void set_ftrace_pid_function(ftrace_func_t func)
@@ -163,12 +172,13 @@ void clear_ftrace_function(void)
  * For those archs that do not test ftrace_trace_stop in their
  * mcount call site, we need to do it from C.
  */
-static void ftrace_test_stop_func(unsigned long ip, unsigned long parent_ip)
+static void ftrace_test_stop_func(unsigned long ip, unsigned long parent_ip,
+				  struct ftrace_ops *op)
 {
 	if (function_trace_stop)
 		return;
 
-	__ftrace_trace_function(ip, parent_ip);
+	__ftrace_trace_function(ip, parent_ip, op);
 }
 #endif
 
@@ -230,15 +240,24 @@ static void update_ftrace_function(void)
 
 	/*
 	 * If we are at the end of the list and this ops is
-	 * not dynamic, then have the mcount trampoline call
-	 * the function directly
+	 * not dynamic and the arch supports passing ops, then have the
+	 * mcount trampoline call the function directly.
 	 */
 	if (ftrace_ops_list == &ftrace_list_end ||
 	    (ftrace_ops_list->next == &ftrace_list_end &&
-	     !(ftrace_ops_list->flags & FTRACE_OPS_FL_DYNAMIC)))
+	     !(ftrace_ops_list->flags & FTRACE_OPS_FL_DYNAMIC) &&
+	     ARCH_SUPPORTS_FTRACE_OPS)) {
+		/* Set the ftrace_ops that the arch callback uses */
+		if (ftrace_ops_list == &global_ops)
+			function_trace_op = ftrace_global_list;
+		else
+			function_trace_op = ftrace_ops_list;
 		func = ftrace_ops_list->func;
-	else
+	} else {
+		/* Just use the default ftrace_ops */
+		function_trace_op = &ftrace_list_end;
 		func = ftrace_ops_list_func;
+	}
 
 #ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
 	ftrace_trace_function = func;
@@ -773,7 +792,8 @@ ftrace_profile_alloc(struct ftrace_profile_stat *stat, unsigned long ip)
 }
 
 static void
-function_profile_call(unsigned long ip, unsigned long parent_ip)
+function_profile_call(unsigned long ip, unsigned long parent_ip,
+		      struct ftrace_ops *ops)
 {
 	struct ftrace_profile_stat *stat;
 	struct ftrace_profile *rec;
@@ -803,7 +823,7 @@ function_profile_call(unsigned long ip, unsigned long parent_ip)
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 static int profile_graph_entry(struct ftrace_graph_ent *trace)
 {
-	function_profile_call(trace->func, 0);
+	function_profile_call(trace->func, 0, NULL);
 	return 1;
 }
 
@@ -2790,8 +2810,8 @@ static int __init ftrace_mod_cmd_init(void)
 }
 device_initcall(ftrace_mod_cmd_init);
 
-static void
-function_trace_probe_call(unsigned long ip, unsigned long parent_ip)
+static void function_trace_probe_call(unsigned long ip, unsigned long parent_ip,
+				      struct ftrace_ops *op)
 {
 	struct ftrace_func_probe *entry;
 	struct hlist_head *hhd;
@@ -3942,10 +3962,9 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip)
 #endif /* CONFIG_DYNAMIC_FTRACE */
 
 static void
-ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip)
+ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip,
+			struct ftrace_ops *op)
 {
-	struct ftrace_ops *op;
-
 	if (unlikely(trace_recursion_test(TRACE_CONTROL_BIT)))
 		return;
 
@@ -3959,7 +3978,7 @@ ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip)
 	while (op != &ftrace_list_end) {
 		if (!ftrace_function_local_disabled(op) &&
 		    ftrace_ops_test(op, ip))
-			op->func(ip, parent_ip);
+			op->func(ip, parent_ip, op);
 
 		op = rcu_dereference_raw(op->next);
 	};
@@ -3971,8 +3990,9 @@ static struct ftrace_ops control_ops = {
 	.func = ftrace_ops_control_func,
 };
 
-static void
-ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip)
+static inline void
+__ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
+		       struct ftrace_ops *ignored)
 {
 	struct ftrace_ops *op;
 
@@ -3988,13 +4008,32 @@ ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip)
 	op = rcu_dereference_raw(ftrace_ops_list);
 	while (op != &ftrace_list_end) {
 		if (ftrace_ops_test(op, ip))
-			op->func(ip, parent_ip);
+			op->func(ip, parent_ip, op);
 		op = rcu_dereference_raw(op->next);
 	};
 	preempt_enable_notrace();
 	trace_recursion_clear(TRACE_INTERNAL_BIT);
 }
 
+/*
+ * Some archs only support passing ip and parent_ip. Even though
+ * the list function ignores the op parameter, we do not want any
+ * C side effects, where a function is called without the caller
+ * sending a third parameter.
+ */
+#if ARCH_SUPPORTS_FTRACE_OPS
+static void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
+				 struct ftrace_ops *op)
+{
+	__ftrace_ops_list_func(ip, parent_ip, NULL);
+}
+#else
+static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip)
+{
+	__ftrace_ops_list_func(ip, parent_ip, NULL);
+}
+#endif
+
 static void clear_ftrace_swapper(void)
 {
 	struct task_struct *p;
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index fee3752ae8f..a872a9a298a 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -258,7 +258,8 @@ EXPORT_SYMBOL_GPL(perf_trace_buf_prepare);
 
 #ifdef CONFIG_FUNCTION_TRACER
 static void
-perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip)
+perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip,
+			  struct ftrace_ops *ops)
 {
 	struct ftrace_entry *entry;
 	struct hlist_head *head;
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 29111da1d10..88daa5177bf 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -1681,7 +1681,8 @@ static __init void event_trace_self_tests(void)
 static DEFINE_PER_CPU(atomic_t, ftrace_test_event_disable);
 
 static void
-function_test_events_call(unsigned long ip, unsigned long parent_ip)
+function_test_events_call(unsigned long ip, unsigned long parent_ip,
+			  struct ftrace_ops *op)
 {
 	struct ring_buffer_event *event;
 	struct ring_buffer *buffer;
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index c7b0c6a7db0..fceb7a9aa06 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -48,7 +48,8 @@ static void function_trace_start(struct trace_array *tr)
 }
 
 static void
-function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip)
+function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip,
+				 struct ftrace_ops *op)
 {
 	struct trace_array *tr = func_trace;
 	struct trace_array_cpu *data;
@@ -75,7 +76,8 @@ function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip)
 }
 
 static void
-function_trace_call(unsigned long ip, unsigned long parent_ip)
+function_trace_call(unsigned long ip, unsigned long parent_ip,
+		    struct ftrace_ops *op)
 {
 	struct trace_array *tr = func_trace;
 	struct trace_array_cpu *data;
@@ -106,7 +108,8 @@ function_trace_call(unsigned long ip, unsigned long parent_ip)
 }
 
 static void
-function_stack_trace_call(unsigned long ip, unsigned long parent_ip)
+function_stack_trace_call(unsigned long ip, unsigned long parent_ip,
+			  struct ftrace_ops *op)
 {
 	struct trace_array *tr = func_trace;
 	struct trace_array_cpu *data;
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 99d20e92036..2862c77f95d 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -136,7 +136,8 @@ static int func_prolog_dec(struct trace_array *tr,
  * irqsoff uses its own tracer function to keep the overhead down:
  */
 static void
-irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip)
+irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip,
+		    struct ftrace_ops *op)
 {
 	struct trace_array *tr = irqsoff_trace;
 	struct trace_array_cpu *data;
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index ff791ea48b5..0caf4f5da56 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -108,7 +108,7 @@ out_enable:
  * wakeup uses its own tracer function to keep the overhead down:
  */
 static void
-wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
+wakeup_tracer_call(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op)
 {
 	struct trace_array *tr = wakeup_trace;
 	struct trace_array_cpu *data;
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 288541f977f..9ae40c823af 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -103,35 +103,40 @@ static inline void warn_failed_init_tracer(struct tracer *trace, int init_ret)
 
 static int trace_selftest_test_probe1_cnt;
 static void trace_selftest_test_probe1_func(unsigned long ip,
-					    unsigned long pip)
+					    unsigned long pip,
+					    struct ftrace_ops *op)
 {
 	trace_selftest_test_probe1_cnt++;
 }
 
 static int trace_selftest_test_probe2_cnt;
 static void trace_selftest_test_probe2_func(unsigned long ip,
-					    unsigned long pip)
+					    unsigned long pip,
+					    struct ftrace_ops *op)
 {
 	trace_selftest_test_probe2_cnt++;
 }
 
 static int trace_selftest_test_probe3_cnt;
 static void trace_selftest_test_probe3_func(unsigned long ip,
-					    unsigned long pip)
+					    unsigned long pip,
+					    struct ftrace_ops *op)
 {
 	trace_selftest_test_probe3_cnt++;
 }
 
 static int trace_selftest_test_global_cnt;
 static void trace_selftest_test_global_func(unsigned long ip,
-					    unsigned long pip)
+					    unsigned long pip,
+					    struct ftrace_ops *op)
 {
 	trace_selftest_test_global_cnt++;
 }
 
 static int trace_selftest_test_dyn_cnt;
 static void trace_selftest_test_dyn_func(unsigned long ip,
-					 unsigned long pip)
+					 unsigned long pip,
+					 struct ftrace_ops *op)
 {
 	trace_selftest_test_dyn_cnt++;
 }
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index d4545f49242..e20006d5fb6 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -111,7 +111,7 @@ static inline void check_stack(void)
 }
 
 static void
-stack_trace_call(unsigned long ip, unsigned long parent_ip)
+stack_trace_call(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op)
 {
 	int cpu;
 
-- 
cgit v1.2.3-70-g09d2


From 28fb5dfa783c25dbeeb25a72663f8066a3a517f5 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Wed, 10 Aug 2011 22:00:55 -0400
Subject: ftrace/x86_32: Push ftrace_ops in as 3rd parameter to function tracer

Add support of passing the current ftrace_ops into the 3rd parameter
of the callback to the function tracer.

Link: http://lkml.kernel.org/r/20120612225424.942411318@goodmis.org

Reviewed-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 arch/x86/include/asm/ftrace.h | 2 +-
 arch/x86/kernel/entry_32.S    | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index 783b107eacb..b3bb1f3f277 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -32,7 +32,7 @@
 #define MCOUNT_ADDR		((long)(mcount))
 #define MCOUNT_INSN_SIZE	5 /* sizeof mcount call */
 
-#if defined(CONFIG_DYNAMIC_FTRACE) && defined(CONFIG_X86_64)
+#ifdef CONFIG_DYNAMIC_FTRACE
 #define ARCH_SUPPORTS_FTRACE_OPS 1
 #endif
 
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 623f2883747..e3e17a0b7ff 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -1111,6 +1111,7 @@ ENTRY(ftrace_caller)
 	pushl %edx
 	movl 0xc(%esp), %eax
 	movl 0x4(%ebp), %edx
+	leal function_trace_op, %ecx
 	subl $MCOUNT_INSN_SIZE, %eax
 
 .globl ftrace_call
-- 
cgit v1.2.3-70-g09d2


From 08f6fba503111e0336f2b4d6915a4a18f9b60e51 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Mon, 30 Apr 2012 16:20:23 -0400
Subject: ftrace/x86: Add separate function to save regs

Add a way to have different functions calling different trampolines.
If a ftrace_ops wants regs saved on the return, then have only the
functions with ops registered to save regs. Functions registered by
other ops would not be affected, unless the functions overlap.

If one ftrace_ops registered functions A, B and C and another ops
registered fucntions to save regs on A, and D, then only functions
A and D would be saving regs. Function B and C would work as normal.
Although A is registered by both ops: normal and saves regs; this is fine
as saving the regs is needed to satisfy one of the ops that calls it
but the regs are ignored by the other ops function.

x86_64 implements the full regs saving, and i386 just passes a NULL
for regs to satisfy the ftrace_ops passing. Where an arch must supply
both regs and ftrace_ops parameters, even if regs is just NULL.

It is OK for an arch to pass NULL regs. All function trace users that
require regs passing must add the flag FTRACE_OPS_FL_SAVE_REGS when
registering the ftrace_ops. If the arch does not support saving regs
then the ftrace_ops will fail to register. The flag
FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED may be set that will prevent the
ftrace_ops from failing to register. In this case, the handler may
either check if regs is not NULL or check if ARCH_SUPPORTS_FTRACE_SAVE_REGS.
If the arch supports passing regs it will set this macro and pass regs
for ops that request them. All other archs will just pass NULL.

Link: Link: http://lkml.kernel.org/r/20120711195745.107705970@goodmis.org

Cc: Alexander van Heukelum <heukelum@fastmail.fm>
Reviewed-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 arch/x86/include/asm/ftrace.h |  47 +++++++++++--------
 arch/x86/kernel/entry_32.S    |   4 +-
 arch/x86/kernel/entry_64.S    |  94 +++++++++++++++++++++++++++++++++----
 arch/x86/kernel/ftrace.c      |  77 ++++++++++++++++++++++++++++--
 include/linux/ftrace.h        | 107 +++++++++++++++++++++++++++++++++++++++---
 kernel/trace/ftrace.c         |  91 +++++++++++++++++++++++++++++++----
 6 files changed, 373 insertions(+), 47 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index b3bb1f3f277..a8475014c4a 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -3,27 +3,33 @@
 
 #ifdef __ASSEMBLY__
 
-	.macro MCOUNT_SAVE_FRAME
-	/* taken from glibc */
-	subq $0x38, %rsp
-	movq %rax, (%rsp)
-	movq %rcx, 8(%rsp)
-	movq %rdx, 16(%rsp)
-	movq %rsi, 24(%rsp)
-	movq %rdi, 32(%rsp)
-	movq %r8, 40(%rsp)
-	movq %r9, 48(%rsp)
+	/* skip is set if the stack was already partially adjusted */
+	.macro MCOUNT_SAVE_FRAME skip=0
+	 /*
+	  * We add enough stack to save all regs.
+	  */
+	subq $(SS+8-\skip), %rsp
+	movq %rax, RAX(%rsp)
+	movq %rcx, RCX(%rsp)
+	movq %rdx, RDX(%rsp)
+	movq %rsi, RSI(%rsp)
+	movq %rdi, RDI(%rsp)
+	movq %r8, R8(%rsp)
+	movq %r9, R9(%rsp)
+	 /* Move RIP to its proper location */
+	movq SS+8(%rsp), %rdx
+	movq %rdx, RIP(%rsp)
 	.endm
 
-	.macro MCOUNT_RESTORE_FRAME
-	movq 48(%rsp), %r9
-	movq 40(%rsp), %r8
-	movq 32(%rsp), %rdi
-	movq 24(%rsp), %rsi
-	movq 16(%rsp), %rdx
-	movq 8(%rsp), %rcx
-	movq (%rsp), %rax
-	addq $0x38, %rsp
+	.macro MCOUNT_RESTORE_FRAME skip=0
+	movq R9(%rsp), %r9
+	movq R8(%rsp), %r8
+	movq RDI(%rsp), %rdi
+	movq RSI(%rsp), %rsi
+	movq RDX(%rsp), %rdx
+	movq RCX(%rsp), %rcx
+	movq RAX(%rsp), %rax
+	addq $(SS+8-\skip), %rsp
 	.endm
 
 #endif
@@ -34,6 +40,9 @@
 
 #ifdef CONFIG_DYNAMIC_FTRACE
 #define ARCH_SUPPORTS_FTRACE_OPS 1
+#ifdef CONFIG_X86_64
+#define ARCH_SUPPORTS_FTRACE_SAVE_REGS
+#endif
 #endif
 
 #ifndef __ASSEMBLY__
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index e3e17a0b7ff..5da11d1b85a 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -1109,7 +1109,8 @@ ENTRY(ftrace_caller)
 	pushl %eax
 	pushl %ecx
 	pushl %edx
-	movl 0xc(%esp), %eax
+	pushl $0	/* Pass NULL as regs pointer */
+	movl 4*4(%esp), %eax
 	movl 0x4(%ebp), %edx
 	leal function_trace_op, %ecx
 	subl $MCOUNT_INSN_SIZE, %eax
@@ -1118,6 +1119,7 @@ ENTRY(ftrace_caller)
 ftrace_call:
 	call ftrace_stub
 
+	addl $4,%esp	/* skip NULL pointer */
 	popl %edx
 	popl %ecx
 	popl %eax
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 2b4f94c5dc6..52bda2e8f7a 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -73,21 +73,34 @@ ENTRY(mcount)
 	retq
 END(mcount)
 
+/* skip is set if stack has been adjusted */
+.macro ftrace_caller_setup skip=0
+	MCOUNT_SAVE_FRAME \skip
+
+	/* Load the ftrace_ops into the 3rd parameter */
+	leaq function_trace_op, %rdx
+
+	/* Load ip into the first parameter */
+	movq RIP(%rsp), %rdi
+	subq $MCOUNT_INSN_SIZE, %rdi
+	/* Load the parent_ip into the second parameter */
+	movq 8(%rbp), %rsi
+.endm
+
 ENTRY(ftrace_caller)
+	/* Check if tracing was disabled (quick check) */
 	cmpl $0, function_trace_stop
 	jne  ftrace_stub
 
-	MCOUNT_SAVE_FRAME
-
-	leaq function_trace_op, %rdx
-	movq 0x38(%rsp), %rdi
-	movq 8(%rbp), %rsi
-	subq $MCOUNT_INSN_SIZE, %rdi
+	ftrace_caller_setup
+	/* regs go into 4th parameter (but make it NULL) */
+	movq $0, %rcx
 
 GLOBAL(ftrace_call)
 	call ftrace_stub
 
 	MCOUNT_RESTORE_FRAME
+ftrace_return:
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 GLOBAL(ftrace_graph_call)
@@ -98,6 +111,71 @@ GLOBAL(ftrace_stub)
 	retq
 END(ftrace_caller)
 
+ENTRY(ftrace_regs_caller)
+	/* Save the current flags before compare (in SS location)*/
+	pushfq
+
+	/* Check if tracing was disabled (quick check) */
+	cmpl $0, function_trace_stop
+	jne  ftrace_restore_flags
+
+	/* skip=8 to skip flags saved in SS */
+	ftrace_caller_setup 8
+
+	/* Save the rest of pt_regs */
+	movq %r15, R15(%rsp)
+	movq %r14, R14(%rsp)
+	movq %r13, R13(%rsp)
+	movq %r12, R12(%rsp)
+	movq %r11, R11(%rsp)
+	movq %r10, R10(%rsp)
+	movq %rbp, RBP(%rsp)
+	movq %rbx, RBX(%rsp)
+	/* Copy saved flags */
+	movq SS(%rsp), %rcx
+	movq %rcx, EFLAGS(%rsp)
+	/* Kernel segments */
+	movq $__KERNEL_DS, %rcx
+	movq %rcx, SS(%rsp)
+	movq $__KERNEL_CS, %rcx
+	movq %rcx, CS(%rsp)
+	/* Stack - skipping return address */
+	leaq SS+16(%rsp), %rcx
+	movq %rcx, RSP(%rsp)
+
+	/* regs go into 4th parameter */
+	leaq (%rsp), %rcx
+
+GLOBAL(ftrace_regs_call)
+	call ftrace_stub
+
+	/* Copy flags back to SS, to restore them */
+	movq EFLAGS(%rsp), %rax
+	movq %rax, SS(%rsp)
+
+	/* restore the rest of pt_regs */
+	movq R15(%rsp), %r15
+	movq R14(%rsp), %r14
+	movq R13(%rsp), %r13
+	movq R12(%rsp), %r12
+	movq R10(%rsp), %r10
+	movq RBP(%rsp), %rbp
+	movq RBX(%rsp), %rbx
+
+	/* skip=8 to skip flags saved in SS */
+	MCOUNT_RESTORE_FRAME 8
+
+	/* Restore flags */
+	popfq
+
+	jmp ftrace_return
+ftrace_restore_flags:
+	popfq
+	jmp  ftrace_stub
+
+END(ftrace_regs_caller)
+
+
 #else /* ! CONFIG_DYNAMIC_FTRACE */
 ENTRY(mcount)
 	cmpl $0, function_trace_stop
@@ -120,7 +198,7 @@ GLOBAL(ftrace_stub)
 trace:
 	MCOUNT_SAVE_FRAME
 
-	movq 0x38(%rsp), %rdi
+	movq RIP(%rsp), %rdi
 	movq 8(%rbp), %rsi
 	subq $MCOUNT_INSN_SIZE, %rdi
 
@@ -141,7 +219,7 @@ ENTRY(ftrace_graph_caller)
 	MCOUNT_SAVE_FRAME
 
 	leaq 8(%rbp), %rdi
-	movq 0x38(%rsp), %rsi
+	movq RIP(%rsp), %rsi
 	movq (%rbp), %rdx
 	subq $MCOUNT_INSN_SIZE, %rsi
 
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index c3a7cb4bf6e..b90eb1a1307 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -206,6 +206,23 @@ static int
 ftrace_modify_code(unsigned long ip, unsigned const char *old_code,
 		   unsigned const char *new_code);
 
+#ifdef ARCH_SUPPORTS_FTRACE_SAVE_REGS
+/*
+ * Should never be called:
+ *  As it is only called by __ftrace_replace_code() which is called by
+ *  ftrace_replace_code() that x86 overrides, and by ftrace_update_code()
+ *  which is called to turn mcount into nops or nops into function calls
+ *  but not to convert a function from not using regs to one that uses
+ *  regs, which ftrace_modify_call() is for.
+ */
+int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
+				 unsigned long addr)
+{
+	WARN_ON(1);
+	return -EINVAL;
+}
+#endif
+
 int ftrace_update_ftrace_func(ftrace_func_t func)
 {
 	unsigned long ip = (unsigned long)(&ftrace_call);
@@ -220,6 +237,16 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
 
 	ret = ftrace_modify_code(ip, old, new);
 
+#ifdef ARCH_SUPPORTS_FTRACE_SAVE_REGS
+	/* Also update the regs callback function */
+	if (!ret) {
+		ip = (unsigned long)(&ftrace_regs_call);
+		memcpy(old, &ftrace_regs_call, MCOUNT_INSN_SIZE);
+		new = ftrace_call_replace(ip, (unsigned long)func);
+		ret = ftrace_modify_code(ip, old, new);
+	}
+#endif
+
 	atomic_dec(&modifying_ftrace_code);
 
 	return ret;
@@ -299,6 +326,32 @@ static int add_brk_on_nop(struct dyn_ftrace *rec)
 	return add_break(rec->ip, old);
 }
 
+/*
+ * If the record has the FTRACE_FL_REGS set, that means that it
+ * wants to convert to a callback that saves all regs. If FTRACE_FL_REGS
+ * is not not set, then it wants to convert to the normal callback.
+ */
+static unsigned long get_ftrace_addr(struct dyn_ftrace *rec)
+{
+	if (rec->flags & FTRACE_FL_REGS)
+		return (unsigned long)FTRACE_REGS_ADDR;
+	else
+		return (unsigned long)FTRACE_ADDR;
+}
+
+/*
+ * The FTRACE_FL_REGS_EN is set when the record already points to
+ * a function that saves all the regs. Basically the '_EN' version
+ * represents the current state of the function.
+ */
+static unsigned long get_ftrace_old_addr(struct dyn_ftrace *rec)
+{
+	if (rec->flags & FTRACE_FL_REGS_EN)
+		return (unsigned long)FTRACE_REGS_ADDR;
+	else
+		return (unsigned long)FTRACE_ADDR;
+}
+
 static int add_breakpoints(struct dyn_ftrace *rec, int enable)
 {
 	unsigned long ftrace_addr;
@@ -306,7 +359,7 @@ static int add_breakpoints(struct dyn_ftrace *rec, int enable)
 
 	ret = ftrace_test_record(rec, enable);
 
-	ftrace_addr = (unsigned long)FTRACE_ADDR;
+	ftrace_addr = get_ftrace_addr(rec);
 
 	switch (ret) {
 	case FTRACE_UPDATE_IGNORE:
@@ -316,6 +369,10 @@ static int add_breakpoints(struct dyn_ftrace *rec, int enable)
 		/* converting nop to call */
 		return add_brk_on_nop(rec);
 
+	case FTRACE_UPDATE_MODIFY_CALL_REGS:
+	case FTRACE_UPDATE_MODIFY_CALL:
+		ftrace_addr = get_ftrace_old_addr(rec);
+		/* fall through */
 	case FTRACE_UPDATE_MAKE_NOP:
 		/* converting a call to a nop */
 		return add_brk_on_call(rec, ftrace_addr);
@@ -360,13 +417,21 @@ static int remove_breakpoint(struct dyn_ftrace *rec)
 		 * If not, don't touch the breakpoint, we make just create
 		 * a disaster.
 		 */
-		ftrace_addr = (unsigned long)FTRACE_ADDR;
+		ftrace_addr = get_ftrace_addr(rec);
+		nop = ftrace_call_replace(ip, ftrace_addr);
+
+		if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) == 0)
+			goto update;
+
+		/* Check both ftrace_addr and ftrace_old_addr */
+		ftrace_addr = get_ftrace_old_addr(rec);
 		nop = ftrace_call_replace(ip, ftrace_addr);
 
 		if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0)
 			return -EINVAL;
 	}
 
+ update:
 	return probe_kernel_write((void *)ip, &nop[0], 1);
 }
 
@@ -405,12 +470,14 @@ static int add_update(struct dyn_ftrace *rec, int enable)
 
 	ret = ftrace_test_record(rec, enable);
 
-	ftrace_addr = (unsigned long)FTRACE_ADDR;
+	ftrace_addr  = get_ftrace_addr(rec);
 
 	switch (ret) {
 	case FTRACE_UPDATE_IGNORE:
 		return 0;
 
+	case FTRACE_UPDATE_MODIFY_CALL_REGS:
+	case FTRACE_UPDATE_MODIFY_CALL:
 	case FTRACE_UPDATE_MAKE_CALL:
 		/* converting nop to call */
 		return add_update_call(rec, ftrace_addr);
@@ -455,12 +522,14 @@ static int finish_update(struct dyn_ftrace *rec, int enable)
 
 	ret = ftrace_update_record(rec, enable);
 
-	ftrace_addr = (unsigned long)FTRACE_ADDR;
+	ftrace_addr = get_ftrace_addr(rec);
 
 	switch (ret) {
 	case FTRACE_UPDATE_IGNORE:
 		return 0;
 
+	case FTRACE_UPDATE_MODIFY_CALL_REGS:
+	case FTRACE_UPDATE_MODIFY_CALL:
 	case FTRACE_UPDATE_MAKE_CALL:
 		/* converting nop to call */
 		return finish_update_call(rec, ftrace_addr);
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index e4202881fb0..ab39990cc43 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -71,12 +71,28 @@ typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip,
  *           could be controled by following calls:
  *             ftrace_function_local_enable
  *             ftrace_function_local_disable
+ * SAVE_REGS - The ftrace_ops wants regs saved at each function called
+ *            and passed to the callback. If this flag is set, but the
+ *            architecture does not support passing regs
+ *            (ARCH_SUPPORTS_FTRACE_SAVE_REGS is not defined), then the
+ *            ftrace_ops will fail to register, unless the next flag
+ *            is set.
+ * SAVE_REGS_IF_SUPPORTED - This is the same as SAVE_REGS, but if the
+ *            handler can handle an arch that does not save regs
+ *            (the handler tests if regs == NULL), then it can set
+ *            this flag instead. It will not fail registering the ftrace_ops
+ *            but, the regs field will be NULL if the arch does not support
+ *            passing regs to the handler.
+ *            Note, if this flag is set, the SAVE_REGS flag will automatically
+ *            get set upon registering the ftrace_ops, if the arch supports it.
  */
 enum {
-	FTRACE_OPS_FL_ENABLED		= 1 << 0,
-	FTRACE_OPS_FL_GLOBAL		= 1 << 1,
-	FTRACE_OPS_FL_DYNAMIC		= 1 << 2,
-	FTRACE_OPS_FL_CONTROL		= 1 << 3,
+	FTRACE_OPS_FL_ENABLED			= 1 << 0,
+	FTRACE_OPS_FL_GLOBAL			= 1 << 1,
+	FTRACE_OPS_FL_DYNAMIC			= 1 << 2,
+	FTRACE_OPS_FL_CONTROL			= 1 << 3,
+	FTRACE_OPS_FL_SAVE_REGS			= 1 << 4,
+	FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED	= 1 << 5,
 };
 
 struct ftrace_ops {
@@ -254,12 +270,31 @@ extern void unregister_ftrace_function_probe_all(char *glob);
 
 extern int ftrace_text_reserved(void *start, void *end);
 
+/*
+ * The dyn_ftrace record's flags field is split into two parts.
+ * the first part which is '0-FTRACE_REF_MAX' is a counter of
+ * the number of callbacks that have registered the function that
+ * the dyn_ftrace descriptor represents.
+ *
+ * The second part is a mask:
+ *  ENABLED - the function is being traced
+ *  REGS    - the record wants the function to save regs
+ *  REGS_EN - the function is set up to save regs.
+ *
+ * When a new ftrace_ops is registered and wants a function to save
+ * pt_regs, the rec->flag REGS is set. When the function has been
+ * set up to save regs, the REG_EN flag is set. Once a function
+ * starts saving regs it will do so until all ftrace_ops are removed
+ * from tracing that function.
+ */
 enum {
-	FTRACE_FL_ENABLED	= (1 << 30),
+	FTRACE_FL_ENABLED	= (1UL << 29),
+	FTRACE_FL_REGS		= (1UL << 30),
+	FTRACE_FL_REGS_EN	= (1UL << 31)
 };
 
-#define FTRACE_FL_MASK		(0x3UL << 30)
-#define FTRACE_REF_MAX		((1 << 30) - 1)
+#define FTRACE_FL_MASK		(0x7UL << 29)
+#define FTRACE_REF_MAX		((1UL << 29) - 1)
 
 struct dyn_ftrace {
 	union {
@@ -290,9 +325,23 @@ enum {
 	FTRACE_STOP_FUNC_RET		= (1 << 4),
 };
 
+/*
+ * The FTRACE_UPDATE_* enum is used to pass information back
+ * from the ftrace_update_record() and ftrace_test_record()
+ * functions. These are called by the code update routines
+ * to find out what is to be done for a given function.
+ *
+ *  IGNORE           - The function is already what we want it to be
+ *  MAKE_CALL        - Start tracing the function
+ *  MODIFY_CALL      - Stop saving regs for the function
+ *  MODIFY_CALL_REGS - Start saving regs for the function
+ *  MAKE_NOP         - Stop tracing the function
+ */
 enum {
 	FTRACE_UPDATE_IGNORE,
 	FTRACE_UPDATE_MAKE_CALL,
+	FTRACE_UPDATE_MODIFY_CALL,
+	FTRACE_UPDATE_MODIFY_CALL_REGS,
 	FTRACE_UPDATE_MAKE_NOP,
 };
 
@@ -344,7 +393,9 @@ extern int ftrace_dyn_arch_init(void *data);
 extern void ftrace_replace_code(int enable);
 extern int ftrace_update_ftrace_func(ftrace_func_t func);
 extern void ftrace_caller(void);
+extern void ftrace_regs_caller(void);
 extern void ftrace_call(void);
+extern void ftrace_regs_call(void);
 extern void mcount_call(void);
 
 void ftrace_modify_all_code(int command);
@@ -352,6 +403,15 @@ void ftrace_modify_all_code(int command);
 #ifndef FTRACE_ADDR
 #define FTRACE_ADDR ((unsigned long)ftrace_caller)
 #endif
+
+#ifndef FTRACE_REGS_ADDR
+#ifdef ARCH_SUPPORTS_FTRACE_SAVE_REGS
+# define FTRACE_REGS_ADDR ((unsigned long)ftrace_regs_caller)
+#else
+# define FTRACE_REGS_ADDR FTRACE_ADDR
+#endif
+#endif
+
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 extern void ftrace_graph_caller(void);
 extern int ftrace_enable_ftrace_graph_caller(void);
@@ -407,6 +467,39 @@ extern int ftrace_make_nop(struct module *mod,
  */
 extern int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr);
 
+#ifdef ARCH_SUPPORTS_FTRACE_SAVE_REGS
+/**
+ * ftrace_modify_call - convert from one addr to another (no nop)
+ * @rec: the mcount call site record
+ * @old_addr: the address expected to be currently called to
+ * @addr: the address to change to
+ *
+ * This is a very sensitive operation and great care needs
+ * to be taken by the arch.  The operation should carefully
+ * read the location, check to see if what is read is indeed
+ * what we expect it to be, and then on success of the compare,
+ * it should write to the location.
+ *
+ * The code segment at @rec->ip should be a caller to @old_addr
+ *
+ * Return must be:
+ *  0 on success
+ *  -EFAULT on error reading the location
+ *  -EINVAL on a failed compare of the contents
+ *  -EPERM  on error writing to the location
+ * Any other value will be considered a failure.
+ */
+extern int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
+			      unsigned long addr);
+#else
+/* Should never be called */
+static inline int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
+				     unsigned long addr)
+{
+	return -EINVAL;
+}
+#endif
+
 /* May be defined in arch */
 extern int ftrace_arch_read_dyn_info(char *buf, int size);
 
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 6ff07ad0ede..c55f7e27461 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -314,6 +314,20 @@ static int __register_ftrace_function(struct ftrace_ops *ops)
 	if ((ops->flags & FL_GLOBAL_CONTROL_MASK) == FL_GLOBAL_CONTROL_MASK)
 		return -EINVAL;
 
+#ifndef ARCH_SUPPORTS_FTRACE_SAVE_REGS
+	/*
+	 * If the ftrace_ops specifies SAVE_REGS, then it only can be used
+	 * if the arch supports it, or SAVE_REGS_IF_SUPPORTED is also set.
+	 * Setting SAVE_REGS_IF_SUPPORTED makes SAVE_REGS irrelevant.
+	 */
+	if (ops->flags & FTRACE_OPS_FL_SAVE_REGS &&
+	    !(ops->flags & FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED))
+		return -EINVAL;
+
+	if (ops->flags & FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED)
+		ops->flags |= FTRACE_OPS_FL_SAVE_REGS;
+#endif
+
 	if (!core_kernel_data((unsigned long)ops))
 		ops->flags |= FTRACE_OPS_FL_DYNAMIC;
 
@@ -1515,6 +1529,12 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops,
 			rec->flags++;
 			if (FTRACE_WARN_ON((rec->flags & ~FTRACE_FL_MASK) == FTRACE_REF_MAX))
 				return;
+			/*
+			 * If any ops wants regs saved for this function
+			 * then all ops will get saved regs.
+			 */
+			if (ops->flags & FTRACE_OPS_FL_SAVE_REGS)
+				rec->flags |= FTRACE_FL_REGS;
 		} else {
 			if (FTRACE_WARN_ON((rec->flags & ~FTRACE_FL_MASK) == 0))
 				return;
@@ -1606,18 +1626,59 @@ static int ftrace_check_record(struct dyn_ftrace *rec, int enable, int update)
 	if (enable && (rec->flags & ~FTRACE_FL_MASK))
 		flag = FTRACE_FL_ENABLED;
 
+	/*
+	 * If enabling and the REGS flag does not match the REGS_EN, then
+	 * do not ignore this record. Set flags to fail the compare against
+	 * ENABLED.
+	 */
+	if (flag &&
+	    (!(rec->flags & FTRACE_FL_REGS) != !(rec->flags & FTRACE_FL_REGS_EN)))
+		flag |= FTRACE_FL_REGS;
+
 	/* If the state of this record hasn't changed, then do nothing */
 	if ((rec->flags & FTRACE_FL_ENABLED) == flag)
 		return FTRACE_UPDATE_IGNORE;
 
 	if (flag) {
-		if (update)
+		/* Save off if rec is being enabled (for return value) */
+		flag ^= rec->flags & FTRACE_FL_ENABLED;
+
+		if (update) {
 			rec->flags |= FTRACE_FL_ENABLED;
-		return FTRACE_UPDATE_MAKE_CALL;
+			if (flag & FTRACE_FL_REGS) {
+				if (rec->flags & FTRACE_FL_REGS)
+					rec->flags |= FTRACE_FL_REGS_EN;
+				else
+					rec->flags &= ~FTRACE_FL_REGS_EN;
+			}
+		}
+
+		/*
+		 * If this record is being updated from a nop, then
+		 *   return UPDATE_MAKE_CALL.
+		 * Otherwise, if the EN flag is set, then return
+		 *   UPDATE_MODIFY_CALL_REGS to tell the caller to convert
+		 *   from the non-save regs, to a save regs function.
+		 * Otherwise,
+		 *   return UPDATE_MODIFY_CALL to tell the caller to convert
+		 *   from the save regs, to a non-save regs function.
+		 */
+		if (flag & FTRACE_FL_ENABLED)
+			return FTRACE_UPDATE_MAKE_CALL;
+		else if (rec->flags & FTRACE_FL_REGS_EN)
+			return FTRACE_UPDATE_MODIFY_CALL_REGS;
+		else
+			return FTRACE_UPDATE_MODIFY_CALL;
 	}
 
-	if (update)
-		rec->flags &= ~FTRACE_FL_ENABLED;
+	if (update) {
+		/* If there's no more users, clear all flags */
+		if (!(rec->flags & ~FTRACE_FL_MASK))
+			rec->flags = 0;
+		else
+			/* Just disable the record (keep REGS state) */
+			rec->flags &= ~FTRACE_FL_ENABLED;
+	}
 
 	return FTRACE_UPDATE_MAKE_NOP;
 }
@@ -1652,13 +1713,17 @@ int ftrace_test_record(struct dyn_ftrace *rec, int enable)
 static int
 __ftrace_replace_code(struct dyn_ftrace *rec, int enable)
 {
+	unsigned long ftrace_old_addr;
 	unsigned long ftrace_addr;
 	int ret;
 
-	ftrace_addr = (unsigned long)FTRACE_ADDR;
-
 	ret = ftrace_update_record(rec, enable);
 
+	if (rec->flags & FTRACE_FL_REGS)
+		ftrace_addr = (unsigned long)FTRACE_REGS_ADDR;
+	else
+		ftrace_addr = (unsigned long)FTRACE_ADDR;
+
 	switch (ret) {
 	case FTRACE_UPDATE_IGNORE:
 		return 0;
@@ -1668,6 +1733,15 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable)
 
 	case FTRACE_UPDATE_MAKE_NOP:
 		return ftrace_make_nop(NULL, rec, ftrace_addr);
+
+	case FTRACE_UPDATE_MODIFY_CALL_REGS:
+	case FTRACE_UPDATE_MODIFY_CALL:
+		if (rec->flags & FTRACE_FL_REGS)
+			ftrace_old_addr = (unsigned long)FTRACE_ADDR;
+		else
+			ftrace_old_addr = (unsigned long)FTRACE_REGS_ADDR;
+
+		return ftrace_modify_call(rec, ftrace_old_addr, ftrace_addr);
 	}
 
 	return -1; /* unknow ftrace bug */
@@ -2421,8 +2495,9 @@ static int t_show(struct seq_file *m, void *v)
 
 	seq_printf(m, "%ps", (void *)rec->ip);
 	if (iter->flags & FTRACE_ITER_ENABLED)
-		seq_printf(m, " (%ld)",
-			   rec->flags & ~FTRACE_FL_MASK);
+		seq_printf(m, " (%ld)%s",
+			   rec->flags & ~FTRACE_FL_MASK,
+			   rec->flags & FTRACE_FL_REGS ? " R" : "");
 	seq_printf(m, "\n");
 
 	return 0;
-- 
cgit v1.2.3-70-g09d2


From 4de72395ff4cf48e23b61986dbc90b99a7c4ed97 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Tue, 5 Jun 2012 20:00:11 -0400
Subject: ftrace/x86: Add save_regs for i386 function calls

Add saving full regs for function tracing on i386.
The saving of regs was influenced by patches sent out by
Masami Hiramatsu.

Link: Link: http://lkml.kernel.org/r/20120711195745.379060003@goodmis.org

Reviewed-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 arch/x86/include/asm/ftrace.h |  2 --
 arch/x86/kernel/entry_32.S    | 68 +++++++++++++++++++++++++++++++++++++++++++
 arch/x86/kernel/ftrace.c      |  4 ---
 3 files changed, 68 insertions(+), 6 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index a8475014c4a..a6cae0c1720 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -40,10 +40,8 @@
 
 #ifdef CONFIG_DYNAMIC_FTRACE
 #define ARCH_SUPPORTS_FTRACE_OPS 1
-#ifdef CONFIG_X86_64
 #define ARCH_SUPPORTS_FTRACE_SAVE_REGS
 #endif
-#endif
 
 #ifndef __ASSEMBLY__
 extern void mcount(void);
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 5da11d1b85a..46caa5649a5 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -1123,6 +1123,7 @@ ftrace_call:
 	popl %edx
 	popl %ecx
 	popl %eax
+ftrace_ret:
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 .globl ftrace_graph_call
 ftrace_graph_call:
@@ -1134,6 +1135,73 @@ ftrace_stub:
 	ret
 END(ftrace_caller)
 
+ENTRY(ftrace_regs_caller)
+	pushf	/* push flags before compare (in cs location) */
+	cmpl $0, function_trace_stop
+	jne ftrace_restore_flags
+
+	/*
+	 * i386 does not save SS and ESP when coming from kernel.
+	 * Instead, to get sp, &regs->sp is used (see ptrace.h).
+	 * Unfortunately, that means eflags must be at the same location
+	 * as the current return ip is. We move the return ip into the
+	 * ip location, and move flags into the return ip location.
+	 */
+	pushl 4(%esp)	/* save return ip into ip slot */
+	subl $MCOUNT_INSN_SIZE, (%esp)	/* Adjust ip */
+
+	pushl $0	/* Load 0 into orig_ax */
+	pushl %gs
+	pushl %fs
+	pushl %es
+	pushl %ds
+	pushl %eax
+	pushl %ebp
+	pushl %edi
+	pushl %esi
+	pushl %edx
+	pushl %ecx
+	pushl %ebx
+
+	movl 13*4(%esp), %eax	/* Get the saved flags */
+	movl %eax, 14*4(%esp)	/* Move saved flags into regs->flags location */
+				/* clobbering return ip */
+	movl $__KERNEL_CS,13*4(%esp)
+
+	movl 12*4(%esp), %eax	/* Load ip (1st parameter) */
+	movl 0x4(%ebp), %edx	/* Load parent ip (2cd parameter) */
+	lea  (%esp), %ecx
+	pushl %ecx		/* Save pt_regs as 4th parameter */
+	leal function_trace_op, %ecx /* Save ftrace_pos in 3rd parameter */
+
+GLOBAL(ftrace_regs_call)
+	call ftrace_stub
+
+	addl $4, %esp		/* Skip pt_regs */
+	movl 14*4(%esp), %eax	/* Move flags back into cs */
+	movl %eax, 13*4(%esp)	/* Needed to keep addl from modifying flags */
+	movl 12*4(%esp), %eax	/* Get return ip from regs->ip */
+	addl $MCOUNT_INSN_SIZE, %eax
+	movl %eax, 14*4(%esp)	/* Put return ip back for ret */
+
+	popl %ebx
+	popl %ecx
+	popl %edx
+	popl %esi
+	popl %edi
+	popl %ebp
+	popl %eax
+	popl %ds
+	popl %es
+	popl %fs
+	popl %gs
+	addl $8, %esp		/* Skip orig_ax and ip */
+	popf			/* Pop flags at end (no addl to corrupt flags) */
+	jmp ftrace_ret
+
+ftrace_restore_flags:
+	popf
+	jmp  ftrace_stub
 #else /* ! CONFIG_DYNAMIC_FTRACE */
 
 ENTRY(mcount)
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index b90eb1a1307..1d414029f1d 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -206,7 +206,6 @@ static int
 ftrace_modify_code(unsigned long ip, unsigned const char *old_code,
 		   unsigned const char *new_code);
 
-#ifdef ARCH_SUPPORTS_FTRACE_SAVE_REGS
 /*
  * Should never be called:
  *  As it is only called by __ftrace_replace_code() which is called by
@@ -221,7 +220,6 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
 	WARN_ON(1);
 	return -EINVAL;
 }
-#endif
 
 int ftrace_update_ftrace_func(ftrace_func_t func)
 {
@@ -237,7 +235,6 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
 
 	ret = ftrace_modify_code(ip, old, new);
 
-#ifdef ARCH_SUPPORTS_FTRACE_SAVE_REGS
 	/* Also update the regs callback function */
 	if (!ret) {
 		ip = (unsigned long)(&ftrace_regs_call);
@@ -245,7 +242,6 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
 		new = ftrace_call_replace(ip, (unsigned long)func);
 		ret = ftrace_modify_code(ip, old, new);
 	}
-#endif
 
 	atomic_dec(&modifying_ftrace_code);
 
-- 
cgit v1.2.3-70-g09d2


From 9d3c92af47d853d4e31ee971dba7bc086275b7b3 Mon Sep 17 00:00:00 2001
From: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Date: Tue, 17 Jul 2012 21:50:48 +0800
Subject: KVM: x86: remove unnecessary mark_page_dirty

fix:
[  132.474633] 3.5.0-rc1+ #50 Not tainted
[  132.474634] -------------------------------
[  132.474635] include/linux/kvm_host.h:369 suspicious rcu_dereference_check() usage!
[  132.474636]
[  132.474636] other info that might help us debug this:
[  132.474636]
[  132.474638]
[  132.474638] rcu_scheduler_active = 1, debug_locks = 1
[  132.474640] 1 lock held by qemu-kvm/2832:
[  132.474657]  #0:  (&vcpu->mutex){+.+.+.}, at: [<ffffffffa01e1636>] vcpu_load+0x1e/0x91 [kvm]
[  132.474658]
[  132.474658] stack backtrace:
[  132.474660] Pid: 2832, comm: qemu-kvm Not tainted 3.5.0-rc1+ #50
[  132.474661] Call Trace:
[  132.474665]  [<ffffffff81092f40>] lockdep_rcu_suspicious+0xfc/0x105
[  132.474675]  [<ffffffffa01e0c85>] kvm_memslots+0x6d/0x75 [kvm]
[  132.474683]  [<ffffffffa01e0ca1>] gfn_to_memslot+0x14/0x4c [kvm]
[  132.474693]  [<ffffffffa01e3575>] mark_page_dirty+0x17/0x2a [kvm]
[  132.474706]  [<ffffffffa01f21ea>] kvm_arch_vcpu_ioctl+0xbcf/0xc07 [kvm]

Actually, we do not write vcpu->arch.time at this time, mark_page_dirty
should be removed.

Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/x86.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 829b4e97255..ecc71dde4bb 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2632,7 +2632,6 @@ static int kvm_set_guest_paused(struct kvm_vcpu *vcpu)
 	if (!vcpu->arch.time_page)
 		return -EINVAL;
 	src->flags |= PVCLOCK_GUEST_STOPPED;
-	mark_page_dirty(vcpu->kvm, vcpu->arch.time >> PAGE_SHIFT);
 	kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
 	return 0;
 }
-- 
cgit v1.2.3-70-g09d2


From 86fde74cf5b829627b37ca86322acfdd99b524b8 Mon Sep 17 00:00:00 2001
From: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Date: Tue, 17 Jul 2012 21:52:52 +0800
Subject: KVM: MMU: track the refcount when unmap the page

It will trigger a WARN_ON if the page has been freed but it is still
used in mmu, it can help us to detect mm bug early

Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/mmu.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index a5d6ef785b7..685a4855738 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -556,6 +556,14 @@ static int mmu_spte_clear_track_bits(u64 *sptep)
 		return 0;
 
 	pfn = spte_to_pfn(old_spte);
+
+	/*
+	 * KVM does not hold the refcount of the page used by
+	 * kvm mmu, before reclaiming the page, we should
+	 * unmap it from mmu first.
+	 */
+	WARN_ON(!kvm_is_mmio_pfn(pfn) && !page_count(pfn_to_page(pfn)));
+
 	if (!shadow_accessed_mask || old_spte & shadow_accessed_mask)
 		kvm_set_pfn_accessed(pfn);
 	if (!shadow_dirty_mask || (old_spte & shadow_dirty_mask))
-- 
cgit v1.2.3-70-g09d2


From 903816fa4d016e20ec71a1a97700cfcdda115580 Mon Sep 17 00:00:00 2001
From: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Date: Tue, 17 Jul 2012 21:54:11 +0800
Subject: KVM: using get_fault_pfn to get the fault pfn

Using get_fault_pfn to cleanup the code

Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/mmu.c       |  6 ++----
 include/linux/kvm_host.h |  5 +----
 virt/kvm/kvm_main.c      | 13 ++++---------
 3 files changed, 7 insertions(+), 17 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 685a4855738..f85cc21ae95 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2513,10 +2513,8 @@ static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn,
 	unsigned long hva;
 
 	slot = gfn_to_memslot_dirty_bitmap(vcpu, gfn, no_dirty_log);
-	if (!slot) {
-		get_page(fault_page);
-		return page_to_pfn(fault_page);
-	}
+	if (!slot)
+		return get_fault_pfn();
 
 	hva = gfn_to_hva_memslot(slot, gfn);
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 6f6c18a03c5..1a7f838d30c 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -383,15 +383,11 @@ id_to_memslot(struct kvm_memslots *slots, int id)
 static inline int is_error_hpa(hpa_t hpa) { return hpa >> HPA_MSB; }
 
 extern struct page *bad_page;
-extern struct page *fault_page;
-
 extern pfn_t bad_pfn;
-extern pfn_t fault_pfn;
 
 int is_error_page(struct page *page);
 int is_error_pfn(pfn_t pfn);
 int is_hwpoison_pfn(pfn_t pfn);
-int is_fault_pfn(pfn_t pfn);
 int is_noslot_pfn(pfn_t pfn);
 int is_invalid_pfn(pfn_t pfn);
 int kvm_is_error_hva(unsigned long addr);
@@ -441,6 +437,7 @@ void kvm_release_pfn_clean(pfn_t pfn);
 void kvm_set_pfn_dirty(pfn_t pfn);
 void kvm_set_pfn_accessed(pfn_t pfn);
 void kvm_get_pfn(pfn_t pfn);
+pfn_t get_fault_pfn(void);
 
 int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
 			int len);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index e2b1a159e5d..0fbbf2d2160 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -103,8 +103,8 @@ static bool largepages_enabled = true;
 static struct page *hwpoison_page;
 static pfn_t hwpoison_pfn;
 
-struct page *fault_page;
-pfn_t fault_pfn;
+static struct page *fault_page;
+static pfn_t fault_pfn;
 
 inline int kvm_is_mmio_pfn(pfn_t pfn)
 {
@@ -949,12 +949,6 @@ int is_hwpoison_pfn(pfn_t pfn)
 }
 EXPORT_SYMBOL_GPL(is_hwpoison_pfn);
 
-int is_fault_pfn(pfn_t pfn)
-{
-	return pfn == fault_pfn;
-}
-EXPORT_SYMBOL_GPL(is_fault_pfn);
-
 int is_noslot_pfn(pfn_t pfn)
 {
 	return pfn == bad_pfn;
@@ -1038,11 +1032,12 @@ unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
 }
 EXPORT_SYMBOL_GPL(gfn_to_hva);
 
-static pfn_t get_fault_pfn(void)
+pfn_t get_fault_pfn(void)
 {
 	get_page(fault_page);
 	return fault_pfn;
 }
+EXPORT_SYMBOL_GPL(get_fault_pfn);
 
 int get_user_page_nowait(struct task_struct *tsk, struct mm_struct *mm,
 	unsigned long start, int write, struct page **page)
-- 
cgit v1.2.3-70-g09d2


From d566104853361cc377c61f70e41c1ad3d44b86c6 Mon Sep 17 00:00:00 2001
From: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Date: Tue, 17 Jul 2012 21:56:16 +0800
Subject: KVM: remove the unused parameter of gfn_to_pfn_memslot

The parameter, 'kvm', is not used in gfn_to_pfn_memslot, we can happily remove
it

Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/powerpc/kvm/e500_tlb.c |  2 +-
 arch/x86/kvm/mmu.c          |  2 +-
 include/linux/kvm_host.h    |  5 ++---
 virt/kvm/iommu.c            | 10 +++++-----
 virt/kvm/kvm_main.c         | 15 +++++++--------
 5 files changed, 16 insertions(+), 18 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c
index c510fc96130..c8f6c582674 100644
--- a/arch/powerpc/kvm/e500_tlb.c
+++ b/arch/powerpc/kvm/e500_tlb.c
@@ -520,7 +520,7 @@ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
 
 	if (likely(!pfnmap)) {
 		unsigned long tsize_pages = 1 << (tsize + 10 - PAGE_SHIFT);
-		pfn = gfn_to_pfn_memslot(vcpu_e500->vcpu.kvm, slot, gfn);
+		pfn = gfn_to_pfn_memslot(slot, gfn);
 		if (is_error_pfn(pfn)) {
 			printk(KERN_ERR "Couldn't get real page for gfn %lx!\n",
 					(long)gfn);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index f85cc21ae95..4f77f7ac6d2 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2518,7 +2518,7 @@ static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn,
 
 	hva = gfn_to_hva_memslot(slot, gfn);
 
-	return hva_to_pfn_atomic(vcpu->kvm, hva);
+	return hva_to_pfn_atomic(hva);
 }
 
 static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu,
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index e8d13a072d2..db9aa917840 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -418,15 +418,14 @@ void kvm_release_page_dirty(struct page *page);
 void kvm_set_page_dirty(struct page *page);
 void kvm_set_page_accessed(struct page *page);
 
-pfn_t hva_to_pfn_atomic(struct kvm *kvm, unsigned long addr);
+pfn_t hva_to_pfn_atomic(unsigned long addr);
 pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn);
 pfn_t gfn_to_pfn_async(struct kvm *kvm, gfn_t gfn, bool *async,
 		       bool write_fault, bool *writable);
 pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn);
 pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
 		      bool *writable);
-pfn_t gfn_to_pfn_memslot(struct kvm *kvm,
-			 struct kvm_memory_slot *slot, gfn_t gfn);
+pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn);
 void kvm_release_pfn_dirty(pfn_t);
 void kvm_release_pfn_clean(pfn_t pfn);
 void kvm_set_pfn_dirty(pfn_t pfn);
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index e9fff9830bf..c03f1fb2670 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -42,13 +42,13 @@ static int kvm_iommu_unmap_memslots(struct kvm *kvm);
 static void kvm_iommu_put_pages(struct kvm *kvm,
 				gfn_t base_gfn, unsigned long npages);
 
-static pfn_t kvm_pin_pages(struct kvm *kvm, struct kvm_memory_slot *slot,
-			   gfn_t gfn, unsigned long size)
+static pfn_t kvm_pin_pages(struct kvm_memory_slot *slot, gfn_t gfn,
+			   unsigned long size)
 {
 	gfn_t end_gfn;
 	pfn_t pfn;
 
-	pfn     = gfn_to_pfn_memslot(kvm, slot, gfn);
+	pfn     = gfn_to_pfn_memslot(slot, gfn);
 	end_gfn = gfn + (size >> PAGE_SHIFT);
 	gfn    += 1;
 
@@ -56,7 +56,7 @@ static pfn_t kvm_pin_pages(struct kvm *kvm, struct kvm_memory_slot *slot,
 		return pfn;
 
 	while (gfn < end_gfn)
-		gfn_to_pfn_memslot(kvm, slot, gfn++);
+		gfn_to_pfn_memslot(slot, gfn++);
 
 	return pfn;
 }
@@ -105,7 +105,7 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
 		 * Pin all pages we are about to map in memory. This is
 		 * important because we unmap and unpin in 4kb steps later.
 		 */
-		pfn = kvm_pin_pages(kvm, slot, gfn, page_size);
+		pfn = kvm_pin_pages(slot, gfn, page_size);
 		if (is_error_pfn(pfn)) {
 			gfn += 1;
 			continue;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index f955eee92aa..68dda513cd7 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1062,8 +1062,8 @@ static inline int check_user_page_hwpoison(unsigned long addr)
 	return rc == -EHWPOISON;
 }
 
-static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic,
-			bool *async, bool write_fault, bool *writable)
+static pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async,
+			bool write_fault, bool *writable)
 {
 	struct page *page[1];
 	int npages = 0;
@@ -1143,9 +1143,9 @@ static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic,
 	return pfn;
 }
 
-pfn_t hva_to_pfn_atomic(struct kvm *kvm, unsigned long addr)
+pfn_t hva_to_pfn_atomic(unsigned long addr)
 {
-	return hva_to_pfn(kvm, addr, true, NULL, true, NULL);
+	return hva_to_pfn(addr, true, NULL, true, NULL);
 }
 EXPORT_SYMBOL_GPL(hva_to_pfn_atomic);
 
@@ -1163,7 +1163,7 @@ static pfn_t __gfn_to_pfn(struct kvm *kvm, gfn_t gfn, bool atomic, bool *async,
 		return page_to_pfn(bad_page);
 	}
 
-	return hva_to_pfn(kvm, addr, atomic, async, write_fault, writable);
+	return hva_to_pfn(addr, atomic, async, write_fault, writable);
 }
 
 pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn)
@@ -1192,11 +1192,10 @@ pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
 }
 EXPORT_SYMBOL_GPL(gfn_to_pfn_prot);
 
-pfn_t gfn_to_pfn_memslot(struct kvm *kvm,
-			 struct kvm_memory_slot *slot, gfn_t gfn)
+pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn)
 {
 	unsigned long addr = gfn_to_hva_memslot(slot, gfn);
-	return hva_to_pfn(kvm, addr, false, NULL, true, NULL);
+	return hva_to_pfn(addr, false, NULL, true, NULL);
 }
 
 int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages,
-- 
cgit v1.2.3-70-g09d2


From 0fa060714753ef65aef294b3462efb0212520933 Mon Sep 17 00:00:00 2001
From: Guo Chao <yan@linux.vnet.ibm.com>
Date: Thu, 28 Jun 2012 15:16:19 +0800
Subject: KVM: VMX: Fix typos

Signed-off-by: Guo Chao <yan@linux.vnet.ibm.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/vmx.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index c39b60707e0..2300e5319ed 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1343,7 +1343,7 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
 	guest_efer = vmx->vcpu.arch.efer;
 
 	/*
-	 * NX is emulated; LMA and LME handled by hardware; SCE meaninless
+	 * NX is emulated; LMA and LME handled by hardware; SCE meaningless
 	 * outside long mode
 	 */
 	ignore_bits = EFER_NX | EFER_SCE;
@@ -3261,7 +3261,7 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
 	 * qemu binaries.
 	 *   IA32 arch specifies that at the time of processor reset the
 	 * "Accessed" bit in the AR field of segment registers is 1. And qemu
-	 * is setting it to 0 in the usedland code. This causes invalid guest
+	 * is setting it to 0 in the userland code. This causes invalid guest
 	 * state vmexit when "unrestricted guest" mode is turned on.
 	 *    Fix for this setup issue in cpu_reset is being pushed in the qemu
 	 * tree. Newer qemu binaries with that qemu fix would not need this
@@ -4446,7 +4446,7 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
 	hypercall[2] = 0xc1;
 }
 
-/* called to set cr0 as approriate for a mov-to-cr0 exit. */
+/* called to set cr0 as appropriate for a mov-to-cr0 exit. */
 static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val)
 {
 	if (to_vmx(vcpu)->nested.vmxon &&
-- 
cgit v1.2.3-70-g09d2


From c5ec2e56d0a654797ee43a31001738ccd05eef0b Mon Sep 17 00:00:00 2001
From: Guo Chao <yan@linux.vnet.ibm.com>
Date: Thu, 28 Jun 2012 15:16:43 +0800
Subject: KVM: SVM: Fix typos

Signed-off-by: Guo Chao <yan@linux.vnet.ibm.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/svm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index baead950d6c..687d0c30e55 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2063,7 +2063,7 @@ static inline bool nested_svm_intr(struct vcpu_svm *svm)
 	if (svm->nested.intercept & 1ULL) {
 		/*
 		 * The #vmexit can't be emulated here directly because this
-		 * code path runs with irqs and preemtion disabled. A
+		 * code path runs with irqs and preemption disabled. A
 		 * #vmexit emulation might sleep. Only signal request for
 		 * the #vmexit here.
 		 */
@@ -2409,7 +2409,7 @@ static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
 {
 	/*
 	 * This function merges the msr permission bitmaps of kvm and the
-	 * nested vmcb. It is omptimized in that it only merges the parts where
+	 * nested vmcb. It is optimized in that it only merges the parts where
 	 * the kvm msr permission bitmap may contain zero bits
 	 */
 	int i;
-- 
cgit v1.2.3-70-g09d2


From 4a9699807c491740c4dfe7b6a06703e1d262e802 Mon Sep 17 00:00:00 2001
From: Guo Chao <yan@linux.vnet.ibm.com>
Date: Thu, 28 Jun 2012 15:17:27 +0800
Subject: KVM: x86: Fix typos in x86.c

Signed-off-by: Guo Chao <yan@linux.vnet.ibm.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/x86.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ecc71dde4bb..3d9d08edbf2 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1093,7 +1093,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data)
 		 * For each generation, we track the original measured
 		 * nanosecond time, offset, and write, so if TSCs are in
 		 * sync, we can match exact offset, and if not, we can match
-		 * exact software computaion in compute_guest_tsc()
+		 * exact software computation in compute_guest_tsc()
 		 *
 		 * These values are tracked in kvm->arch.cur_xxx variables.
 		 */
@@ -1500,7 +1500,7 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
 {
 	gpa_t gpa = data & ~0x3f;
 
-	/* Bits 2:5 are resrved, Should be zero */
+	/* Bits 2:5 are reserved, Should be zero */
 	if (data & 0x3c)
 		return 1;
 
@@ -1723,7 +1723,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 		 * Ignore all writes to this no longer documented MSR.
 		 * Writes are only relevant for old K7 processors,
 		 * all pre-dating SVM, but a recommended workaround from
-		 * AMD for these chips. It is possible to speicify the
+		 * AMD for these chips. It is possible to specify the
 		 * affected processor models on the command line, hence
 		 * the need to ignore the workaround.
 		 */
@@ -4491,7 +4491,7 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva)
 
 	/*
 	 * if emulation was due to access to shadowed page table
-	 * and it failed try to unshadow page and re-entetr the
+	 * and it failed try to unshadow page and re-enter the
 	 * guest to let CPU execute the instruction.
 	 */
 	if (kvm_mmu_unprotect_page_virt(vcpu, gva))
@@ -5587,7 +5587,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 		/*
 		 * We are here if userspace calls get_regs() in the middle of
 		 * instruction emulation. Registers state needs to be copied
-		 * back from emulation context to vcpu. Usrapace shouldn't do
+		 * back from emulation context to vcpu. Userspace shouldn't do
 		 * that usually, but some bad designed PV devices (vmware
 		 * backdoor interface) need this to work
 		 */
@@ -6116,7 +6116,7 @@ int kvm_arch_hardware_enable(void *garbage)
 	 * as we reset last_host_tsc on all VCPUs to stop this from being
 	 * called multiple times (one for each physical CPU bringup).
 	 *
-	 * Platforms with unnreliable TSCs don't have to deal with this, they
+	 * Platforms with unreliable TSCs don't have to deal with this, they
 	 * will be compensated by the logic in vcpu_load, which sets the TSC to
 	 * catchup mode.  This will catchup all VCPUs to real time, but cannot
 	 * guarantee that they stay in perfect synchronization.
@@ -6391,7 +6391,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 		map_flags = MAP_SHARED | MAP_ANONYMOUS;
 
 	/*To keep backward compatibility with older userspace,
-	 *x86 needs to hanlde !user_alloc case.
+	 *x86 needs to handle !user_alloc case.
 	 */
 	if (!user_alloc) {
 		if (npages && !old.rmap) {
-- 
cgit v1.2.3-70-g09d2


From fc0586807dc4e307da6d3ba4ed5c927b6d27276c Mon Sep 17 00:00:00 2001
From: Guo Chao <yan@linux.vnet.ibm.com>
Date: Thu, 28 Jun 2012 15:19:51 +0800
Subject: KVM: x86: Fix typos in emulate.c

Signed-off-by: Guo Chao <yan@linux.vnet.ibm.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/emulate.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 97d9a9914ba..85b611e13e8 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -642,7 +642,7 @@ static int __linearize(struct x86_emulate_ctxt *ctxt,
 			if (addr.ea > lim || (u32)(addr.ea + size - 1) > lim)
 				goto bad;
 		} else {
-			/* exapand-down segment */
+			/* expand-down segment */
 			if (addr.ea <= lim || (u32)(addr.ea + size - 1) <= lim)
 				goto bad;
 			lim = desc.d ? 0xffffffff : 0xffff;
@@ -1383,7 +1383,7 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 	err_code = selector & 0xfffc;
 	err_vec = GP_VECTOR;
 
-	/* can't load system descriptor into segment selecor */
+	/* can't load system descriptor into segment selector */
 	if (seg <= VCPU_SREG_GS && !seg_desc.s)
 		goto exception;
 
@@ -2398,7 +2398,7 @@ static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
 	set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS);
 
 	/*
-	 * Now load segment descriptors. If fault happenes at this stage
+	 * Now load segment descriptors. If fault happens at this stage
 	 * it is handled in a context of new task
 	 */
 	ret = load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR);
@@ -2640,7 +2640,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
 	 *
 	 * 1. jmp/call/int to task gate: Check against DPL of the task gate
 	 * 2. Exception/IRQ/iret: No check is performed
-	 * 3. jmp/call to TSS: Check agains DPL of the TSS
+	 * 3. jmp/call to TSS: Check against DPL of the TSS
 	 */
 	if (reason == TASK_SWITCH_GATE) {
 		if (idt_index != -1) {
@@ -2681,7 +2681,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
 		ctxt->eflags = ctxt->eflags & ~X86_EFLAGS_NT;
 
 	/* set back link to prev task only if NT bit is set in eflags
-	   note that old_tss_sel is not used afetr this point */
+	   note that old_tss_sel is not used after this point */
 	if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE)
 		old_tss_sel = 0xffff;
 
-- 
cgit v1.2.3-70-g09d2


From bbbda79510b6e5d399fae76bdb9b999286eb1b59 Mon Sep 17 00:00:00 2001
From: Guo Chao <yan@linux.vnet.ibm.com>
Date: Thu, 28 Jun 2012 15:20:58 +0800
Subject: KVM: x86: Fix typos in cpuid.c

Signed-off-by: Guo Chao <yan@linux.vnet.ibm.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/cpuid.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 0595f1397b7..b496da684bd 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -316,7 +316,7 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 	}
 	case 7: {
 		entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
-		/* Mask ebx against host capbability word 9 */
+		/* Mask ebx against host capability word 9 */
 		if (index == 0) {
 			entry->ebx &= kvm_supported_word9_x86_features;
 			cpuid_mask(&entry->ebx, 9);
-- 
cgit v1.2.3-70-g09d2


From d5b0b5b196b474ef26f46f2036c8fbaa8cca2cf5 Mon Sep 17 00:00:00 2001
From: Guo Chao <yan@linux.vnet.ibm.com>
Date: Thu, 28 Jun 2012 15:22:57 +0800
Subject: KVM: x86: Fix typos in lapic.c

Signed-off-by: Guo Chao <yan@linux.vnet.ibm.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/lapic.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index ce878788a39..fff7173f6a7 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -719,7 +719,7 @@ static int apic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
 {
 	unsigned char alignment = offset & 0xf;
 	u32 result;
-	/* this bitmask has a bit cleared for each reserver register */
+	/* this bitmask has a bit cleared for each reserved register */
 	static const u64 rmask = 0x43ff01ffffffe70cULL;
 
 	if ((alignment + len) > 4) {
@@ -792,7 +792,7 @@ static void start_apic_timer(struct kvm_lapic *apic)
 	atomic_set(&apic->lapic_timer.pending, 0);
 
 	if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) {
-		/* lapic timer in oneshot or peroidic mode */
+		/* lapic timer in oneshot or periodic mode */
 		now = apic->lapic_timer.timer.base->get_time();
 		apic->lapic_timer.period = (u64)apic_get_reg(apic, APIC_TMICT)
 			    * APIC_BUS_CYCLE_NS * apic->divide_count;
-- 
cgit v1.2.3-70-g09d2


From c7a7062fa00db7dc66280a72cd9dad0f3595bc66 Mon Sep 17 00:00:00 2001
From: Guo Chao <yan@linux.vnet.ibm.com>
Date: Thu, 28 Jun 2012 15:23:08 +0800
Subject: KVM: x86: Fix typos in pmu.c

Signed-off-by: Guo Chao <yan@linux.vnet.ibm.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/pmu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 2e88438ffd8..db206a46e0d 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -1,5 +1,5 @@
 /*
- * Kernel-based Virtual Machine -- Performane Monitoring Unit support
+ * Kernel-based Virtual Machine -- Performance Monitoring Unit support
  *
  * Copyright 2011 Red Hat, Inc. and/or its affiliates.
  *
-- 
cgit v1.2.3-70-g09d2


From 93b6547e2219784b2df790353e083e0bdbebd2c2 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Thu, 19 Jul 2012 13:55:53 +0300
Subject: KVM: switch to symbolic name for irq_states size

Use PIC_NUM_PINS instead of hard-coded 16 for pic pins.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/irq.h  | 2 +-
 virt/kvm/irq_comm.c | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index 2086f2bfba3..2d03568e949 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -70,7 +70,7 @@ struct kvm_pic {
 	struct kvm_io_device dev_slave;
 	struct kvm_io_device dev_eclr;
 	void (*ack_notifier)(void *opaque, int irq);
-	unsigned long irq_states[16];
+	unsigned long irq_states[PIC_NUM_PINS];
 };
 
 struct kvm_pic *kvm_create_pic(struct kvm *kvm);
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index a6a0365475e..22aae8fd146 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -343,11 +343,11 @@ static int setup_routing_entry(struct kvm_irq_routing_table *rt,
 		switch (ue->u.irqchip.irqchip) {
 		case KVM_IRQCHIP_PIC_MASTER:
 			e->set = kvm_set_pic_irq;
-			max_pin = 16;
+			max_pin = PIC_NUM_PINS;
 			break;
 		case KVM_IRQCHIP_PIC_SLAVE:
 			e->set = kvm_set_pic_irq;
-			max_pin = 16;
+			max_pin = PIC_NUM_PINS;
 			delta = 8;
 			break;
 		case KVM_IRQCHIP_IOAPIC:
-- 
cgit v1.2.3-70-g09d2


From f2a743473194a1ad44a85f8b63aeef9d63e5bf47 Mon Sep 17 00:00:00 2001
From: Raghavendra K T <raghavendra.kt@linux.vnet.ibm.com>
Date: Wed, 18 Jul 2012 19:07:32 +0530
Subject: KVM: Add config to support ple or cpu relax optimzation

Suggested-by: Avi Kivity <avi@redhat.com>
Signed-off-by: Raghavendra K T <raghavendra.kt@linux.vnet.ibm.com>
Reviewed-by: Marcelo Tosatti <mtosatti@redhat.com>
Reviewed-by: Rik van Riel <riel@redhat.com>
Tested-by: Christian Borntraeger <borntraeger@de.ibm.com> # on s390x
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/s390/kvm/Kconfig | 1 +
 arch/x86/kvm/Kconfig  | 1 +
 virt/kvm/Kconfig      | 3 +++
 3 files changed, 5 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig
index 78eb9847008..a6e2677724e 100644
--- a/arch/s390/kvm/Kconfig
+++ b/arch/s390/kvm/Kconfig
@@ -21,6 +21,7 @@ config KVM
 	depends on HAVE_KVM && EXPERIMENTAL
 	select PREEMPT_NOTIFIERS
 	select ANON_INODES
+	select HAVE_KVM_CPU_RELAX_INTERCEPT
 	---help---
 	  Support hosting paravirtualized guest machines using the SIE
 	  virtualization capability on the mainframe. This should work
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index a28f338843e..45c044f0fff 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -37,6 +37,7 @@ config KVM
 	select TASK_DELAY_ACCT
 	select PERF_EVENTS
 	select HAVE_KVM_MSI
+	select HAVE_KVM_CPU_RELAX_INTERCEPT
 	---help---
 	  Support hosting fully virtualized guest machines using hardware
 	  virtualization extensions.  You will need a fairly recent
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index 28694f4a913..d01b24b72c6 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -21,3 +21,6 @@ config KVM_ASYNC_PF
 
 config HAVE_KVM_MSI
        bool
+
+config HAVE_KVM_CPU_RELAX_INTERCEPT
+       bool
-- 
cgit v1.2.3-70-g09d2


From 3b2bd2f800ba9488d9ad493216a0c07d71055b56 Mon Sep 17 00:00:00 2001
From: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Date: Thu, 26 Jul 2012 11:57:43 +0800
Subject: KVM: MMU: use kvm_release_pfn_clean to release pfn

The current code depends on the fact that fault_page is the normal page,
however, we will use the error code instead of these dummy pages in the
later patch, so we use kvm_release_pfn_clean to release pfn which will
release the error code properly

Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/mmu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 24199344359..a9a20528e70 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3275,7 +3275,7 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
 	if (!async)
 		return false; /* *pfn has correct page already */
 
-	put_page(pfn_to_page(*pfn));
+	kvm_release_pfn_clean(*pfn);
 
 	if (!prefault && can_do_async_pf(vcpu)) {
 		trace_kvm_try_async_get_page(gva, gfn);
-- 
cgit v1.2.3-70-g09d2


From f23b070e662ca55a8fdaaa28537af06cab664499 Mon Sep 17 00:00:00 2001
From: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Date: Thu, 26 Jul 2012 13:12:22 +0800
Subject: KVM: x86 emulator: simplify read_emulated

No need split mmio read region into 8-bits pieces since we do it in
emulator_read_write_onepage

Changelog:
  Add a WARN_ON to check read-cache overflow

Acked-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/emulate.c | 31 ++++++++++++++-----------------
 1 file changed, 14 insertions(+), 17 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 85b611e13e8..2c5d1e65d9d 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -1166,24 +1166,21 @@ static int read_emulated(struct x86_emulate_ctxt *ctxt,
 	int rc;
 	struct read_cache *mc = &ctxt->mem_read;
 
-	while (size) {
-		int n = min(size, 8u);
-		size -= n;
-		if (mc->pos < mc->end)
-			goto read_cached;
-
-		rc = ctxt->ops->read_emulated(ctxt, addr, mc->data + mc->end, n,
-					      &ctxt->exception);
-		if (rc != X86EMUL_CONTINUE)
-			return rc;
-		mc->end += n;
+	if (mc->pos < mc->end)
+		goto read_cached;
 
-	read_cached:
-		memcpy(dest, mc->data + mc->pos, n);
-		mc->pos += n;
-		dest += n;
-		addr += n;
-	}
+	WARN_ON((mc->end + size) >= sizeof(mc->data));
+
+	rc = ctxt->ops->read_emulated(ctxt, addr, mc->data + mc->end, size,
+				      &ctxt->exception);
+	if (rc != X86EMUL_CONTINUE)
+		return rc;
+
+	mc->end += size;
+
+read_cached:
+	memcpy(dest, mc->data + mc->pos, size);
+	mc->pos += size;
 	return X86EMUL_CONTINUE;
 }
 
-- 
cgit v1.2.3-70-g09d2


From 99245b507dc3b1b2815d6a6cb4e94a6b7018a24b Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Wed, 25 Jul 2012 15:49:42 +0300
Subject: KVM: x86 emulator: drop unneeded call to get_segment()

setup_syscalls_segments() calls get_segment() and than overwrites all
but one of the structure fields and this one should also be overwritten
anyway, so we can drop call to get_segment() and avoid a couple of vmreads
on vmx. Also drop zeroing ss/cs structures since most of the fields are
set anyway. Just set those that were not set explicitly.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/emulate.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 2c5d1e65d9d..10f0136f50c 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2035,12 +2035,6 @@ static void
 setup_syscalls_segments(struct x86_emulate_ctxt *ctxt,
 			struct desc_struct *cs, struct desc_struct *ss)
 {
-	u16 selector;
-
-	memset(cs, 0, sizeof(struct desc_struct));
-	ctxt->ops->get_segment(ctxt, &selector, cs, NULL, VCPU_SREG_CS);
-	memset(ss, 0, sizeof(struct desc_struct));
-
 	cs->l = 0;		/* will be adjusted later */
 	set_desc_base(cs, 0);	/* flat segment */
 	cs->g = 1;		/* 4kb granularity */
@@ -2050,6 +2044,7 @@ setup_syscalls_segments(struct x86_emulate_ctxt *ctxt,
 	cs->dpl = 0;		/* will be adjusted later */
 	cs->p = 1;
 	cs->d = 1;
+	cs->avl = 0;
 
 	set_desc_base(ss, 0);	/* flat segment */
 	set_desc_limit(ss, 0xfffff);	/* 4GB limit */
@@ -2059,6 +2054,8 @@ setup_syscalls_segments(struct x86_emulate_ctxt *ctxt,
 	ss->d = 1;		/* 32bit stack segment */
 	ss->dpl = 0;
 	ss->p = 1;
+	ss->l = 0;
+	ss->avl = 0;
 }
 
 static bool vendor_intel(struct x86_emulate_ctxt *ctxt)
-- 
cgit v1.2.3-70-g09d2


From 23d43cf998275bc97437931c0cdee1df2c1aa3ca Mon Sep 17 00:00:00 2001
From: Christoffer Dall <c.dall@virtualopensystems.com>
Date: Tue, 24 Jul 2012 08:51:20 -0400
Subject: KVM: Move KVM_IRQ_LINE to arch-generic code

Handle KVM_IRQ_LINE and KVM_IRQ_LINE_STATUS in the generic
kvm_vm_ioctl() function and call into kvm_vm_ioctl_irq_line().

This is even more relevant when KVM/ARM also uses this ioctl.

Signed-off-by: Christoffer Dall <c.dall@virtualopensystems.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/ia64/kvm/kvm-ia64.c | 33 ++++++++++-----------------------
 arch/x86/kvm/x86.c       | 33 ++++++++++-----------------------
 include/linux/kvm_host.h |  1 +
 virt/kvm/kvm_main.c      | 23 +++++++++++++++++++++++
 4 files changed, 44 insertions(+), 46 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index bd77cb507c1..eac65380bd2 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -924,6 +924,16 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 	return 0;
 }
 
+int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event)
+{
+	if (!irqchip_in_kernel(kvm))
+		return -ENXIO;
+
+	irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
+					irq_event->irq, irq_event->level);
+	return 0;
+}
+
 long kvm_arch_vm_ioctl(struct file *filp,
 		unsigned int ioctl, unsigned long arg)
 {
@@ -963,29 +973,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
 			goto out;
 		}
 		break;
-	case KVM_IRQ_LINE_STATUS:
-	case KVM_IRQ_LINE: {
-		struct kvm_irq_level irq_event;
-
-		r = -EFAULT;
-		if (copy_from_user(&irq_event, argp, sizeof irq_event))
-			goto out;
-		r = -ENXIO;
-		if (irqchip_in_kernel(kvm)) {
-			__s32 status;
-			status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
-				    irq_event.irq, irq_event.level);
-			if (ioctl == KVM_IRQ_LINE_STATUS) {
-				r = -EFAULT;
-				irq_event.status = status;
-				if (copy_to_user(argp, &irq_event,
-							sizeof irq_event))
-					goto out;
-			}
-			r = 0;
-		}
-		break;
-		}
 	case KVM_GET_IRQCHIP: {
 		/* 0: PIC master, 1: PIC slave, 2: IOAPIC */
 		struct kvm_irqchip chip;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 3d9d08edbf2..b6379e55ee2 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3165,6 +3165,16 @@ out:
 	return r;
 }
 
+int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event)
+{
+	if (!irqchip_in_kernel(kvm))
+		return -ENXIO;
+
+	irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
+					irq_event->irq, irq_event->level);
+	return 0;
+}
+
 long kvm_arch_vm_ioctl(struct file *filp,
 		       unsigned int ioctl, unsigned long arg)
 {
@@ -3271,29 +3281,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
 	create_pit_unlock:
 		mutex_unlock(&kvm->slots_lock);
 		break;
-	case KVM_IRQ_LINE_STATUS:
-	case KVM_IRQ_LINE: {
-		struct kvm_irq_level irq_event;
-
-		r = -EFAULT;
-		if (copy_from_user(&irq_event, argp, sizeof irq_event))
-			goto out;
-		r = -ENXIO;
-		if (irqchip_in_kernel(kvm)) {
-			__s32 status;
-			status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
-					irq_event.irq, irq_event.level);
-			if (ioctl == KVM_IRQ_LINE_STATUS) {
-				r = -EFAULT;
-				irq_event.status = status;
-				if (copy_to_user(argp, &irq_event,
-							sizeof irq_event))
-					goto out;
-			}
-			r = 0;
-		}
-		break;
-	}
 	case KVM_GET_IRQCHIP: {
 		/* 0: PIC master, 1: PIC slave, 2: IOAPIC */
 		struct kvm_irqchip *chip;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 4e60d3695e4..dbc65f9d6a2 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -498,6 +498,7 @@ int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
 				   struct
 				   kvm_userspace_memory_region *mem,
 				   int user_alloc);
+int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level);
 long kvm_arch_vm_ioctl(struct file *filp,
 		       unsigned int ioctl, unsigned long arg);
 
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index de89497fe4c..bcf973ec98f 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2148,6 +2148,29 @@ static long kvm_vm_ioctl(struct file *filp,
 		r = kvm_send_userspace_msi(kvm, &msi);
 		break;
 	}
+#endif
+#ifdef __KVM_HAVE_IRQ_LINE
+	case KVM_IRQ_LINE_STATUS:
+	case KVM_IRQ_LINE: {
+		struct kvm_irq_level irq_event;
+
+		r = -EFAULT;
+		if (copy_from_user(&irq_event, argp, sizeof irq_event))
+			goto out;
+
+		r = kvm_vm_ioctl_irq_line(kvm, &irq_event);
+		if (r)
+			goto out;
+
+		r = -EFAULT;
+		if (ioctl == KVM_IRQ_LINE_STATUS) {
+			if (copy_to_user(argp, &irq_event, sizeof irq_event))
+				goto out;
+		}
+
+		r = 0;
+		break;
+	}
 #endif
 	default:
 		r = kvm_arch_vm_ioctl(filp, ioctl, arg);
-- 
cgit v1.2.3-70-g09d2


From 4a4541a40e1fe145c72c4b959fac524a5600d9fb Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Sun, 22 Jul 2012 17:41:00 +0300
Subject: KVM: Don't update PPR on any APIC read

The current code will update the PPR on almost any APIC read; however
that's only required if we read the PPR.

kvm_update_ppr() shows up in some profiles, albeit with a low usage (~1%).
This should reduce it further (it will still be called during interrupt
processing).

Signed-off-by: Avi Kivity <avi@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/lapic.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index fff7173f6a7..ad7fff7ad13 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -696,12 +696,14 @@ static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset)
 
 		val = apic_get_tmcct(apic);
 		break;
-
+	case APIC_PROCPRI:
+		apic_update_ppr(apic);
+		val = apic_get_reg(apic, offset);
+		break;
 	case APIC_TASKPRI:
 		report_tpr_access(apic, false);
 		/* fall thru */
 	default:
-		apic_update_ppr(apic);
 		val = apic_get_reg(apic, offset);
 		break;
 	}
-- 
cgit v1.2.3-70-g09d2


From e4ea3f6b1bf3d489674a3660db652636e50186f9 Mon Sep 17 00:00:00 2001
From: Uros Bizjak <ubizjak@gmail.com>
Date: Thu, 19 Jul 2012 13:04:47 -0400
Subject: ftrace/x86_32: Simplify parameter setup for ftrace_regs_caller

The final position of the stack after saving regs and setting up
the parameters for ftrace_regs_call, is the position of the pt_regs
needed for the 4th parameter. Instead of saving it into a temporary
reg and pushing the reg, simply push the stack pointer.

Link: http://lkml.kernel.org/r/1342702344.12353.16.camel@gandalf.stny.rr.com

Reviewed-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 arch/x86/kernel/entry_32.S | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 46caa5649a5..4dc301709e7 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -1169,10 +1169,9 @@ ENTRY(ftrace_regs_caller)
 	movl $__KERNEL_CS,13*4(%esp)
 
 	movl 12*4(%esp), %eax	/* Load ip (1st parameter) */
-	movl 0x4(%ebp), %edx	/* Load parent ip (2cd parameter) */
-	lea  (%esp), %ecx
-	pushl %ecx		/* Save pt_regs as 4th parameter */
+	movl 0x4(%ebp), %edx	/* Load parent ip (2nd parameter) */
 	leal function_trace_op, %ecx /* Save ftrace_pos in 3rd parameter */
+	pushl %esp		/* Save pt_regs as 4th parameter */
 
 GLOBAL(ftrace_regs_call)
 	call ftrace_stub
-- 
cgit v1.2.3-70-g09d2


From 5767cfeaa9ec7b67c802143394f3ad9f8b174eb8 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Tue, 3 Jul 2012 16:16:09 -0400
Subject: ftrace/x86: Remove function_trace_stop check from graph caller

The graph caller is called by the mcount callers, which already does
the check against the function_trace_stop variable. No reason to
check it again.

Link: http://lkml.kernel.org/r/20120711195745.588538769@goodmis.org

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 arch/x86/kernel/entry_32.S | 3 ---
 arch/x86/kernel/entry_64.S | 3 ---
 2 files changed, 6 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 4dc301709e7..061ac17ee97 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -1241,9 +1241,6 @@ END(mcount)
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 ENTRY(ftrace_graph_caller)
-	cmpl $0, function_trace_stop
-	jne ftrace_stub
-
 	pushl %eax
 	pushl %ecx
 	pushl %edx
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 52bda2e8f7a..38308fa7d7e 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -213,9 +213,6 @@ END(mcount)
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 ENTRY(ftrace_graph_caller)
-	cmpl $0, function_trace_stop
-	jne ftrace_stub
-
 	MCOUNT_SAVE_FRAME
 
 	leaq 8(%rbp), %rdi
-- 
cgit v1.2.3-70-g09d2


From e52538965119319447c0800c534da73142c27be2 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Date: Tue, 5 Jun 2012 19:28:38 +0900
Subject: kprobes/x86: ftrace based optimization for x86

Add function tracer based kprobe optimization support
handlers on x86. This allows kprobes to use function
tracer for probing on mcount call.

Link: http://lkml.kernel.org/r/20120605102838.27845.26317.stgit@localhost.localdomain

Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: "Frank Ch. Eigler" <fche@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>

[ Updated to new port of ftrace save regs functions ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 arch/x86/include/asm/kprobes.h |  1 +
 arch/x86/kernel/kprobes.c      | 48 ++++++++++++++++++++++++++++++++++++++++++
 include/linux/kprobes.h        |  2 +-
 kernel/kprobes.c               |  2 +-
 4 files changed, 51 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h
index 54788253915..d3ddd17405d 100644
--- a/arch/x86/include/asm/kprobes.h
+++ b/arch/x86/include/asm/kprobes.h
@@ -27,6 +27,7 @@
 #include <asm/insn.h>
 
 #define  __ARCH_WANT_KPROBES_INSN_SLOT
+#define  ARCH_SUPPORTS_KPROBES_ON_FTRACE
 
 struct pt_regs;
 struct kprobe;
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index e2f751efb7b..47ae1023a93 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -1052,6 +1052,54 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
 	return 0;
 }
 
+#ifdef KPROBES_CAN_USE_FTRACE
+/* Ftrace callback handler for kprobes */
+void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
+				     struct ftrace_ops *ops, struct pt_regs *regs)
+{
+	struct kprobe *p;
+	struct kprobe_ctlblk *kcb;
+	unsigned long flags;
+
+	/* Disable irq for emulating a breakpoint and avoiding preempt */
+	local_irq_save(flags);
+
+	p = get_kprobe((kprobe_opcode_t *)ip);
+	if (unlikely(!p) || kprobe_disabled(p))
+		goto end;
+
+	kcb = get_kprobe_ctlblk();
+	if (kprobe_running()) {
+		kprobes_inc_nmissed_count(p);
+	} else {
+		regs->ip += sizeof(kprobe_opcode_t);
+
+		__this_cpu_write(current_kprobe, p);
+		kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+		if (p->pre_handler)
+			p->pre_handler(p, regs);
+
+		if (unlikely(p->post_handler)) {
+			/* Emulate singlestep as if there is a 5byte nop */
+			regs->ip = ip + MCOUNT_INSN_SIZE;
+			kcb->kprobe_status = KPROBE_HIT_SSDONE;
+			p->post_handler(p, regs, 0);
+		}
+		__this_cpu_write(current_kprobe, NULL);
+		regs->ip = ip;	/* Recover for next callback */
+	}
+end:
+	local_irq_restore(flags);
+}
+
+int __kprobes arch_prepare_kprobe_ftrace(struct kprobe *p)
+{
+	p->ainsn.insn = NULL;
+	p->ainsn.boostable = -1;
+	return 0;
+}
+#endif
+
 int __init arch_init_kprobes(void)
 {
 	return arch_init_optprobes();
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index aa0d05e852e..23755ba42ab 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -318,7 +318,7 @@ extern int proc_kprobes_optimization_handler(struct ctl_table *table,
 #endif /* CONFIG_OPTPROBES */
 #ifdef KPROBES_CAN_USE_FTRACE
 extern void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
-				  struct pt_regs *regs);
+				  struct ftrace_ops *ops, struct pt_regs *regs);
 extern int arch_prepare_kprobe_ftrace(struct kprobe *p);
 #endif
 
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 69c16efc315..35b4315d84f 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -921,7 +921,7 @@ static __kprobes struct kprobe *alloc_aggr_kprobe(struct kprobe *p)
 
 #ifdef KPROBES_CAN_USE_FTRACE
 static struct ftrace_ops kprobe_ftrace_ops __read_mostly = {
-	.regs_func = kprobe_ftrace_handler,
+	.func = kprobe_ftrace_handler,
 	.flags = FTRACE_OPS_FL_SAVE_REGS,
 };
 static int kprobe_ftrace_enabled;
-- 
cgit v1.2.3-70-g09d2


From e9d90d472da97e1b1560bffb89578ba082c88a69 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Thu, 26 Jul 2012 18:01:50 +0300
Subject: KVM: Remove internal timer abstraction

kvm_timer_fn(), the sole inhabitant of timer.c, is only used by lapic.c. Move
it there to make it easier to hack on it.

struct kvm_timer is a thin wrapper around hrtimer, and only adds obfuscation.
Move near its two users (with different names) to prepare for simplification.

Signed-off-by: Avi Kivity <avi@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/Makefile    |  2 +-
 arch/x86/kvm/i8254.c     |  8 ++++----
 arch/x86/kvm/i8254.h     | 18 +++++++++++++++++-
 arch/x86/kvm/kvm_timer.h | 18 ------------------
 arch/x86/kvm/lapic.c     | 30 +++++++++++++++++++++++++++++-
 arch/x86/kvm/lapic.h     | 17 ++++++++++++++++-
 arch/x86/kvm/timer.c     | 47 -----------------------------------------------
 7 files changed, 67 insertions(+), 73 deletions(-)
 delete mode 100644 arch/x86/kvm/kvm_timer.h
 delete mode 100644 arch/x86/kvm/timer.c

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 4f579e8dcac..04d30401c5c 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -12,7 +12,7 @@ kvm-$(CONFIG_IOMMU_API)	+= $(addprefix ../../../virt/kvm/, iommu.o)
 kvm-$(CONFIG_KVM_ASYNC_PF)	+= $(addprefix ../../../virt/kvm/, async_pf.o)
 
 kvm-y			+= x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
-			   i8254.o timer.o cpuid.o pmu.o
+			   i8254.o cpuid.o pmu.o
 kvm-intel-y		+= vmx.o
 kvm-amd-y		+= svm.o
 
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index adba28f88d1..1d8e75702d9 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -272,14 +272,14 @@ static void destroy_pit_timer(struct kvm_pit *pit)
 	flush_kthread_work(&pit->expired);
 }
 
-static bool kpit_is_periodic(struct kvm_timer *ktimer)
+static bool kpit_is_periodic(struct kvm_pit_timer *ktimer)
 {
 	struct kvm_kpit_state *ps = container_of(ktimer, struct kvm_kpit_state,
 						 pit_timer);
 	return ps->is_periodic;
 }
 
-static struct kvm_timer_ops kpit_ops = {
+static struct kvm_pit_timer_ops kpit_ops = {
 	.is_periodic = kpit_is_periodic,
 };
 
@@ -322,7 +322,7 @@ static void pit_do_work(struct kthread_work *work)
 
 static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
 {
-	struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
+	struct kvm_pit_timer *ktimer = container_of(data, struct kvm_pit_timer, timer);
 	struct kvm_pit *pt = ktimer->kvm->arch.vpit;
 
 	if (ktimer->reinject || !atomic_read(&ktimer->pending)) {
@@ -340,7 +340,7 @@ static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
 static void create_pit_timer(struct kvm *kvm, u32 val, int is_period)
 {
 	struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state;
-	struct kvm_timer *pt = &ps->pit_timer;
+	struct kvm_pit_timer *pt = &ps->pit_timer;
 	s64 interval;
 
 	if (!irqchip_in_kernel(kvm) || ps->flags & KVM_PIT_FLAGS_HPET_LEGACY)
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h
index fdf40425ea1..3351816e8b3 100644
--- a/arch/x86/kvm/i8254.h
+++ b/arch/x86/kvm/i8254.h
@@ -21,10 +21,26 @@ struct kvm_kpit_channel_state {
 	ktime_t count_load_time;
 };
 
+struct kvm_pit_timer {
+	struct hrtimer timer;
+	s64 period; 				/* unit: ns */
+	u32 timer_mode_mask;
+	u64 tscdeadline;
+	atomic_t pending;			/* accumulated triggered timers */
+	bool reinject;
+	struct kvm_pit_timer_ops *t_ops;
+	struct kvm *kvm;
+	struct kvm_vcpu *vcpu;
+};
+
+struct kvm_pit_timer_ops {
+	bool (*is_periodic)(struct kvm_pit_timer *);
+};
+
 struct kvm_kpit_state {
 	struct kvm_kpit_channel_state channels[3];
 	u32 flags;
-	struct kvm_timer pit_timer;
+	struct kvm_pit_timer pit_timer;
 	bool is_periodic;
 	u32    speaker_data_on;
 	struct mutex lock;
diff --git a/arch/x86/kvm/kvm_timer.h b/arch/x86/kvm/kvm_timer.h
deleted file mode 100644
index 497dbaa366d..00000000000
--- a/arch/x86/kvm/kvm_timer.h
+++ /dev/null
@@ -1,18 +0,0 @@
-
-struct kvm_timer {
-	struct hrtimer timer;
-	s64 period; 				/* unit: ns */
-	u32 timer_mode_mask;
-	u64 tscdeadline;
-	atomic_t pending;			/* accumulated triggered timers */
-	bool reinject;
-	struct kvm_timer_ops *t_ops;
-	struct kvm *kvm;
-	struct kvm_vcpu *vcpu;
-};
-
-struct kvm_timer_ops {
-	bool (*is_periodic)(struct kvm_timer *);
-};
-
-enum hrtimer_restart kvm_timer_fn(struct hrtimer *data);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index ad7fff7ad13..61ed32cd17c 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1262,6 +1262,34 @@ static const struct kvm_io_device_ops apic_mmio_ops = {
 	.write    = apic_mmio_write,
 };
 
+static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
+{
+	struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
+	struct kvm_vcpu *vcpu = ktimer->vcpu;
+	wait_queue_head_t *q = &vcpu->wq;
+
+	/*
+	 * There is a race window between reading and incrementing, but we do
+	 * not care about potentially losing timer events in the !reinject
+	 * case anyway. Note: KVM_REQ_PENDING_TIMER is implicitly checked
+	 * in vcpu_enter_guest.
+	 */
+	if (ktimer->reinject || !atomic_read(&ktimer->pending)) {
+		atomic_inc(&ktimer->pending);
+		/* FIXME: this code should not know anything about vcpus */
+		kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
+	}
+
+	if (waitqueue_active(q))
+		wake_up_interruptible(q);
+
+	if (ktimer->t_ops->is_periodic(ktimer)) {
+		hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
+		return HRTIMER_RESTART;
+	} else
+		return HRTIMER_NORESTART;
+}
+
 int kvm_create_lapic(struct kvm_vcpu *vcpu)
 {
 	struct kvm_lapic *apic;
@@ -1285,7 +1313,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
 
 	hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
 		     HRTIMER_MODE_ABS);
-	apic->lapic_timer.timer.function = kvm_timer_fn;
+	apic->lapic_timer.timer.function = apic_timer_fn;
 	apic->lapic_timer.t_ops = &lapic_timer_ops;
 	apic->lapic_timer.kvm = vcpu->kvm;
 	apic->lapic_timer.vcpu = vcpu;
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 4af5405ae1e..d7251c92ed4 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -2,10 +2,25 @@
 #define __KVM_X86_LAPIC_H
 
 #include "iodev.h"
-#include "kvm_timer.h"
 
 #include <linux/kvm_host.h>
 
+struct kvm_timer {
+	struct hrtimer timer;
+	s64 period; 				/* unit: ns */
+	u32 timer_mode_mask;
+	u64 tscdeadline;
+	atomic_t pending;			/* accumulated triggered timers */
+	bool reinject;
+	struct kvm_timer_ops *t_ops;
+	struct kvm *kvm;
+	struct kvm_vcpu *vcpu;
+};
+
+struct kvm_timer_ops {
+	bool (*is_periodic)(struct kvm_timer *);
+};
+
 struct kvm_lapic {
 	unsigned long base_address;
 	struct kvm_io_device dev;
diff --git a/arch/x86/kvm/timer.c b/arch/x86/kvm/timer.c
deleted file mode 100644
index 6b85cc647f3..00000000000
--- a/arch/x86/kvm/timer.c
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Kernel-based Virtual Machine driver for Linux
- *
- * This module enables machines with Intel VT-x extensions to run virtual
- * machines without emulation or binary translation.
- *
- * timer support
- *
- * Copyright 2010 Red Hat, Inc. and/or its affiliates.
- *
- * This work is licensed under the terms of the GNU GPL, version 2.  See
- * the COPYING file in the top-level directory.
- */
-
-#include <linux/kvm_host.h>
-#include <linux/kvm.h>
-#include <linux/hrtimer.h>
-#include <linux/atomic.h>
-#include "kvm_timer.h"
-
-enum hrtimer_restart kvm_timer_fn(struct hrtimer *data)
-{
-	struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
-	struct kvm_vcpu *vcpu = ktimer->vcpu;
-	wait_queue_head_t *q = &vcpu->wq;
-
-	/*
-	 * There is a race window between reading and incrementing, but we do
-	 * not care about potentially losing timer events in the !reinject
-	 * case anyway. Note: KVM_REQ_PENDING_TIMER is implicitly checked
-	 * in vcpu_enter_guest.
-	 */
-	if (ktimer->reinject || !atomic_read(&ktimer->pending)) {
-		atomic_inc(&ktimer->pending);
-		/* FIXME: this code should not know anything about vcpus */
-		kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
-	}
-
-	if (waitqueue_active(q))
-		wake_up_interruptible(q);
-
-	if (ktimer->t_ops->is_periodic(ktimer)) {
-		hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
-		return HRTIMER_RESTART;
-	} else
-		return HRTIMER_NORESTART;
-}
-- 
cgit v1.2.3-70-g09d2


From 2a6eac9638a92b61de04bac4233d8ca665ae96af Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Thu, 26 Jul 2012 18:01:51 +0300
Subject: KVM: Simplify kvm_timer

'reinject' is never initialized
't_ops' only serves as indirection to lapic_is_periodic; call that directly
   instead
'kvm' is never used
'vcpu' can be derived via container_of

Remove these fields.

Signed-off-by: Avi Kivity <avi@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/lapic.c | 18 +++++-------------
 arch/x86/kvm/lapic.h |  8 --------
 2 files changed, 5 insertions(+), 21 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 61ed32cd17c..0cd431c85d3 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1214,10 +1214,8 @@ int kvm_lapic_enabled(struct kvm_vcpu *vcpu)
  *----------------------------------------------------------------------
  */
 
-static bool lapic_is_periodic(struct kvm_timer *ktimer)
+static bool lapic_is_periodic(struct kvm_lapic *apic)
 {
-	struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic,
-					      lapic_timer);
 	return apic_lvtt_period(apic);
 }
 
@@ -1253,10 +1251,6 @@ void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu)
 		kvm_apic_local_deliver(apic, APIC_LVT0);
 }
 
-static struct kvm_timer_ops lapic_timer_ops = {
-	.is_periodic = lapic_is_periodic,
-};
-
 static const struct kvm_io_device_ops apic_mmio_ops = {
 	.read     = apic_mmio_read,
 	.write    = apic_mmio_write,
@@ -1265,7 +1259,8 @@ static const struct kvm_io_device_ops apic_mmio_ops = {
 static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
 {
 	struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
-	struct kvm_vcpu *vcpu = ktimer->vcpu;
+	struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic, lapic_timer);
+	struct kvm_vcpu *vcpu = apic->vcpu;
 	wait_queue_head_t *q = &vcpu->wq;
 
 	/*
@@ -1274,7 +1269,7 @@ static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
 	 * case anyway. Note: KVM_REQ_PENDING_TIMER is implicitly checked
 	 * in vcpu_enter_guest.
 	 */
-	if (ktimer->reinject || !atomic_read(&ktimer->pending)) {
+	if (!atomic_read(&ktimer->pending)) {
 		atomic_inc(&ktimer->pending);
 		/* FIXME: this code should not know anything about vcpus */
 		kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
@@ -1283,7 +1278,7 @@ static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
 	if (waitqueue_active(q))
 		wake_up_interruptible(q);
 
-	if (ktimer->t_ops->is_periodic(ktimer)) {
+	if (lapic_is_periodic(apic)) {
 		hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
 		return HRTIMER_RESTART;
 	} else
@@ -1314,9 +1309,6 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
 	hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
 		     HRTIMER_MODE_ABS);
 	apic->lapic_timer.timer.function = apic_timer_fn;
-	apic->lapic_timer.t_ops = &lapic_timer_ops;
-	apic->lapic_timer.kvm = vcpu->kvm;
-	apic->lapic_timer.vcpu = vcpu;
 
 	apic->base_address = APIC_DEFAULT_PHYS_BASE;
 	vcpu->arch.apic_base = APIC_DEFAULT_PHYS_BASE;
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index d7251c92ed4..166766fffd9 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -11,14 +11,6 @@ struct kvm_timer {
 	u32 timer_mode_mask;
 	u64 tscdeadline;
 	atomic_t pending;			/* accumulated triggered timers */
-	bool reinject;
-	struct kvm_timer_ops *t_ops;
-	struct kvm *kvm;
-	struct kvm_vcpu *vcpu;
-};
-
-struct kvm_timer_ops {
-	bool (*is_periodic)(struct kvm_timer *);
 };
 
 struct kvm_lapic {
-- 
cgit v1.2.3-70-g09d2


From 9d9d2239bdecd525ce3eb6cbfe4abb925c98208c Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Thu, 26 Jul 2012 18:01:52 +0300
Subject: KVM: Simplify kvm_pit_timer

'timer_mode_mask' is unused
'tscdeadline' is unused
't_ops' only adds needless indirection
'vcpu' is unused

Remove.

Signed-off-by: Avi Kivity <avi@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/i8254.c | 14 +-------------
 arch/x86/kvm/i8254.h |  8 --------
 2 files changed, 1 insertion(+), 21 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 1d8e75702d9..a9e187a5b19 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -272,17 +272,6 @@ static void destroy_pit_timer(struct kvm_pit *pit)
 	flush_kthread_work(&pit->expired);
 }
 
-static bool kpit_is_periodic(struct kvm_pit_timer *ktimer)
-{
-	struct kvm_kpit_state *ps = container_of(ktimer, struct kvm_kpit_state,
-						 pit_timer);
-	return ps->is_periodic;
-}
-
-static struct kvm_pit_timer_ops kpit_ops = {
-	.is_periodic = kpit_is_periodic,
-};
-
 static void pit_do_work(struct kthread_work *work)
 {
 	struct kvm_pit *pit = container_of(work, struct kvm_pit, expired);
@@ -330,7 +319,7 @@ static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
 		queue_kthread_work(&pt->worker, &pt->expired);
 	}
 
-	if (ktimer->t_ops->is_periodic(ktimer)) {
+	if (pt->pit_state.is_periodic) {
 		hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
 		return HRTIMER_RESTART;
 	} else
@@ -357,7 +346,6 @@ static void create_pit_timer(struct kvm *kvm, u32 val, int is_period)
 	ps->is_periodic = is_period;
 
 	pt->timer.function = pit_timer_fn;
-	pt->t_ops = &kpit_ops;
 	pt->kvm = ps->pit->kvm;
 
 	atomic_set(&pt->pending, 0);
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h
index 3351816e8b3..c9bbcb889c4 100644
--- a/arch/x86/kvm/i8254.h
+++ b/arch/x86/kvm/i8254.h
@@ -24,17 +24,9 @@ struct kvm_kpit_channel_state {
 struct kvm_pit_timer {
 	struct hrtimer timer;
 	s64 period; 				/* unit: ns */
-	u32 timer_mode_mask;
-	u64 tscdeadline;
 	atomic_t pending;			/* accumulated triggered timers */
 	bool reinject;
-	struct kvm_pit_timer_ops *t_ops;
 	struct kvm *kvm;
-	struct kvm_vcpu *vcpu;
-};
-
-struct kvm_pit_timer_ops {
-	bool (*is_periodic)(struct kvm_pit_timer *);
 };
 
 struct kvm_kpit_state {
-- 
cgit v1.2.3-70-g09d2


From 26ef19242f6e4d747a61b5fd8da72343838864e4 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Thu, 26 Jul 2012 18:01:53 +0300
Subject: KVM: fold kvm_pit_timer into kvm_kpit_state

One structure nests inside the other, providing no value at all.

Signed-off-by: Avi Kivity <avi@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/i8254.c | 52 +++++++++++++++++++++++++---------------------------
 arch/x86/kvm/i8254.h | 14 +++++---------
 arch/x86/kvm/x86.c   |  2 +-
 3 files changed, 31 insertions(+), 37 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index a9e187a5b19..11300d2fa71 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -108,7 +108,7 @@ static s64 __kpit_elapsed(struct kvm *kvm)
 	ktime_t remaining;
 	struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state;
 
-	if (!ps->pit_timer.period)
+	if (!ps->period)
 		return 0;
 
 	/*
@@ -120,9 +120,9 @@ static s64 __kpit_elapsed(struct kvm *kvm)
 	 * itself with the initial count and continues counting
 	 * from there.
 	 */
-	remaining = hrtimer_get_remaining(&ps->pit_timer.timer);
-	elapsed = ps->pit_timer.period - ktime_to_ns(remaining);
-	elapsed = mod_64(elapsed, ps->pit_timer.period);
+	remaining = hrtimer_get_remaining(&ps->timer);
+	elapsed = ps->period - ktime_to_ns(remaining);
+	elapsed = mod_64(elapsed, ps->period);
 
 	return elapsed;
 }
@@ -238,12 +238,12 @@ static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian)
 	int value;
 
 	spin_lock(&ps->inject_lock);
-	value = atomic_dec_return(&ps->pit_timer.pending);
+	value = atomic_dec_return(&ps->pending);
 	if (value < 0)
 		/* spurious acks can be generated if, for example, the
 		 * PIC is being reset.  Handle it gracefully here
 		 */
-		atomic_inc(&ps->pit_timer.pending);
+		atomic_inc(&ps->pending);
 	else if (value > 0)
 		/* in this case, we had multiple outstanding pit interrupts
 		 * that we needed to inject.  Reinject
@@ -261,14 +261,14 @@ void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
 	if (!kvm_vcpu_is_bsp(vcpu) || !pit)
 		return;
 
-	timer = &pit->pit_state.pit_timer.timer;
+	timer = &pit->pit_state.timer;
 	if (hrtimer_cancel(timer))
 		hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
 }
 
 static void destroy_pit_timer(struct kvm_pit *pit)
 {
-	hrtimer_cancel(&pit->pit_state.pit_timer.timer);
+	hrtimer_cancel(&pit->pit_state.timer);
 	flush_kthread_work(&pit->expired);
 }
 
@@ -311,16 +311,16 @@ static void pit_do_work(struct kthread_work *work)
 
 static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
 {
-	struct kvm_pit_timer *ktimer = container_of(data, struct kvm_pit_timer, timer);
-	struct kvm_pit *pt = ktimer->kvm->arch.vpit;
+	struct kvm_kpit_state *ps = container_of(data, struct kvm_kpit_state, timer);
+	struct kvm_pit *pt = ps->kvm->arch.vpit;
 
-	if (ktimer->reinject || !atomic_read(&ktimer->pending)) {
-		atomic_inc(&ktimer->pending);
+	if (ps->reinject || !atomic_read(&ps->pending)) {
+		atomic_inc(&ps->pending);
 		queue_kthread_work(&pt->worker, &pt->expired);
 	}
 
-	if (pt->pit_state.is_periodic) {
-		hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
+	if (ps->is_periodic) {
+		hrtimer_add_expires_ns(&ps->timer, ps->period);
 		return HRTIMER_RESTART;
 	} else
 		return HRTIMER_NORESTART;
@@ -329,7 +329,6 @@ static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
 static void create_pit_timer(struct kvm *kvm, u32 val, int is_period)
 {
 	struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state;
-	struct kvm_pit_timer *pt = &ps->pit_timer;
 	s64 interval;
 
 	if (!irqchip_in_kernel(kvm) || ps->flags & KVM_PIT_FLAGS_HPET_LEGACY)
@@ -340,18 +339,18 @@ static void create_pit_timer(struct kvm *kvm, u32 val, int is_period)
 	pr_debug("create pit timer, interval is %llu nsec\n", interval);
 
 	/* TODO The new value only affected after the retriggered */
-	hrtimer_cancel(&pt->timer);
+	hrtimer_cancel(&ps->timer);
 	flush_kthread_work(&ps->pit->expired);
-	pt->period = interval;
+	ps->period = interval;
 	ps->is_periodic = is_period;
 
-	pt->timer.function = pit_timer_fn;
-	pt->kvm = ps->pit->kvm;
+	ps->timer.function = pit_timer_fn;
+	ps->kvm = ps->pit->kvm;
 
-	atomic_set(&pt->pending, 0);
+	atomic_set(&ps->pending, 0);
 	ps->irq_ack = 1;
 
-	hrtimer_start(&pt->timer, ktime_add_ns(ktime_get(), interval),
+	hrtimer_start(&ps->timer, ktime_add_ns(ktime_get(), interval),
 		      HRTIMER_MODE_ABS);
 }
 
@@ -627,7 +626,7 @@ void kvm_pit_reset(struct kvm_pit *pit)
 	}
 	mutex_unlock(&pit->pit_state.lock);
 
-	atomic_set(&pit->pit_state.pit_timer.pending, 0);
+	atomic_set(&pit->pit_state.pending, 0);
 	pit->pit_state.irq_ack = 1;
 }
 
@@ -636,7 +635,7 @@ static void pit_mask_notifer(struct kvm_irq_mask_notifier *kimn, bool mask)
 	struct kvm_pit *pit = container_of(kimn, struct kvm_pit, mask_notifier);
 
 	if (!mask) {
-		atomic_set(&pit->pit_state.pit_timer.pending, 0);
+		atomic_set(&pit->pit_state.pending, 0);
 		pit->pit_state.irq_ack = 1;
 	}
 }
@@ -694,12 +693,11 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
 
 	pit_state = &pit->pit_state;
 	pit_state->pit = pit;
-	hrtimer_init(&pit_state->pit_timer.timer,
-		     CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+	hrtimer_init(&pit_state->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
 	pit_state->irq_ack_notifier.gsi = 0;
 	pit_state->irq_ack_notifier.irq_acked = kvm_pit_ack_irq;
 	kvm_register_irq_ack_notifier(kvm, &pit_state->irq_ack_notifier);
-	pit_state->pit_timer.reinject = true;
+	pit_state->reinject = true;
 	mutex_unlock(&pit->pit_state.lock);
 
 	kvm_pit_reset(pit);
@@ -749,7 +747,7 @@ void kvm_free_pit(struct kvm *kvm)
 		kvm_unregister_irq_ack_notifier(kvm,
 				&kvm->arch.vpit->pit_state.irq_ack_notifier);
 		mutex_lock(&kvm->arch.vpit->pit_state.lock);
-		timer = &kvm->arch.vpit->pit_state.pit_timer.timer;
+		timer = &kvm->arch.vpit->pit_state.timer;
 		hrtimer_cancel(timer);
 		flush_kthread_work(&kvm->arch.vpit->expired);
 		kthread_stop(kvm->arch.vpit->worker_task);
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h
index c9bbcb889c4..dd1b16b611b 100644
--- a/arch/x86/kvm/i8254.h
+++ b/arch/x86/kvm/i8254.h
@@ -21,19 +21,15 @@ struct kvm_kpit_channel_state {
 	ktime_t count_load_time;
 };
 
-struct kvm_pit_timer {
-	struct hrtimer timer;
-	s64 period; 				/* unit: ns */
-	atomic_t pending;			/* accumulated triggered timers */
-	bool reinject;
-	struct kvm *kvm;
-};
-
 struct kvm_kpit_state {
 	struct kvm_kpit_channel_state channels[3];
 	u32 flags;
-	struct kvm_pit_timer pit_timer;
 	bool is_periodic;
+	s64 period; 				/* unit: ns */
+	struct hrtimer timer;
+	atomic_t pending;			/* accumulated triggered timers */
+	bool reinject;
+	struct kvm *kvm;
 	u32    speaker_data_on;
 	struct mutex lock;
 	struct kvm_pit *pit;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b6379e55ee2..3a53bcc24f2 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3082,7 +3082,7 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm,
 	if (!kvm->arch.vpit)
 		return -ENXIO;
 	mutex_lock(&kvm->arch.vpit->pit_state.lock);
-	kvm->arch.vpit->pit_state.pit_timer.reinject = control->pit_reinject;
+	kvm->arch.vpit->pit_state.reinject = control->pit_reinject;
 	mutex_unlock(&kvm->arch.vpit->pit_state.lock);
 	return 0;
 }
-- 
cgit v1.2.3-70-g09d2


From 7af6c2456851eb08d51d95de38ae8302994031e9 Mon Sep 17 00:00:00 2001
From: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
Date: Wed, 11 Jul 2012 14:20:51 +0300
Subject: crypto: arch/x86 - cleanup - remove unneeded crypto_alg.cra_list
 initializations

Initialization of cra_list is currently mixed, most ciphers initialize this
field and most shashes do not. Initialization however is not needed at all
since cra_list is initialized/overwritten in __crypto_register_alg() with
list_add(). Therefore perform cleanup to remove all unneeded initializations
of this field in 'arch/x86/crypto/'.

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/x86/crypto/aes_glue.c                 |  1 -
 arch/x86/crypto/aesni-intel_glue.c         |  5 +----
 arch/x86/crypto/blowfish_glue.c            |  4 ----
 arch/x86/crypto/camellia_glue.c            |  6 ------
 arch/x86/crypto/ghash-clmulni-intel_glue.c |  2 --
 arch/x86/crypto/salsa20_glue.c             |  1 -
 arch/x86/crypto/serpent_avx_glue.c         | 10 ----------
 arch/x86/crypto/serpent_sse2_glue.c        | 10 ----------
 arch/x86/crypto/twofish_avx_glue.c         | 10 ----------
 arch/x86/crypto/twofish_glue.c             |  1 -
 arch/x86/crypto/twofish_glue_3way.c        |  5 -----
 11 files changed, 1 insertion(+), 54 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/crypto/aes_glue.c b/arch/x86/crypto/aes_glue.c
index 59b37deb8c8..aafe8ce0d65 100644
--- a/arch/x86/crypto/aes_glue.c
+++ b/arch/x86/crypto/aes_glue.c
@@ -40,7 +40,6 @@ static struct crypto_alg aes_alg = {
 	.cra_blocksize		= AES_BLOCK_SIZE,
 	.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(aes_alg.cra_list),
 	.cra_u	= {
 		.cipher	= {
 			.cia_min_keysize	= AES_MIN_KEY_SIZE,
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 34fdcff4d2c..648347a0577 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -1118,7 +1118,7 @@ MODULE_DEVICE_TABLE(x86cpu, aesni_cpu_id);
 
 static int __init aesni_init(void)
 {
-	int err, i;
+	int err;
 
 	if (!x86_match_cpu(aesni_cpu_id))
 		return -ENODEV;
@@ -1127,9 +1127,6 @@ static int __init aesni_init(void)
 	if (err)
 		return err;
 
-	for (i = 0; i < ARRAY_SIZE(aesni_algs); i++)
-		INIT_LIST_HEAD(&aesni_algs[i].cra_list);
-
 	return crypto_register_algs(aesni_algs, ARRAY_SIZE(aesni_algs));
 }
 
diff --git a/arch/x86/crypto/blowfish_glue.c b/arch/x86/crypto/blowfish_glue.c
index 7967474de8f..50ec333b70e 100644
--- a/arch/x86/crypto/blowfish_glue.c
+++ b/arch/x86/crypto/blowfish_glue.c
@@ -367,7 +367,6 @@ static struct crypto_alg bf_algs[4] = { {
 	.cra_ctxsize		= sizeof(struct bf_ctx),
 	.cra_alignmask		= 0,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(bf_algs[0].cra_list),
 	.cra_u = {
 		.cipher = {
 			.cia_min_keysize	= BF_MIN_KEY_SIZE,
@@ -387,7 +386,6 @@ static struct crypto_alg bf_algs[4] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(bf_algs[1].cra_list),
 	.cra_u = {
 		.blkcipher = {
 			.min_keysize	= BF_MIN_KEY_SIZE,
@@ -407,7 +405,6 @@ static struct crypto_alg bf_algs[4] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(bf_algs[2].cra_list),
 	.cra_u = {
 		.blkcipher = {
 			.min_keysize	= BF_MIN_KEY_SIZE,
@@ -428,7 +425,6 @@ static struct crypto_alg bf_algs[4] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(bf_algs[3].cra_list),
 	.cra_u = {
 		.blkcipher = {
 			.min_keysize	= BF_MIN_KEY_SIZE,
diff --git a/arch/x86/crypto/camellia_glue.c b/arch/x86/crypto/camellia_glue.c
index eeb2b3b743e..7a74d7bb326 100644
--- a/arch/x86/crypto/camellia_glue.c
+++ b/arch/x86/crypto/camellia_glue.c
@@ -1601,7 +1601,6 @@ static struct crypto_alg camellia_algs[6] = { {
 	.cra_ctxsize		= sizeof(struct camellia_ctx),
 	.cra_alignmask		= 0,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(camellia_algs[0].cra_list),
 	.cra_u			= {
 		.cipher = {
 			.cia_min_keysize = CAMELLIA_MIN_KEY_SIZE,
@@ -1621,7 +1620,6 @@ static struct crypto_alg camellia_algs[6] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(camellia_algs[1].cra_list),
 	.cra_u = {
 		.blkcipher = {
 			.min_keysize	= CAMELLIA_MIN_KEY_SIZE,
@@ -1641,7 +1639,6 @@ static struct crypto_alg camellia_algs[6] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(camellia_algs[2].cra_list),
 	.cra_u = {
 		.blkcipher = {
 			.min_keysize	= CAMELLIA_MIN_KEY_SIZE,
@@ -1662,7 +1659,6 @@ static struct crypto_alg camellia_algs[6] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(camellia_algs[3].cra_list),
 	.cra_u = {
 		.blkcipher = {
 			.min_keysize	= CAMELLIA_MIN_KEY_SIZE,
@@ -1683,7 +1679,6 @@ static struct crypto_alg camellia_algs[6] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(camellia_algs[4].cra_list),
 	.cra_exit		= lrw_exit_tfm,
 	.cra_u = {
 		.blkcipher = {
@@ -1707,7 +1702,6 @@ static struct crypto_alg camellia_algs[6] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(camellia_algs[5].cra_list),
 	.cra_u = {
 		.blkcipher = {
 			.min_keysize	= CAMELLIA_MIN_KEY_SIZE * 2,
diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c
index b4bf0a63b52..6759dd1135b 100644
--- a/arch/x86/crypto/ghash-clmulni-intel_glue.c
+++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c
@@ -150,7 +150,6 @@ static struct shash_alg ghash_alg = {
 		.cra_blocksize		= GHASH_BLOCK_SIZE,
 		.cra_ctxsize		= sizeof(struct ghash_ctx),
 		.cra_module		= THIS_MODULE,
-		.cra_list		= LIST_HEAD_INIT(ghash_alg.base.cra_list),
 	},
 };
 
@@ -288,7 +287,6 @@ static struct ahash_alg ghash_async_alg = {
 			.cra_blocksize		= GHASH_BLOCK_SIZE,
 			.cra_type		= &crypto_ahash_type,
 			.cra_module		= THIS_MODULE,
-			.cra_list		= LIST_HEAD_INIT(ghash_async_alg.halg.base.cra_list),
 			.cra_init		= ghash_async_init_tfm,
 			.cra_exit		= ghash_async_exit_tfm,
 		},
diff --git a/arch/x86/crypto/salsa20_glue.c b/arch/x86/crypto/salsa20_glue.c
index bccb76d8098..a3a3c0205c1 100644
--- a/arch/x86/crypto/salsa20_glue.c
+++ b/arch/x86/crypto/salsa20_glue.c
@@ -97,7 +97,6 @@ static struct crypto_alg alg = {
 	.cra_ctxsize        =   sizeof(struct salsa20_ctx),
 	.cra_alignmask      =	3,
 	.cra_module         =   THIS_MODULE,
-	.cra_list           =   LIST_HEAD_INIT(alg.cra_list),
 	.cra_u              =   {
 		.blkcipher = {
 			.setkey         =   setkey,
diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c
index b36bdac237e..3f543a04cf1 100644
--- a/arch/x86/crypto/serpent_avx_glue.c
+++ b/arch/x86/crypto/serpent_avx_glue.c
@@ -390,7 +390,6 @@ static struct crypto_alg serpent_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(serpent_algs[0].cra_list),
 	.cra_u = {
 		.blkcipher = {
 			.min_keysize	= SERPENT_MIN_KEY_SIZE,
@@ -410,7 +409,6 @@ static struct crypto_alg serpent_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(serpent_algs[1].cra_list),
 	.cra_u = {
 		.blkcipher = {
 			.min_keysize	= SERPENT_MIN_KEY_SIZE,
@@ -430,7 +428,6 @@ static struct crypto_alg serpent_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(serpent_algs[2].cra_list),
 	.cra_u = {
 		.blkcipher = {
 			.min_keysize	= SERPENT_MIN_KEY_SIZE,
@@ -451,7 +448,6 @@ static struct crypto_alg serpent_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(serpent_algs[3].cra_list),
 	.cra_exit		= lrw_exit_tfm,
 	.cra_u = {
 		.blkcipher = {
@@ -475,7 +471,6 @@ static struct crypto_alg serpent_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(serpent_algs[4].cra_list),
 	.cra_u = {
 		.blkcipher = {
 			.min_keysize	= SERPENT_MIN_KEY_SIZE * 2,
@@ -496,7 +491,6 @@ static struct crypto_alg serpent_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(serpent_algs[5].cra_list),
 	.cra_init		= ablk_init,
 	.cra_exit		= ablk_exit,
 	.cra_u = {
@@ -518,7 +512,6 @@ static struct crypto_alg serpent_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(serpent_algs[6].cra_list),
 	.cra_init		= ablk_init,
 	.cra_exit		= ablk_exit,
 	.cra_u = {
@@ -541,7 +534,6 @@ static struct crypto_alg serpent_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(serpent_algs[7].cra_list),
 	.cra_init		= ablk_init,
 	.cra_exit		= ablk_exit,
 	.cra_u = {
@@ -565,7 +557,6 @@ static struct crypto_alg serpent_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(serpent_algs[8].cra_list),
 	.cra_init		= ablk_init,
 	.cra_exit		= ablk_exit,
 	.cra_u = {
@@ -590,7 +581,6 @@ static struct crypto_alg serpent_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(serpent_algs[9].cra_list),
 	.cra_init		= ablk_init,
 	.cra_exit		= ablk_exit,
 	.cra_u = {
diff --git a/arch/x86/crypto/serpent_sse2_glue.c b/arch/x86/crypto/serpent_sse2_glue.c
index d679c8675f4..9107a9908c4 100644
--- a/arch/x86/crypto/serpent_sse2_glue.c
+++ b/arch/x86/crypto/serpent_sse2_glue.c
@@ -393,7 +393,6 @@ static struct crypto_alg serpent_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(serpent_algs[0].cra_list),
 	.cra_u = {
 		.blkcipher = {
 			.min_keysize	= SERPENT_MIN_KEY_SIZE,
@@ -413,7 +412,6 @@ static struct crypto_alg serpent_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(serpent_algs[1].cra_list),
 	.cra_u = {
 		.blkcipher = {
 			.min_keysize	= SERPENT_MIN_KEY_SIZE,
@@ -433,7 +431,6 @@ static struct crypto_alg serpent_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(serpent_algs[2].cra_list),
 	.cra_u = {
 		.blkcipher = {
 			.min_keysize	= SERPENT_MIN_KEY_SIZE,
@@ -454,7 +451,6 @@ static struct crypto_alg serpent_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(serpent_algs[3].cra_list),
 	.cra_exit		= lrw_exit_tfm,
 	.cra_u = {
 		.blkcipher = {
@@ -478,7 +474,6 @@ static struct crypto_alg serpent_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(serpent_algs[4].cra_list),
 	.cra_u = {
 		.blkcipher = {
 			.min_keysize	= SERPENT_MIN_KEY_SIZE * 2,
@@ -499,7 +494,6 @@ static struct crypto_alg serpent_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(serpent_algs[5].cra_list),
 	.cra_init		= ablk_init,
 	.cra_exit		= ablk_exit,
 	.cra_u = {
@@ -521,7 +515,6 @@ static struct crypto_alg serpent_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(serpent_algs[6].cra_list),
 	.cra_init		= ablk_init,
 	.cra_exit		= ablk_exit,
 	.cra_u = {
@@ -544,7 +537,6 @@ static struct crypto_alg serpent_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(serpent_algs[7].cra_list),
 	.cra_init		= ablk_init,
 	.cra_exit		= ablk_exit,
 	.cra_u = {
@@ -568,7 +560,6 @@ static struct crypto_alg serpent_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(serpent_algs[8].cra_list),
 	.cra_init		= ablk_init,
 	.cra_exit		= ablk_exit,
 	.cra_u = {
@@ -593,7 +584,6 @@ static struct crypto_alg serpent_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(serpent_algs[9].cra_list),
 	.cra_init		= ablk_init,
 	.cra_exit		= ablk_exit,
 	.cra_u = {
diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c
index 782b67ddaf6..e7708b5442e 100644
--- a/arch/x86/crypto/twofish_avx_glue.c
+++ b/arch/x86/crypto/twofish_avx_glue.c
@@ -378,7 +378,6 @@ static struct crypto_alg twofish_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(twofish_algs[0].cra_list),
 	.cra_u = {
 		.blkcipher = {
 			.min_keysize	= TF_MIN_KEY_SIZE,
@@ -398,7 +397,6 @@ static struct crypto_alg twofish_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(twofish_algs[1].cra_list),
 	.cra_u = {
 		.blkcipher = {
 			.min_keysize	= TF_MIN_KEY_SIZE,
@@ -418,7 +416,6 @@ static struct crypto_alg twofish_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(twofish_algs[2].cra_list),
 	.cra_u = {
 		.blkcipher = {
 			.min_keysize	= TF_MIN_KEY_SIZE,
@@ -439,7 +436,6 @@ static struct crypto_alg twofish_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(twofish_algs[3].cra_list),
 	.cra_exit		= lrw_twofish_exit_tfm,
 	.cra_u = {
 		.blkcipher = {
@@ -463,7 +459,6 @@ static struct crypto_alg twofish_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(twofish_algs[4].cra_list),
 	.cra_u = {
 		.blkcipher = {
 			.min_keysize	= TF_MIN_KEY_SIZE * 2,
@@ -484,7 +479,6 @@ static struct crypto_alg twofish_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(twofish_algs[5].cra_list),
 	.cra_init		= ablk_init,
 	.cra_exit		= ablk_exit,
 	.cra_u = {
@@ -506,7 +500,6 @@ static struct crypto_alg twofish_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(twofish_algs[6].cra_list),
 	.cra_init		= ablk_init,
 	.cra_exit		= ablk_exit,
 	.cra_u = {
@@ -529,7 +522,6 @@ static struct crypto_alg twofish_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(twofish_algs[7].cra_list),
 	.cra_init		= ablk_init,
 	.cra_exit		= ablk_exit,
 	.cra_u = {
@@ -553,7 +545,6 @@ static struct crypto_alg twofish_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(twofish_algs[8].cra_list),
 	.cra_init		= ablk_init,
 	.cra_exit		= ablk_exit,
 	.cra_u = {
@@ -578,7 +569,6 @@ static struct crypto_alg twofish_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(twofish_algs[9].cra_list),
 	.cra_init		= ablk_init,
 	.cra_exit		= ablk_exit,
 	.cra_u = {
diff --git a/arch/x86/crypto/twofish_glue.c b/arch/x86/crypto/twofish_glue.c
index 359ae084275..0a520230350 100644
--- a/arch/x86/crypto/twofish_glue.c
+++ b/arch/x86/crypto/twofish_glue.c
@@ -70,7 +70,6 @@ static struct crypto_alg alg = {
 	.cra_ctxsize		=	sizeof(struct twofish_ctx),
 	.cra_alignmask		=	0,
 	.cra_module		=	THIS_MODULE,
-	.cra_list		=	LIST_HEAD_INIT(alg.cra_list),
 	.cra_u			=	{
 		.cipher = {
 			.cia_min_keysize	=	TF_MIN_KEY_SIZE,
diff --git a/arch/x86/crypto/twofish_glue_3way.c b/arch/x86/crypto/twofish_glue_3way.c
index 15f9347316c..aa3eb358b7e 100644
--- a/arch/x86/crypto/twofish_glue_3way.c
+++ b/arch/x86/crypto/twofish_glue_3way.c
@@ -342,7 +342,6 @@ static struct crypto_alg tf_algs[5] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(tf_algs[0].cra_list),
 	.cra_u = {
 		.blkcipher = {
 			.min_keysize	= TF_MIN_KEY_SIZE,
@@ -362,7 +361,6 @@ static struct crypto_alg tf_algs[5] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(tf_algs[1].cra_list),
 	.cra_u = {
 		.blkcipher = {
 			.min_keysize	= TF_MIN_KEY_SIZE,
@@ -383,7 +381,6 @@ static struct crypto_alg tf_algs[5] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(tf_algs[2].cra_list),
 	.cra_u = {
 		.blkcipher = {
 			.min_keysize	= TF_MIN_KEY_SIZE,
@@ -404,7 +401,6 @@ static struct crypto_alg tf_algs[5] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(tf_algs[3].cra_list),
 	.cra_exit		= lrw_twofish_exit_tfm,
 	.cra_u = {
 		.blkcipher = {
@@ -426,7 +422,6 @@ static struct crypto_alg tf_algs[5] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(tf_algs[4].cra_list),
 	.cra_u = {
 		.blkcipher = {
 			.min_keysize	= TF_MIN_KEY_SIZE * 2,
-- 
cgit v1.2.3-70-g09d2


From 4d6d6a2c850f89bc9283d02519cb536baba72032 Mon Sep 17 00:00:00 2001
From: Johannes Goetzfried
 <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
Date: Wed, 11 Jul 2012 19:37:37 +0200
Subject: crypto: cast5 - add x86_64/avx assembler implementation

This patch adds a x86_64/avx assembler implementation of the Cast5 block
cipher. The implementation processes sixteen blocks in parallel (four 4 block
chunk AVX operations). The table-lookups are done in general-purpose registers.
For small blocksizes the functions from the generic module are called. A good
performance increase is provided for blocksizes greater or equal to 128B.

Patch has been tested with tcrypt and automated filesystem tests.

Tcrypt benchmark results:

Intel Core i5-2500 CPU (fam:6, model:42, step:7)

cast5-avx-x86_64 vs. cast5-generic
64bit key:
size    ecb-enc ecb-dec cbc-enc cbc-dec ctr-enc ctr-dec
16B     0.99x   0.99x   1.00x   1.00x   1.02x   1.01x
64B     1.00x   1.00x   0.98x   1.00x   1.01x   1.02x
256B    2.03x   2.01x   0.95x   2.11x   2.12x   2.13x
1024B   2.30x   2.24x   0.95x   2.29x   2.35x   2.35x
8192B   2.31x   2.27x   0.95x   2.31x   2.39x   2.39x

128bit key:
size    ecb-enc ecb-dec cbc-enc cbc-dec ctr-enc ctr-dec
16B     0.99x   0.99x   1.00x   1.00x   1.01x   1.01x
64B     1.00x   1.00x   0.98x   1.01x   1.02x   1.01x
256B    2.17x   2.13x   0.96x   2.19x   2.19x   2.19x
1024B   2.29x   2.32x   0.95x   2.34x   2.37x   2.38x
8192B   2.35x   2.32x   0.95x   2.35x   2.39x   2.39x

Signed-off-by: Johannes Goetzfried <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/x86/crypto/Makefile                  |   2 +
 arch/x86/crypto/cast5-avx-x86_64-asm_64.S | 322 ++++++++++++++++++
 arch/x86/crypto/cast5_avx_glue.c          | 530 ++++++++++++++++++++++++++++++
 crypto/Kconfig                            |  14 +
 crypto/testmgr.c                          |  60 ++++
 5 files changed, 928 insertions(+)
 create mode 100644 arch/x86/crypto/cast5-avx-x86_64-asm_64.S
 create mode 100644 arch/x86/crypto/cast5_avx_glue.c

(limited to 'arch/x86')

diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index e908e5de82d..565e82b0014 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -12,6 +12,7 @@ obj-$(CONFIG_CRYPTO_SERPENT_SSE2_586) += serpent-sse2-i586.o
 
 obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o
 obj-$(CONFIG_CRYPTO_CAMELLIA_X86_64) += camellia-x86_64.o
+obj-$(CONFIG_CRYPTO_CAST5_AVX_X86_64) += cast5-avx-x86_64.o
 obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o
 obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o
 obj-$(CONFIG_CRYPTO_TWOFISH_X86_64_3WAY) += twofish-x86_64-3way.o
@@ -32,6 +33,7 @@ serpent-sse2-i586-y := serpent-sse2-i586-asm_32.o serpent_sse2_glue.o
 
 aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o
 camellia-x86_64-y := camellia-x86_64-asm_64.o camellia_glue.o
+cast5-avx-x86_64-y := cast5-avx-x86_64-asm_64.o cast5_avx_glue.o
 blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o
 twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o
 twofish-x86_64-3way-y := twofish-x86_64-asm_64-3way.o twofish_glue_3way.o
diff --git a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
new file mode 100644
index 00000000000..94693c877e3
--- /dev/null
+++ b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
@@ -0,0 +1,322 @@
+/*
+ * Cast5 Cipher 16-way parallel algorithm (AVX/x86_64)
+ *
+ * Copyright (C) 2012 Johannes Goetzfried
+ *     <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
+ * USA
+ *
+ */
+
+.file "cast5-avx-x86_64-asm_64.S"
+.text
+
+.extern cast5_s1
+.extern cast5_s2
+.extern cast5_s3
+.extern cast5_s4
+
+/* structure of crypto context */
+#define km	0
+#define kr	(16*4)
+#define rr	((16*4)+16)
+
+/* s-boxes */
+#define s1	cast5_s1
+#define s2	cast5_s2
+#define s3	cast5_s3
+#define s4	cast5_s4
+
+/**********************************************************************
+  16-way AVX cast5
+ **********************************************************************/
+#define CTX %rdi
+
+#define RL1 %xmm0
+#define RR1 %xmm1
+#define RL2 %xmm2
+#define RR2 %xmm3
+#define RL3 %xmm4
+#define RR3 %xmm5
+#define RL4 %xmm6
+#define RR4 %xmm7
+
+#define RX %xmm8
+
+#define RKM  %xmm9
+#define RKRF %xmm10
+#define RKRR %xmm11
+
+#define RTMP  %xmm12
+#define RMASK %xmm13
+#define R32   %xmm14
+
+#define RID1  %rax
+#define RID1b %al
+#define RID2  %rbx
+#define RID2b %bl
+
+#define RGI1   %rdx
+#define RGI1bl %dl
+#define RGI1bh %dh
+#define RGI2   %rcx
+#define RGI2bl %cl
+#define RGI2bh %ch
+
+#define RFS1  %r8
+#define RFS1d %r8d
+#define RFS2  %r9
+#define RFS2d %r9d
+#define RFS3  %r10
+#define RFS3d %r10d
+
+
+#define lookup_32bit(src, dst, op1, op2, op3) \
+	movb		src ## bl,     RID1b;    \
+	movb		src ## bh,     RID2b;    \
+	movl		s1(, RID1, 4), dst ## d; \
+	op1		s2(, RID2, 4), dst ## d; \
+	shrq $16,	src;                     \
+	movb		src ## bl,     RID1b;    \
+	movb		src ## bh,     RID2b;    \
+	op2		s3(, RID1, 4), dst ## d; \
+	op3		s4(, RID2, 4), dst ## d;
+
+#define F(a, x, op0, op1, op2, op3) \
+	op0	a,	RKM,  x;                 \
+	vpslld  RKRF,	x,    RTMP;              \
+	vpsrld  RKRR,	x,    x;                 \
+	vpor	RTMP,	x,    x;                 \
+	\
+	vpshufb	RMASK,	x,    x;                 \
+	vmovq		x,    RGI1;              \
+	vpsrldq $8,	x,    x;                 \
+	vmovq		x,    RGI2;              \
+	\
+	lookup_32bit(RGI1, RFS1, op1, op2, op3); \
+	shrq $16,	RGI1;                    \
+	lookup_32bit(RGI1, RFS2, op1, op2, op3); \
+	shlq $32,	RFS2;                    \
+	orq		RFS1, RFS2;              \
+	\
+	lookup_32bit(RGI2, RFS1, op1, op2, op3); \
+	shrq $16,	RGI2;                    \
+	lookup_32bit(RGI2, RFS3, op1, op2, op3); \
+	shlq $32,	RFS3;                    \
+	orq		RFS1, RFS3;              \
+	\
+	vmovq		RFS2, x;                 \
+	vpinsrq $1,	RFS3, x, x;
+
+#define F1(b, x) F(b, x, vpaddd, xorl, subl, addl)
+#define F2(b, x) F(b, x, vpxor,  subl, addl, xorl)
+#define F3(b, x) F(b, x, vpsubd, addl, xorl, subl)
+
+#define subround(a, b, x, n, f) \
+	F ## f(b, x);  \
+	vpxor a, x, a;
+
+#define round(l, r, n, f) \
+	vbroadcastss 	(km+(4*n))(CTX), RKM;        \
+	vpinsrb $0,	(kr+n)(CTX),     RKRF, RKRF; \
+	vpsubq		RKRF,            R32,  RKRR; \
+	subround(l ## 1, r ## 1, RX, n, f);          \
+	subround(l ## 2, r ## 2, RX, n, f);          \
+	subround(l ## 3, r ## 3, RX, n, f);          \
+	subround(l ## 4, r ## 4, RX, n, f);
+
+
+#define transpose_2x4(x0, x1, t0, t1) \
+	vpunpckldq		x1, x0, t0; \
+	vpunpckhdq		x1, x0, t1; \
+	\
+	vpunpcklqdq		t1, t0, x0; \
+	vpunpckhqdq		t1, t0, x1;
+
+#define inpack_blocks(in, x0, x1, t0, t1) \
+	vmovdqu (0*4*4)(in),	x0; \
+	vmovdqu (1*4*4)(in),	x1; \
+	vpshufb RMASK, x0,	x0; \
+	vpshufb RMASK, x1,	x1; \
+	\
+	transpose_2x4(x0, x1, t0, t1)
+
+#define outunpack_blocks(out, x0, x1, t0, t1) \
+	transpose_2x4(x0, x1, t0, t1) \
+	\
+	vpshufb RMASK,	x0, x0;           \
+	vpshufb RMASK,	x1, x1;           \
+	vmovdqu 	x0, (0*4*4)(out); \
+	vmovdqu		x1, (1*4*4)(out);
+
+#define outunpack_xor_blocks(out, x0, x1, t0, t1) \
+	transpose_2x4(x0, x1, t0, t1) \
+	\
+	vpshufb RMASK,	x0, x0;               \
+	vpshufb RMASK,	x1, x1;               \
+	vpxor		(0*4*4)(out), x0, x0; \
+	vmovdqu 	x0, (0*4*4)(out);     \
+	vpxor		(1*4*4)(out), x1, x1; \
+	vmovdqu	        x1, (1*4*4)(out);
+
+.align 16
+.Lbswap_mask:
+	.byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
+.L32_mask:
+	.byte 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ,0, 0, 0, 0, 0
+
+.align 16
+.global __cast5_enc_blk_16way
+.type   __cast5_enc_blk_16way,@function;
+
+__cast5_enc_blk_16way:
+	/* input:
+	 *	%rdi: ctx, CTX
+	 *	%rsi: dst
+	 *	%rdx: src
+	 *	%rcx: bool, if true: xor output
+	 */
+
+	pushq %rbx;
+	pushq %rcx;
+
+	vmovdqu .Lbswap_mask, RMASK;
+	vmovdqu .L32_mask, R32;
+	vpxor RKRF, RKRF, RKRF;
+
+	inpack_blocks(%rdx, RL1, RR1, RTMP, RX);
+	leaq (2*4*4)(%rdx), %rax;
+	inpack_blocks(%rax, RL2, RR2, RTMP, RX);
+	leaq (2*4*4)(%rax), %rax;
+	inpack_blocks(%rax, RL3, RR3, RTMP, RX);
+	leaq (2*4*4)(%rax), %rax;
+	inpack_blocks(%rax, RL4, RR4, RTMP, RX);
+
+	xorq RID1, RID1;
+	xorq RID2, RID2;
+
+	round(RL, RR, 0, 1);
+	round(RR, RL, 1, 2);
+	round(RL, RR, 2, 3);
+	round(RR, RL, 3, 1);
+	round(RL, RR, 4, 2);
+	round(RR, RL, 5, 3);
+	round(RL, RR, 6, 1);
+	round(RR, RL, 7, 2);
+	round(RL, RR, 8, 3);
+	round(RR, RL, 9, 1);
+	round(RL, RR, 10, 2);
+	round(RR, RL, 11, 3);
+
+	movb rr(CTX), %al;
+	testb %al, %al;
+	jnz __skip_enc;
+
+	round(RL, RR, 12, 1);
+	round(RR, RL, 13, 2);
+	round(RL, RR, 14, 3);
+	round(RR, RL, 15, 1);
+
+__skip_enc:
+	popq %rcx;
+	popq %rbx;
+
+	testb %cl, %cl;
+	jnz __enc_xor16;
+
+	outunpack_blocks(%rsi, RR1, RL1, RTMP, RX);
+	leaq (2*4*4)(%rsi), %rax;
+	outunpack_blocks(%rax, RR2, RL2, RTMP, RX);
+	leaq (2*4*4)(%rax), %rax;
+	outunpack_blocks(%rax, RR3, RL3, RTMP, RX);
+	leaq (2*4*4)(%rax), %rax;
+	outunpack_blocks(%rax, RR4, RL4, RTMP, RX);
+
+	ret;
+
+__enc_xor16:
+	outunpack_xor_blocks(%rsi, RR1, RL1, RTMP, RX);
+	leaq (2*4*4)(%rsi), %rax;
+	outunpack_xor_blocks(%rax, RR2, RL2, RTMP, RX);
+	leaq (2*4*4)(%rax), %rax;
+	outunpack_xor_blocks(%rax, RR3, RL3, RTMP, RX);
+	leaq (2*4*4)(%rax), %rax;
+	outunpack_xor_blocks(%rax, RR4, RL4, RTMP, RX);
+
+	ret;
+
+.align 16
+.global cast5_dec_blk_16way
+.type   cast5_dec_blk_16way,@function;
+
+cast5_dec_blk_16way:
+	/* input:
+	 *	%rdi: ctx, CTX
+	 *	%rsi: dst
+	 *	%rdx: src
+	 */
+
+	pushq %rbx;
+
+	vmovdqu .Lbswap_mask, RMASK;
+	vmovdqu .L32_mask, R32;
+	vpxor RKRF, RKRF, RKRF;
+
+	inpack_blocks(%rdx, RL1, RR1, RTMP, RX);
+	leaq (2*4*4)(%rdx), %rax;
+	inpack_blocks(%rax, RL2, RR2, RTMP, RX);
+	leaq (2*4*4)(%rax), %rax;
+	inpack_blocks(%rax, RL3, RR3, RTMP, RX);
+	leaq (2*4*4)(%rax), %rax;
+	inpack_blocks(%rax, RL4, RR4, RTMP, RX);
+
+	xorq RID1, RID1;
+	xorq RID2, RID2;
+
+	movb rr(CTX), %al;
+	testb %al, %al;
+	jnz __skip_dec;
+
+	round(RL, RR, 15, 1);
+	round(RR, RL, 14, 3);
+	round(RL, RR, 13, 2);
+	round(RR, RL, 12, 1);
+
+__skip_dec:
+	round(RL, RR, 11, 3);
+	round(RR, RL, 10, 2);
+	round(RL, RR, 9, 1);
+	round(RR, RL, 8, 3);
+	round(RL, RR, 7, 2);
+	round(RR, RL, 6, 1);
+	round(RL, RR, 5, 3);
+	round(RR, RL, 4, 2);
+	round(RL, RR, 3, 1);
+	round(RR, RL, 2, 3);
+	round(RL, RR, 1, 2);
+	round(RR, RL, 0, 1);
+
+	popq %rbx;
+
+	outunpack_blocks(%rsi, RR1, RL1, RTMP, RX);
+	leaq (2*4*4)(%rsi), %rax;
+	outunpack_blocks(%rax, RR2, RL2, RTMP, RX);
+	leaq (2*4*4)(%rax), %rax;
+	outunpack_blocks(%rax, RR3, RL3, RTMP, RX);
+	leaq (2*4*4)(%rax), %rax;
+	outunpack_blocks(%rax, RR4, RL4, RTMP, RX);
+
+	ret;
diff --git a/arch/x86/crypto/cast5_avx_glue.c b/arch/x86/crypto/cast5_avx_glue.c
new file mode 100644
index 00000000000..445aab06387
--- /dev/null
+++ b/arch/x86/crypto/cast5_avx_glue.c
@@ -0,0 +1,530 @@
+/*
+ * Glue Code for the AVX assembler implemention of the Cast5 Cipher
+ *
+ * Copyright (C) 2012 Johannes Goetzfried
+ *     <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
+ * USA
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/hardirq.h>
+#include <linux/types.h>
+#include <linux/crypto.h>
+#include <linux/err.h>
+#include <crypto/algapi.h>
+#include <crypto/cast5.h>
+#include <crypto/cryptd.h>
+#include <crypto/ctr.h>
+#include <asm/xcr.h>
+#include <asm/xsave.h>
+#include <asm/crypto/ablk_helper.h>
+#include <asm/crypto/glue_helper.h>
+
+#define CAST5_PARALLEL_BLOCKS 16
+
+asmlinkage void __cast5_enc_blk_16way(struct cast5_ctx *ctx, u8 *dst,
+				      const u8 *src, bool xor);
+asmlinkage void cast5_dec_blk_16way(struct cast5_ctx *ctx, u8 *dst,
+				    const u8 *src);
+
+static inline void cast5_enc_blk_xway(struct cast5_ctx *ctx, u8 *dst,
+				      const u8 *src)
+{
+	__cast5_enc_blk_16way(ctx, dst, src, false);
+}
+
+static inline void cast5_enc_blk_xway_xor(struct cast5_ctx *ctx, u8 *dst,
+					  const u8 *src)
+{
+	__cast5_enc_blk_16way(ctx, dst, src, true);
+}
+
+static inline void cast5_dec_blk_xway(struct cast5_ctx *ctx, u8 *dst,
+				      const u8 *src)
+{
+	cast5_dec_blk_16way(ctx, dst, src);
+}
+
+
+static inline bool cast5_fpu_begin(bool fpu_enabled, unsigned int nbytes)
+{
+	return glue_fpu_begin(CAST5_BLOCK_SIZE, CAST5_PARALLEL_BLOCKS,
+			      NULL, fpu_enabled, nbytes);
+}
+
+static inline void cast5_fpu_end(bool fpu_enabled)
+{
+	return glue_fpu_end(fpu_enabled);
+}
+
+static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
+		     bool enc)
+{
+	bool fpu_enabled = false;
+	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	const unsigned int bsize = CAST5_BLOCK_SIZE;
+	unsigned int nbytes;
+	int err;
+
+	err = blkcipher_walk_virt(desc, walk);
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+
+	while ((nbytes = walk->nbytes)) {
+		u8 *wsrc = walk->src.virt.addr;
+		u8 *wdst = walk->dst.virt.addr;
+
+		fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
+
+		/* Process multi-block batch */
+		if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
+			do {
+				if (enc)
+					cast5_enc_blk_xway(ctx, wdst, wsrc);
+				else
+					cast5_dec_blk_xway(ctx, wdst, wsrc);
+
+				wsrc += bsize * CAST5_PARALLEL_BLOCKS;
+				wdst += bsize * CAST5_PARALLEL_BLOCKS;
+				nbytes -= bsize * CAST5_PARALLEL_BLOCKS;
+			} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);
+
+			if (nbytes < bsize)
+				goto done;
+		}
+
+		/* Handle leftovers */
+		do {
+			if (enc)
+				__cast5_encrypt(ctx, wdst, wsrc);
+			else
+				__cast5_decrypt(ctx, wdst, wsrc);
+
+			wsrc += bsize;
+			wdst += bsize;
+			nbytes -= bsize;
+		} while (nbytes >= bsize);
+
+done:
+		err = blkcipher_walk_done(desc, walk, nbytes);
+	}
+
+	cast5_fpu_end(fpu_enabled);
+	return err;
+}
+
+static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct blkcipher_walk walk;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	return ecb_crypt(desc, &walk, true);
+}
+
+static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct blkcipher_walk walk;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	return ecb_crypt(desc, &walk, false);
+}
+
+static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
+				  struct blkcipher_walk *walk)
+{
+	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	const unsigned int bsize = CAST5_BLOCK_SIZE;
+	unsigned int nbytes = walk->nbytes;
+	u64 *src = (u64 *)walk->src.virt.addr;
+	u64 *dst = (u64 *)walk->dst.virt.addr;
+	u64 *iv = (u64 *)walk->iv;
+
+	do {
+		*dst = *src ^ *iv;
+		__cast5_encrypt(ctx, (u8 *)dst, (u8 *)dst);
+		iv = dst;
+
+		src += 1;
+		dst += 1;
+		nbytes -= bsize;
+	} while (nbytes >= bsize);
+
+	*(u64 *)walk->iv ^= *iv;
+	return nbytes;
+}
+
+static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct blkcipher_walk walk;
+	int err;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt(desc, &walk);
+
+	while ((nbytes = walk.nbytes)) {
+		nbytes = __cbc_encrypt(desc, &walk);
+		err = blkcipher_walk_done(desc, &walk, nbytes);
+	}
+
+	return err;
+}
+
+static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
+				  struct blkcipher_walk *walk)
+{
+	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	const unsigned int bsize = CAST5_BLOCK_SIZE;
+	unsigned int nbytes = walk->nbytes;
+	u64 *src = (u64 *)walk->src.virt.addr;
+	u64 *dst = (u64 *)walk->dst.virt.addr;
+	u64 ivs[CAST5_PARALLEL_BLOCKS - 1];
+	u64 last_iv;
+	int i;
+
+	/* Start of the last block. */
+	src += nbytes / bsize - 1;
+	dst += nbytes / bsize - 1;
+
+	last_iv = *src;
+
+	/* Process multi-block batch */
+	if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
+		do {
+			nbytes -= bsize * (CAST5_PARALLEL_BLOCKS - 1);
+			src -= CAST5_PARALLEL_BLOCKS - 1;
+			dst -= CAST5_PARALLEL_BLOCKS - 1;
+
+			for (i = 0; i < CAST5_PARALLEL_BLOCKS - 1; i++)
+				ivs[i] = src[i];
+
+			cast5_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src);
+
+			for (i = 0; i < CAST5_PARALLEL_BLOCKS - 1; i++)
+				*(dst + (i + 1)) ^= *(ivs + i);
+
+			nbytes -= bsize;
+			if (nbytes < bsize)
+				goto done;
+
+			*dst ^= *(src - 1);
+			src -= 1;
+			dst -= 1;
+		} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);
+
+		if (nbytes < bsize)
+			goto done;
+	}
+
+	/* Handle leftovers */
+	for (;;) {
+		__cast5_decrypt(ctx, (u8 *)dst, (u8 *)src);
+
+		nbytes -= bsize;
+		if (nbytes < bsize)
+			break;
+
+		*dst ^= *(src - 1);
+		src -= 1;
+		dst -= 1;
+	}
+
+done:
+	*dst ^= *(u64 *)walk->iv;
+	*(u64 *)walk->iv = last_iv;
+
+	return nbytes;
+}
+
+static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	bool fpu_enabled = false;
+	struct blkcipher_walk walk;
+	int err;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt(desc, &walk);
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+
+	while ((nbytes = walk.nbytes)) {
+		fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
+		nbytes = __cbc_decrypt(desc, &walk);
+		err = blkcipher_walk_done(desc, &walk, nbytes);
+	}
+
+	cast5_fpu_end(fpu_enabled);
+	return err;
+}
+
+static void ctr_crypt_final(struct blkcipher_desc *desc,
+			    struct blkcipher_walk *walk)
+{
+	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	u8 *ctrblk = walk->iv;
+	u8 keystream[CAST5_BLOCK_SIZE];
+	u8 *src = walk->src.virt.addr;
+	u8 *dst = walk->dst.virt.addr;
+	unsigned int nbytes = walk->nbytes;
+
+	__cast5_encrypt(ctx, keystream, ctrblk);
+	crypto_xor(keystream, src, nbytes);
+	memcpy(dst, keystream, nbytes);
+
+	crypto_inc(ctrblk, CAST5_BLOCK_SIZE);
+}
+
+static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
+				struct blkcipher_walk *walk)
+{
+	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	const unsigned int bsize = CAST5_BLOCK_SIZE;
+	unsigned int nbytes = walk->nbytes;
+	u64 *src = (u64 *)walk->src.virt.addr;
+	u64 *dst = (u64 *)walk->dst.virt.addr;
+	u64 ctrblk = be64_to_cpu(*(__be64 *)walk->iv);
+	__be64 ctrblocks[CAST5_PARALLEL_BLOCKS];
+	int i;
+
+	/* Process multi-block batch */
+	if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
+		do {
+			/* create ctrblks for parallel encrypt */
+			for (i = 0; i < CAST5_PARALLEL_BLOCKS; i++) {
+				if (dst != src)
+					dst[i] = src[i];
+
+				ctrblocks[i] = cpu_to_be64(ctrblk++);
+			}
+
+			cast5_enc_blk_xway_xor(ctx, (u8 *)dst,
+					       (u8 *)ctrblocks);
+
+			src += CAST5_PARALLEL_BLOCKS;
+			dst += CAST5_PARALLEL_BLOCKS;
+			nbytes -= bsize * CAST5_PARALLEL_BLOCKS;
+		} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);
+
+		if (nbytes < bsize)
+			goto done;
+	}
+
+	/* Handle leftovers */
+	do {
+		if (dst != src)
+			*dst = *src;
+
+		ctrblocks[0] = cpu_to_be64(ctrblk++);
+
+		__cast5_encrypt(ctx, (u8 *)ctrblocks, (u8 *)ctrblocks);
+		*dst ^= ctrblocks[0];
+
+		src += 1;
+		dst += 1;
+		nbytes -= bsize;
+	} while (nbytes >= bsize);
+
+done:
+	*(__be64 *)walk->iv = cpu_to_be64(ctrblk);
+	return nbytes;
+}
+
+static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		     struct scatterlist *src, unsigned int nbytes)
+{
+	bool fpu_enabled = false;
+	struct blkcipher_walk walk;
+	int err;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt_block(desc, &walk, CAST5_BLOCK_SIZE);
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+
+	while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) {
+		fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
+		nbytes = __ctr_crypt(desc, &walk);
+		err = blkcipher_walk_done(desc, &walk, nbytes);
+	}
+
+	cast5_fpu_end(fpu_enabled);
+
+	if (walk.nbytes) {
+		ctr_crypt_final(desc, &walk);
+		err = blkcipher_walk_done(desc, &walk, 0);
+	}
+
+	return err;
+}
+
+
+static struct crypto_alg cast5_algs[6] = { {
+	.cra_name		= "__ecb-cast5-avx",
+	.cra_driver_name	= "__driver-ecb-cast5-avx",
+	.cra_priority		= 0,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= CAST5_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct cast5_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= CAST5_MIN_KEY_SIZE,
+			.max_keysize	= CAST5_MAX_KEY_SIZE,
+			.setkey		= cast5_setkey,
+			.encrypt	= ecb_encrypt,
+			.decrypt	= ecb_decrypt,
+		},
+	},
+}, {
+	.cra_name		= "__cbc-cast5-avx",
+	.cra_driver_name	= "__driver-cbc-cast5-avx",
+	.cra_priority		= 0,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= CAST5_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct cast5_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= CAST5_MIN_KEY_SIZE,
+			.max_keysize	= CAST5_MAX_KEY_SIZE,
+			.setkey		= cast5_setkey,
+			.encrypt	= cbc_encrypt,
+			.decrypt	= cbc_decrypt,
+		},
+	},
+}, {
+	.cra_name		= "__ctr-cast5-avx",
+	.cra_driver_name	= "__driver-ctr-cast5-avx",
+	.cra_priority		= 0,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= 1,
+	.cra_ctxsize		= sizeof(struct cast5_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= CAST5_MIN_KEY_SIZE,
+			.max_keysize	= CAST5_MAX_KEY_SIZE,
+			.ivsize		= CAST5_BLOCK_SIZE,
+			.setkey		= cast5_setkey,
+			.encrypt	= ctr_crypt,
+			.decrypt	= ctr_crypt,
+		},
+	},
+}, {
+	.cra_name		= "ecb(cast5)",
+	.cra_driver_name	= "ecb-cast5-avx",
+	.cra_priority		= 200,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= CAST5_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= ablk_init,
+	.cra_exit		= ablk_exit,
+	.cra_u = {
+		.ablkcipher = {
+			.min_keysize	= CAST5_MIN_KEY_SIZE,
+			.max_keysize	= CAST5_MAX_KEY_SIZE,
+			.setkey		= ablk_set_key,
+			.encrypt	= ablk_encrypt,
+			.decrypt	= ablk_decrypt,
+		},
+	},
+}, {
+	.cra_name		= "cbc(cast5)",
+	.cra_driver_name	= "cbc-cast5-avx",
+	.cra_priority		= 200,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= CAST5_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= ablk_init,
+	.cra_exit		= ablk_exit,
+	.cra_u = {
+		.ablkcipher = {
+			.min_keysize	= CAST5_MIN_KEY_SIZE,
+			.max_keysize	= CAST5_MAX_KEY_SIZE,
+			.ivsize		= CAST5_BLOCK_SIZE,
+			.setkey		= ablk_set_key,
+			.encrypt	= __ablk_encrypt,
+			.decrypt	= ablk_decrypt,
+		},
+	},
+}, {
+	.cra_name		= "ctr(cast5)",
+	.cra_driver_name	= "ctr-cast5-avx",
+	.cra_priority		= 200,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= 1,
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= ablk_init,
+	.cra_exit		= ablk_exit,
+	.cra_u = {
+		.ablkcipher = {
+			.min_keysize	= CAST5_MIN_KEY_SIZE,
+			.max_keysize	= CAST5_MAX_KEY_SIZE,
+			.ivsize		= CAST5_BLOCK_SIZE,
+			.setkey		= ablk_set_key,
+			.encrypt	= ablk_encrypt,
+			.decrypt	= ablk_encrypt,
+			.geniv		= "chainiv",
+		},
+	},
+} };
+
+static int __init cast5_init(void)
+{
+	u64 xcr0;
+
+	if (!cpu_has_avx || !cpu_has_osxsave) {
+		pr_info("AVX instructions are not detected.\n");
+		return -ENODEV;
+	}
+
+	xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
+	if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
+		pr_info("AVX detected but unusable.\n");
+		return -ENODEV;
+	}
+
+	return crypto_register_algs(cast5_algs, ARRAY_SIZE(cast5_algs));
+}
+
+static void __exit cast5_exit(void)
+{
+	crypto_unregister_algs(cast5_algs, ARRAY_SIZE(cast5_algs));
+}
+
+module_init(cast5_init);
+module_exit(cast5_exit);
+
+MODULE_DESCRIPTION("Cast5 Cipher Algorithm, AVX optimized");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("cast5");
diff --git a/crypto/Kconfig b/crypto/Kconfig
index a3238051b03..cda97fcaa82 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -692,6 +692,20 @@ config CRYPTO_CAST5
 	  The CAST5 encryption algorithm (synonymous with CAST-128) is
 	  described in RFC2144.
 
+config CRYPTO_CAST5_AVX_X86_64
+	tristate "CAST5 (CAST-128) cipher algorithm (x86_64/AVX)"
+	depends on X86 && 64BIT
+	select CRYPTO_ALGAPI
+	select CRYPTO_CRYPTD
+	select CRYPTO_ABLK_HELPER_X86
+	select CRYPTO_CAST5
+	help
+	  The CAST5 encryption algorithm (synonymous with CAST-128) is
+	  described in RFC2144.
+
+	  This module provides the Cast5 cipher algorithm that processes
+	  sixteen blocks parallel using the AVX instruction set.
+
 config CRYPTO_CAST6
 	tristate "CAST6 (CAST-256) cipher algorithm"
 	select CRYPTO_ALGAPI
diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index 7a91e540563..def0f430b66 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -1534,6 +1534,21 @@ static int alg_test_null(const struct alg_test_desc *desc,
 /* Please keep this list sorted by algorithm name. */
 static const struct alg_test_desc alg_test_descs[] = {
 	{
+		.alg = "__cbc-cast5-avx",
+		.test = alg_test_null,
+		.suite = {
+			.cipher = {
+				.enc = {
+					.vecs = NULL,
+					.count = 0
+				},
+				.dec = {
+					.vecs = NULL,
+					.count = 0
+				}
+			}
+		}
+	}, {
 		.alg = "__cbc-serpent-avx",
 		.test = alg_test_null,
 		.suite = {
@@ -1594,6 +1609,21 @@ static const struct alg_test_desc alg_test_descs[] = {
 				}
 			}
 		}
+	}, {
+		.alg = "__driver-cbc-cast5-avx",
+		.test = alg_test_null,
+		.suite = {
+			.cipher = {
+				.enc = {
+					.vecs = NULL,
+					.count = 0
+				},
+				.dec = {
+					.vecs = NULL,
+					.count = 0
+				}
+			}
+		}
 	}, {
 		.alg = "__driver-cbc-serpent-avx",
 		.test = alg_test_null,
@@ -1655,6 +1685,21 @@ static const struct alg_test_desc alg_test_descs[] = {
 				}
 			}
 		}
+	}, {
+		.alg = "__driver-ecb-cast5-avx",
+		.test = alg_test_null,
+		.suite = {
+			.cipher = {
+				.enc = {
+					.vecs = NULL,
+					.count = 0
+				},
+				.dec = {
+					.vecs = NULL,
+					.count = 0
+				}
+			}
+		}
 	}, {
 		.alg = "__driver-ecb-serpent-avx",
 		.test = alg_test_null,
@@ -1951,6 +1996,21 @@ static const struct alg_test_desc alg_test_descs[] = {
 				}
 			}
 		}
+	}, {
+		.alg = "cryptd(__driver-ecb-cast5-avx)",
+		.test = alg_test_null,
+		.suite = {
+			.cipher = {
+				.enc = {
+					.vecs = NULL,
+					.count = 0
+				},
+				.dec = {
+					.vecs = NULL,
+					.count = 0
+				}
+			}
+		}
 	}, {
 		.alg = "cryptd(__driver-ecb-serpent-avx)",
 		.test = alg_test_null,
-- 
cgit v1.2.3-70-g09d2


From 4ea1277d301eb776e321684cd4ea95116b4e8847 Mon Sep 17 00:00:00 2001
From: Johannes Goetzfried
 <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
Date: Wed, 11 Jul 2012 19:38:57 +0200
Subject: crypto: cast6 - add x86_64/avx assembler implementation

This patch adds a x86_64/avx assembler implementation of the Cast6 block
cipher. The implementation processes eight blocks in parallel (two 4 block
chunk AVX operations). The table-lookups are done in general-purpose registers.
For small blocksizes the functions from the generic module are called. A good
performance increase is provided for blocksizes greater or equal to 128B.

Patch has been tested with tcrypt and automated filesystem tests.

Tcrypt benchmark results:

Intel Core i5-2500 CPU (fam:6, model:42, step:7)

cast6-avx-x86_64 vs. cast6-generic
128bit key:                                             (lrw:256bit)    (xts:256bit)
size    ecb-enc ecb-dec cbc-enc cbc-dec ctr-enc ctr-dec lrw-enc lrw-dec xts-enc xts-dec
16B     0.97x   1.00x   1.01x   1.01x   0.99x   0.97x   0.98x   1.01x   0.96x   0.98x
64B     0.98x   0.99x   1.02x   1.01x   0.99x   1.00x   1.01x   0.99x   1.00x   0.99x
256B    1.77x   1.84x   0.99x   1.85x   1.77x   1.77x   1.70x   1.74x   1.69x   1.72x
1024B   1.93x   1.95x   0.99x   1.96x   1.93x   1.93x   1.84x   1.85x   1.89x   1.87x
8192B   1.91x   1.95x   0.99x   1.97x   1.95x   1.91x   1.86x   1.87x   1.93x   1.90x

256bit key:                                             (lrw:384bit)    (xts:512bit)
size    ecb-enc ecb-dec cbc-enc cbc-dec ctr-enc ctr-dec lrw-enc lrw-dec xts-enc xts-dec
16B     0.97x   0.99x   1.02x   1.01x   0.98x   0.99x   1.00x   1.00x   0.98x   0.98x
64B     0.98x   0.99x   1.01x   1.00x   1.00x   1.00x   1.01x   1.01x   0.97x   1.00x
256B    1.77x   1.83x   1.00x   1.86x   1.79x   1.78x   1.70x   1.76x   1.71x   1.69x
1024B   1.92x   1.95x   0.99x   1.96x   1.93x   1.93x   1.83x   1.86x   1.89x   1.87x
8192B   1.94x   1.95x   0.99x   1.97x   1.95x   1.95x   1.87x   1.87x   1.93x   1.91x

Signed-off-by: Johannes Goetzfried <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/x86/crypto/Makefile                  |   2 +
 arch/x86/crypto/cast6-avx-x86_64-asm_64.S | 335 +++++++++++++++
 arch/x86/crypto/cast6_avx_glue.c          | 648 ++++++++++++++++++++++++++++++
 crypto/Kconfig                            |  17 +
 crypto/testmgr.c                          |  60 +++
 5 files changed, 1062 insertions(+)
 create mode 100644 arch/x86/crypto/cast6-avx-x86_64-asm_64.S
 create mode 100644 arch/x86/crypto/cast6_avx_glue.c

(limited to 'arch/x86')

diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index 565e82b0014..5bacb4a226a 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -13,6 +13,7 @@ obj-$(CONFIG_CRYPTO_SERPENT_SSE2_586) += serpent-sse2-i586.o
 obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o
 obj-$(CONFIG_CRYPTO_CAMELLIA_X86_64) += camellia-x86_64.o
 obj-$(CONFIG_CRYPTO_CAST5_AVX_X86_64) += cast5-avx-x86_64.o
+obj-$(CONFIG_CRYPTO_CAST6_AVX_X86_64) += cast6-avx-x86_64.o
 obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o
 obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o
 obj-$(CONFIG_CRYPTO_TWOFISH_X86_64_3WAY) += twofish-x86_64-3way.o
@@ -34,6 +35,7 @@ serpent-sse2-i586-y := serpent-sse2-i586-asm_32.o serpent_sse2_glue.o
 aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o
 camellia-x86_64-y := camellia-x86_64-asm_64.o camellia_glue.o
 cast5-avx-x86_64-y := cast5-avx-x86_64-asm_64.o cast5_avx_glue.o
+cast6-avx-x86_64-y := cast6-avx-x86_64-asm_64.o cast6_avx_glue.o
 blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o
 twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o
 twofish-x86_64-3way-y := twofish-x86_64-asm_64-3way.o twofish_glue_3way.o
diff --git a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S
new file mode 100644
index 00000000000..d258ce0d2e0
--- /dev/null
+++ b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S
@@ -0,0 +1,335 @@
+/*
+ * Cast6 Cipher 8-way parallel algorithm (AVX/x86_64)
+ *
+ * Copyright (C) 2012 Johannes Goetzfried
+ *     <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
+ * USA
+ *
+ */
+
+.file "cast6-avx-x86_64-asm_64.S"
+.text
+
+.extern cast6_s1
+.extern cast6_s2
+.extern cast6_s3
+.extern cast6_s4
+
+/* structure of crypto context */
+#define km	0
+#define kr	(12*4*4)
+
+/* s-boxes */
+#define s1	cast6_s1
+#define s2	cast6_s2
+#define s3	cast6_s3
+#define s4	cast6_s4
+
+/**********************************************************************
+  8-way AVX cast6
+ **********************************************************************/
+#define CTX %rdi
+
+#define RA1 %xmm0
+#define RB1 %xmm1
+#define RC1 %xmm2
+#define RD1 %xmm3
+
+#define RA2 %xmm4
+#define RB2 %xmm5
+#define RC2 %xmm6
+#define RD2 %xmm7
+
+#define RX %xmm8
+
+#define RKM  %xmm9
+#define RKRF %xmm10
+#define RKRR %xmm11
+
+#define RTMP  %xmm12
+#define RMASK %xmm13
+#define R32   %xmm14
+
+#define RID1  %rax
+#define RID1b %al
+#define RID2  %rbx
+#define RID2b %bl
+
+#define RGI1   %rdx
+#define RGI1bl %dl
+#define RGI1bh %dh
+#define RGI2   %rcx
+#define RGI2bl %cl
+#define RGI2bh %ch
+
+#define RFS1  %r8
+#define RFS1d %r8d
+#define RFS2  %r9
+#define RFS2d %r9d
+#define RFS3  %r10
+#define RFS3d %r10d
+
+
+#define lookup_32bit(src, dst, op1, op2, op3) \
+	movb		src ## bl,     RID1b;    \
+	movb		src ## bh,     RID2b;    \
+	movl		s1(, RID1, 4), dst ## d; \
+	op1		s2(, RID2, 4), dst ## d; \
+	shrq $16,	src;                     \
+	movb		src ## bl,     RID1b;    \
+	movb		src ## bh,     RID2b;    \
+	op2		s3(, RID1, 4), dst ## d; \
+	op3		s4(, RID2, 4), dst ## d;
+
+#define F(a, x, op0, op1, op2, op3) \
+	op0	a,	RKM,  x;                 \
+	vpslld  RKRF,	x,    RTMP;              \
+	vpsrld  RKRR,	x,    x;                 \
+	vpor	RTMP,	x,    x;                 \
+	\
+	vpshufb	RMASK,	x,    x;                 \
+	vmovq		x,    RGI1;              \
+	vpsrldq $8,	x,    x;                 \
+	vmovq		x,    RGI2;              \
+	\
+	lookup_32bit(RGI1, RFS1, op1, op2, op3); \
+	shrq $16,	RGI1;                    \
+	lookup_32bit(RGI1, RFS2, op1, op2, op3); \
+	shlq $32,	RFS2;                    \
+	orq		RFS1, RFS2;              \
+	\
+	lookup_32bit(RGI2, RFS1, op1, op2, op3); \
+	shrq $16,	RGI2;                    \
+	lookup_32bit(RGI2, RFS3, op1, op2, op3); \
+	shlq $32,	RFS3;                    \
+	orq		RFS1, RFS3;              \
+	\
+	vmovq		RFS2, x;                 \
+	vpinsrq $1,	RFS3, x, x;
+
+#define F1(b, x) F(b, x, vpaddd, xorl, subl, addl)
+#define F2(b, x) F(b, x, vpxor,  subl, addl, xorl)
+#define F3(b, x) F(b, x, vpsubd, addl, xorl, subl)
+
+#define qop(in, out, x, f) \
+	F ## f(in ## 1, x);          \
+	vpxor out ## 1, x, out ## 1; \
+	F ## f(in ## 2, x);          \
+	vpxor out ## 2, x, out ## 2; \
+
+#define Q(n) \
+	vbroadcastss	(km+(4*(4*n+0)))(CTX), RKM;        \
+	vpinsrb $0,	(kr+(4*n+0))(CTX),     RKRF, RKRF; \
+	vpsubq		RKRF,                  R32,  RKRR; \
+	qop(RD, RC, RX, 1);                                \
+	\
+	vbroadcastss	(km+(4*(4*n+1)))(CTX), RKM;        \
+	vpinsrb $0,	(kr+(4*n+1))(CTX),     RKRF, RKRF; \
+	vpsubq		RKRF,                  R32,  RKRR; \
+	qop(RC, RB, RX, 2);                                \
+	\
+	vbroadcastss	(km+(4*(4*n+2)))(CTX), RKM;        \
+	vpinsrb $0,	(kr+(4*n+2))(CTX),     RKRF, RKRF; \
+	vpsubq		RKRF,                  R32,  RKRR; \
+	qop(RB, RA, RX, 3);                                \
+	\
+	vbroadcastss	(km+(4*(4*n+3)))(CTX), RKM;        \
+	vpinsrb $0,	(kr+(4*n+3))(CTX),     RKRF, RKRF; \
+	vpsubq		RKRF,                  R32,  RKRR; \
+	qop(RA, RD, RX, 1);
+
+#define QBAR(n) \
+	vbroadcastss	(km+(4*(4*n+3)))(CTX), RKM;        \
+	vpinsrb $0,	(kr+(4*n+3))(CTX),     RKRF, RKRF; \
+	vpsubq		RKRF,                  R32,  RKRR; \
+	qop(RA, RD, RX, 1);                                \
+	\
+	vbroadcastss	(km+(4*(4*n+2)))(CTX), RKM;        \
+	vpinsrb $0,	(kr+(4*n+2))(CTX),     RKRF, RKRF; \
+	vpsubq		RKRF,                  R32,  RKRR; \
+	qop(RB, RA, RX, 3);                                \
+	\
+	vbroadcastss	(km+(4*(4*n+1)))(CTX), RKM;        \
+	vpinsrb $0,	(kr+(4*n+1))(CTX),     RKRF, RKRF; \
+	vpsubq		RKRF,                  R32,  RKRR; \
+	qop(RC, RB, RX, 2);                                \
+	\
+	vbroadcastss	(km+(4*(4*n+0)))(CTX), RKM;        \
+	vpinsrb $0,	(kr+(4*n+0))(CTX),     RKRF, RKRF; \
+	vpsubq		RKRF,                  R32,  RKRR; \
+	qop(RD, RC, RX, 1);
+
+
+#define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
+	vpunpckldq		x1, x0, t0; \
+	vpunpckhdq		x1, x0, t2; \
+	vpunpckldq		x3, x2, t1; \
+	vpunpckhdq		x3, x2, x3; \
+	\
+	vpunpcklqdq		t1, t0, x0; \
+	vpunpckhqdq		t1, t0, x1; \
+	vpunpcklqdq		x3, t2, x2; \
+	vpunpckhqdq		x3, t2, x3;
+
+#define inpack_blocks(in, x0, x1, x2, x3, t0, t1, t2) \
+	vmovdqu (0*4*4)(in),	x0; \
+	vmovdqu (1*4*4)(in),	x1; \
+	vmovdqu (2*4*4)(in),	x2; \
+	vmovdqu (3*4*4)(in),	x3; \
+	vpshufb RMASK, x0,	x0; \
+	vpshufb RMASK, x1,	x1; \
+	vpshufb RMASK, x2,	x2; \
+	vpshufb RMASK, x3,	x3; \
+	\
+	transpose_4x4(x0, x1, x2, x3, t0, t1, t2)
+
+#define outunpack_blocks(out, x0, x1, x2, x3, t0, t1, t2) \
+	transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
+	\
+	vpshufb RMASK,		x0, x0;       \
+	vpshufb RMASK,		x1, x1;       \
+	vpshufb RMASK,		x2, x2;       \
+	vpshufb RMASK,		x3, x3;       \
+	vmovdqu x0,		(0*4*4)(out); \
+	vmovdqu	x1,		(1*4*4)(out); \
+	vmovdqu	x2,		(2*4*4)(out); \
+	vmovdqu	x3,		(3*4*4)(out);
+
+#define outunpack_xor_blocks(out, x0, x1, x2, x3, t0, t1, t2) \
+	transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
+	\
+	vpshufb RMASK,		x0, x0;       \
+	vpshufb RMASK,		x1, x1;       \
+	vpshufb RMASK,		x2, x2;       \
+	vpshufb RMASK,		x3, x3;       \
+	vpxor (0*4*4)(out),	x0, x0;       \
+	vmovdqu	x0,		(0*4*4)(out); \
+	vpxor (1*4*4)(out),	x1, x1;       \
+	vmovdqu	x1,		(1*4*4)(out); \
+	vpxor (2*4*4)(out),	x2, x2;       \
+	vmovdqu x2,		(2*4*4)(out); \
+	vpxor (3*4*4)(out),	x3, x3;       \
+	vmovdqu x3,		(3*4*4)(out);
+
+.align 16
+.Lbswap_mask:
+	.byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
+.L32_mask:
+	.byte 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ,0, 0, 0, 0, 0
+
+.align 16
+.global __cast6_enc_blk_8way
+.type   __cast6_enc_blk_8way,@function;
+
+__cast6_enc_blk_8way:
+	/* input:
+	 *	%rdi: ctx, CTX
+	 *	%rsi: dst
+	 *	%rdx: src
+	 *	%rcx: bool, if true: xor output
+	 */
+
+	pushq %rbx;
+	pushq %rcx;
+
+	vmovdqu .Lbswap_mask, RMASK;
+	vmovdqu .L32_mask, R32;
+	vpxor RKRF, RKRF, RKRF;
+
+	leaq (4*4*4)(%rdx), %rax;
+	inpack_blocks(%rdx, RA1, RB1, RC1, RD1, RTMP, RX, RKM);
+	inpack_blocks(%rax, RA2, RB2, RC2, RD2, RTMP, RX, RKM);
+
+	xorq RID1, RID1;
+	xorq RID2, RID2;
+
+	Q(0);
+	Q(1);
+	Q(2);
+	Q(3);
+	Q(4);
+	Q(5);
+	QBAR(6);
+	QBAR(7);
+	QBAR(8);
+	QBAR(9);
+	QBAR(10);
+	QBAR(11);
+
+	popq %rcx;
+	popq %rbx;
+
+	leaq (4*4*4)(%rsi), %rax;
+
+	testb %cl, %cl;
+	jnz __enc_xor8;
+
+	outunpack_blocks(%rsi, RA1, RB1, RC1, RD1, RTMP, RX, RKM);
+	outunpack_blocks(%rax, RA2, RB2, RC2, RD2, RTMP, RX, RKM);
+
+	ret;
+
+__enc_xor8:
+	outunpack_xor_blocks(%rsi, RA1, RB1, RC1, RD1, RTMP, RX, RKM);
+	outunpack_xor_blocks(%rax, RA2, RB2, RC2, RD2, RTMP, RX, RKM);
+
+	ret;
+
+.align 16
+.global cast6_dec_blk_8way
+.type   cast6_dec_blk_8way,@function;
+
+cast6_dec_blk_8way:
+	/* input:
+	 *	%rdi: ctx, CTX
+	 *	%rsi: dst
+	 *	%rdx: src
+	 */
+
+	pushq %rbx;
+
+	vmovdqu .Lbswap_mask, RMASK;
+	vmovdqu .L32_mask, R32;
+	vpxor RKRF, RKRF, RKRF;
+
+	leaq (4*4*4)(%rdx), %rax;
+	inpack_blocks(%rdx, RA1, RB1, RC1, RD1, RTMP, RX, RKM);
+	inpack_blocks(%rax, RA2, RB2, RC2, RD2, RTMP, RX, RKM);
+
+	xorq RID1, RID1;
+	xorq RID2, RID2;
+
+	Q(11);
+	Q(10);
+	Q(9);
+	Q(8);
+	Q(7);
+	Q(6);
+	QBAR(5);
+	QBAR(4);
+	QBAR(3);
+	QBAR(2);
+	QBAR(1);
+	QBAR(0);
+
+	popq %rbx;
+
+	leaq (4*4*4)(%rsi), %rax;
+	outunpack_blocks(%rsi, RA1, RB1, RC1, RD1, RTMP, RX, RKM);
+	outunpack_blocks(%rax, RA2, RB2, RC2, RD2, RTMP, RX, RKM);
+
+	ret;
diff --git a/arch/x86/crypto/cast6_avx_glue.c b/arch/x86/crypto/cast6_avx_glue.c
new file mode 100644
index 00000000000..15e5f85a501
--- /dev/null
+++ b/arch/x86/crypto/cast6_avx_glue.c
@@ -0,0 +1,648 @@
+/*
+ * Glue Code for the AVX assembler implemention of the Cast6 Cipher
+ *
+ * Copyright (C) 2012 Johannes Goetzfried
+ *     <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
+ * USA
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/hardirq.h>
+#include <linux/types.h>
+#include <linux/crypto.h>
+#include <linux/err.h>
+#include <crypto/algapi.h>
+#include <crypto/cast6.h>
+#include <crypto/cryptd.h>
+#include <crypto/b128ops.h>
+#include <crypto/ctr.h>
+#include <crypto/lrw.h>
+#include <crypto/xts.h>
+#include <asm/xcr.h>
+#include <asm/xsave.h>
+#include <asm/crypto/ablk_helper.h>
+#include <asm/crypto/glue_helper.h>
+
+#define CAST6_PARALLEL_BLOCKS 8
+
+asmlinkage void __cast6_enc_blk_8way(struct cast6_ctx *ctx, u8 *dst,
+				     const u8 *src, bool xor);
+asmlinkage void cast6_dec_blk_8way(struct cast6_ctx *ctx, u8 *dst,
+				   const u8 *src);
+
+static inline void cast6_enc_blk_xway(struct cast6_ctx *ctx, u8 *dst,
+				      const u8 *src)
+{
+	__cast6_enc_blk_8way(ctx, dst, src, false);
+}
+
+static inline void cast6_enc_blk_xway_xor(struct cast6_ctx *ctx, u8 *dst,
+					  const u8 *src)
+{
+	__cast6_enc_blk_8way(ctx, dst, src, true);
+}
+
+static inline void cast6_dec_blk_xway(struct cast6_ctx *ctx, u8 *dst,
+				      const u8 *src)
+{
+	cast6_dec_blk_8way(ctx, dst, src);
+}
+
+
+static void cast6_decrypt_cbc_xway(void *ctx, u128 *dst, const u128 *src)
+{
+	u128 ivs[CAST6_PARALLEL_BLOCKS - 1];
+	unsigned int j;
+
+	for (j = 0; j < CAST6_PARALLEL_BLOCKS - 1; j++)
+		ivs[j] = src[j];
+
+	cast6_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src);
+
+	for (j = 0; j < CAST6_PARALLEL_BLOCKS - 1; j++)
+		u128_xor(dst + (j + 1), dst + (j + 1), ivs + j);
+}
+
+static void cast6_crypt_ctr(void *ctx, u128 *dst, const u128 *src, u128 *iv)
+{
+	be128 ctrblk;
+
+	u128_to_be128(&ctrblk, iv);
+	u128_inc(iv);
+
+	__cast6_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
+	u128_xor(dst, src, (u128 *)&ctrblk);
+}
+
+static void cast6_crypt_ctr_xway(void *ctx, u128 *dst, const u128 *src,
+				   u128 *iv)
+{
+	be128 ctrblks[CAST6_PARALLEL_BLOCKS];
+	unsigned int i;
+
+	for (i = 0; i < CAST6_PARALLEL_BLOCKS; i++) {
+		if (dst != src)
+			dst[i] = src[i];
+
+		u128_to_be128(&ctrblks[i], iv);
+		u128_inc(iv);
+	}
+
+	cast6_enc_blk_xway_xor(ctx, (u8 *)dst, (u8 *)ctrblks);
+}
+
+static const struct common_glue_ctx cast6_enc = {
+	.num_funcs = 2,
+	.fpu_blocks_limit = CAST6_PARALLEL_BLOCKS,
+
+	.funcs = { {
+		.num_blocks = CAST6_PARALLEL_BLOCKS,
+		.fn_u = { .ecb = GLUE_FUNC_CAST(cast6_enc_blk_xway) }
+	}, {
+		.num_blocks = 1,
+		.fn_u = { .ecb = GLUE_FUNC_CAST(__cast6_encrypt) }
+	} }
+};
+
+static const struct common_glue_ctx cast6_ctr = {
+	.num_funcs = 2,
+	.fpu_blocks_limit = CAST6_PARALLEL_BLOCKS,
+
+	.funcs = { {
+		.num_blocks = CAST6_PARALLEL_BLOCKS,
+		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(cast6_crypt_ctr_xway) }
+	}, {
+		.num_blocks = 1,
+		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(cast6_crypt_ctr) }
+	} }
+};
+
+static const struct common_glue_ctx cast6_dec = {
+	.num_funcs = 2,
+	.fpu_blocks_limit = CAST6_PARALLEL_BLOCKS,
+
+	.funcs = { {
+		.num_blocks = CAST6_PARALLEL_BLOCKS,
+		.fn_u = { .ecb = GLUE_FUNC_CAST(cast6_dec_blk_xway) }
+	}, {
+		.num_blocks = 1,
+		.fn_u = { .ecb = GLUE_FUNC_CAST(__cast6_decrypt) }
+	} }
+};
+
+static const struct common_glue_ctx cast6_dec_cbc = {
+	.num_funcs = 2,
+	.fpu_blocks_limit = CAST6_PARALLEL_BLOCKS,
+
+	.funcs = { {
+		.num_blocks = CAST6_PARALLEL_BLOCKS,
+		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(cast6_decrypt_cbc_xway) }
+	}, {
+		.num_blocks = 1,
+		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(__cast6_decrypt) }
+	} }
+};
+
+static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	return glue_ecb_crypt_128bit(&cast6_enc, desc, dst, src, nbytes);
+}
+
+static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	return glue_ecb_crypt_128bit(&cast6_dec, desc, dst, src, nbytes);
+}
+
+static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(__cast6_encrypt), desc,
+				       dst, src, nbytes);
+}
+
+static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	return glue_cbc_decrypt_128bit(&cast6_dec_cbc, desc, dst, src,
+				       nbytes);
+}
+
+static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		     struct scatterlist *src, unsigned int nbytes)
+{
+	return glue_ctr_crypt_128bit(&cast6_ctr, desc, dst, src, nbytes);
+}
+
+static inline bool cast6_fpu_begin(bool fpu_enabled, unsigned int nbytes)
+{
+	return glue_fpu_begin(CAST6_BLOCK_SIZE, CAST6_PARALLEL_BLOCKS,
+			      NULL, fpu_enabled, nbytes);
+}
+
+static inline void cast6_fpu_end(bool fpu_enabled)
+{
+	glue_fpu_end(fpu_enabled);
+}
+
+struct crypt_priv {
+	struct cast6_ctx *ctx;
+	bool fpu_enabled;
+};
+
+static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
+{
+	const unsigned int bsize = CAST6_BLOCK_SIZE;
+	struct crypt_priv *ctx = priv;
+	int i;
+
+	ctx->fpu_enabled = cast6_fpu_begin(ctx->fpu_enabled, nbytes);
+
+	if (nbytes == bsize * CAST6_PARALLEL_BLOCKS) {
+		cast6_enc_blk_xway(ctx->ctx, srcdst, srcdst);
+		return;
+	}
+
+	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
+		__cast6_encrypt(ctx->ctx, srcdst, srcdst);
+}
+
+static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
+{
+	const unsigned int bsize = CAST6_BLOCK_SIZE;
+	struct crypt_priv *ctx = priv;
+	int i;
+
+	ctx->fpu_enabled = cast6_fpu_begin(ctx->fpu_enabled, nbytes);
+
+	if (nbytes == bsize * CAST6_PARALLEL_BLOCKS) {
+		cast6_dec_blk_xway(ctx->ctx, srcdst, srcdst);
+		return;
+	}
+
+	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
+		__cast6_decrypt(ctx->ctx, srcdst, srcdst);
+}
+
+struct cast6_lrw_ctx {
+	struct lrw_table_ctx lrw_table;
+	struct cast6_ctx cast6_ctx;
+};
+
+static int lrw_cast6_setkey(struct crypto_tfm *tfm, const u8 *key,
+			      unsigned int keylen)
+{
+	struct cast6_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
+	int err;
+
+	err = __cast6_setkey(&ctx->cast6_ctx, key, keylen - CAST6_BLOCK_SIZE,
+			     &tfm->crt_flags);
+	if (err)
+		return err;
+
+	return lrw_init_table(&ctx->lrw_table, key + keylen - CAST6_BLOCK_SIZE);
+}
+
+static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct cast6_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	be128 buf[CAST6_PARALLEL_BLOCKS];
+	struct crypt_priv crypt_ctx = {
+		.ctx = &ctx->cast6_ctx,
+		.fpu_enabled = false,
+	};
+	struct lrw_crypt_req req = {
+		.tbuf = buf,
+		.tbuflen = sizeof(buf),
+
+		.table_ctx = &ctx->lrw_table,
+		.crypt_ctx = &crypt_ctx,
+		.crypt_fn = encrypt_callback,
+	};
+	int ret;
+
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	ret = lrw_crypt(desc, dst, src, nbytes, &req);
+	cast6_fpu_end(crypt_ctx.fpu_enabled);
+
+	return ret;
+}
+
+static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct cast6_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	be128 buf[CAST6_PARALLEL_BLOCKS];
+	struct crypt_priv crypt_ctx = {
+		.ctx = &ctx->cast6_ctx,
+		.fpu_enabled = false,
+	};
+	struct lrw_crypt_req req = {
+		.tbuf = buf,
+		.tbuflen = sizeof(buf),
+
+		.table_ctx = &ctx->lrw_table,
+		.crypt_ctx = &crypt_ctx,
+		.crypt_fn = decrypt_callback,
+	};
+	int ret;
+
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	ret = lrw_crypt(desc, dst, src, nbytes, &req);
+	cast6_fpu_end(crypt_ctx.fpu_enabled);
+
+	return ret;
+}
+
+static void lrw_exit_tfm(struct crypto_tfm *tfm)
+{
+	struct cast6_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	lrw_free_table(&ctx->lrw_table);
+}
+
+struct cast6_xts_ctx {
+	struct cast6_ctx tweak_ctx;
+	struct cast6_ctx crypt_ctx;
+};
+
+static int xts_cast6_setkey(struct crypto_tfm *tfm, const u8 *key,
+			      unsigned int keylen)
+{
+	struct cast6_xts_ctx *ctx = crypto_tfm_ctx(tfm);
+	u32 *flags = &tfm->crt_flags;
+	int err;
+
+	/* key consists of keys of equal size concatenated, therefore
+	 * the length must be even
+	 */
+	if (keylen % 2) {
+		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+		return -EINVAL;
+	}
+
+	/* first half of xts-key is for crypt */
+	err = __cast6_setkey(&ctx->crypt_ctx, key, keylen / 2, flags);
+	if (err)
+		return err;
+
+	/* second half of xts-key is for tweak */
+	return __cast6_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2,
+			      flags);
+}
+
+static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct cast6_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	be128 buf[CAST6_PARALLEL_BLOCKS];
+	struct crypt_priv crypt_ctx = {
+		.ctx = &ctx->crypt_ctx,
+		.fpu_enabled = false,
+	};
+	struct xts_crypt_req req = {
+		.tbuf = buf,
+		.tbuflen = sizeof(buf),
+
+		.tweak_ctx = &ctx->tweak_ctx,
+		.tweak_fn = XTS_TWEAK_CAST(__cast6_encrypt),
+		.crypt_ctx = &crypt_ctx,
+		.crypt_fn = encrypt_callback,
+	};
+	int ret;
+
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	ret = xts_crypt(desc, dst, src, nbytes, &req);
+	cast6_fpu_end(crypt_ctx.fpu_enabled);
+
+	return ret;
+}
+
+static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct cast6_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	be128 buf[CAST6_PARALLEL_BLOCKS];
+	struct crypt_priv crypt_ctx = {
+		.ctx = &ctx->crypt_ctx,
+		.fpu_enabled = false,
+	};
+	struct xts_crypt_req req = {
+		.tbuf = buf,
+		.tbuflen = sizeof(buf),
+
+		.tweak_ctx = &ctx->tweak_ctx,
+		.tweak_fn = XTS_TWEAK_CAST(__cast6_encrypt),
+		.crypt_ctx = &crypt_ctx,
+		.crypt_fn = decrypt_callback,
+	};
+	int ret;
+
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	ret = xts_crypt(desc, dst, src, nbytes, &req);
+	cast6_fpu_end(crypt_ctx.fpu_enabled);
+
+	return ret;
+}
+
+static struct crypto_alg cast6_algs[10] = { {
+	.cra_name		= "__ecb-cast6-avx",
+	.cra_driver_name	= "__driver-ecb-cast6-avx",
+	.cra_priority		= 0,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= CAST6_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct cast6_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= CAST6_MIN_KEY_SIZE,
+			.max_keysize	= CAST6_MAX_KEY_SIZE,
+			.setkey		= cast6_setkey,
+			.encrypt	= ecb_encrypt,
+			.decrypt	= ecb_decrypt,
+		},
+	},
+}, {
+	.cra_name		= "__cbc-cast6-avx",
+	.cra_driver_name	= "__driver-cbc-cast6-avx",
+	.cra_priority		= 0,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= CAST6_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct cast6_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= CAST6_MIN_KEY_SIZE,
+			.max_keysize	= CAST6_MAX_KEY_SIZE,
+			.setkey		= cast6_setkey,
+			.encrypt	= cbc_encrypt,
+			.decrypt	= cbc_decrypt,
+		},
+	},
+}, {
+	.cra_name		= "__ctr-cast6-avx",
+	.cra_driver_name	= "__driver-ctr-cast6-avx",
+	.cra_priority		= 0,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= 1,
+	.cra_ctxsize		= sizeof(struct cast6_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= CAST6_MIN_KEY_SIZE,
+			.max_keysize	= CAST6_MAX_KEY_SIZE,
+			.ivsize		= CAST6_BLOCK_SIZE,
+			.setkey		= cast6_setkey,
+			.encrypt	= ctr_crypt,
+			.decrypt	= ctr_crypt,
+		},
+	},
+}, {
+	.cra_name		= "__lrw-cast6-avx",
+	.cra_driver_name	= "__driver-lrw-cast6-avx",
+	.cra_priority		= 0,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= CAST6_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct cast6_lrw_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_exit		= lrw_exit_tfm,
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= CAST6_MIN_KEY_SIZE +
+					  CAST6_BLOCK_SIZE,
+			.max_keysize	= CAST6_MAX_KEY_SIZE +
+					  CAST6_BLOCK_SIZE,
+			.ivsize		= CAST6_BLOCK_SIZE,
+			.setkey		= lrw_cast6_setkey,
+			.encrypt	= lrw_encrypt,
+			.decrypt	= lrw_decrypt,
+		},
+	},
+}, {
+	.cra_name		= "__xts-cast6-avx",
+	.cra_driver_name	= "__driver-xts-cast6-avx",
+	.cra_priority		= 0,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= CAST6_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct cast6_xts_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= CAST6_MIN_KEY_SIZE * 2,
+			.max_keysize	= CAST6_MAX_KEY_SIZE * 2,
+			.ivsize		= CAST6_BLOCK_SIZE,
+			.setkey		= xts_cast6_setkey,
+			.encrypt	= xts_encrypt,
+			.decrypt	= xts_decrypt,
+		},
+	},
+}, {
+	.cra_name		= "ecb(cast6)",
+	.cra_driver_name	= "ecb-cast6-avx",
+	.cra_priority		= 200,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= CAST6_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= ablk_init,
+	.cra_exit		= ablk_exit,
+	.cra_u = {
+		.ablkcipher = {
+			.min_keysize	= CAST6_MIN_KEY_SIZE,
+			.max_keysize	= CAST6_MAX_KEY_SIZE,
+			.setkey		= ablk_set_key,
+			.encrypt	= ablk_encrypt,
+			.decrypt	= ablk_decrypt,
+		},
+	},
+}, {
+	.cra_name		= "cbc(cast6)",
+	.cra_driver_name	= "cbc-cast6-avx",
+	.cra_priority		= 200,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= CAST6_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= ablk_init,
+	.cra_exit		= ablk_exit,
+	.cra_u = {
+		.ablkcipher = {
+			.min_keysize	= CAST6_MIN_KEY_SIZE,
+			.max_keysize	= CAST6_MAX_KEY_SIZE,
+			.ivsize		= CAST6_BLOCK_SIZE,
+			.setkey		= ablk_set_key,
+			.encrypt	= __ablk_encrypt,
+			.decrypt	= ablk_decrypt,
+		},
+	},
+}, {
+	.cra_name		= "ctr(cast6)",
+	.cra_driver_name	= "ctr-cast6-avx",
+	.cra_priority		= 200,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= 1,
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= ablk_init,
+	.cra_exit		= ablk_exit,
+	.cra_u = {
+		.ablkcipher = {
+			.min_keysize	= CAST6_MIN_KEY_SIZE,
+			.max_keysize	= CAST6_MAX_KEY_SIZE,
+			.ivsize		= CAST6_BLOCK_SIZE,
+			.setkey		= ablk_set_key,
+			.encrypt	= ablk_encrypt,
+			.decrypt	= ablk_encrypt,
+			.geniv		= "chainiv",
+		},
+	},
+}, {
+	.cra_name		= "lrw(cast6)",
+	.cra_driver_name	= "lrw-cast6-avx",
+	.cra_priority		= 200,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= CAST6_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= ablk_init,
+	.cra_exit		= ablk_exit,
+	.cra_u = {
+		.ablkcipher = {
+			.min_keysize	= CAST6_MIN_KEY_SIZE +
+					  CAST6_BLOCK_SIZE,
+			.max_keysize	= CAST6_MAX_KEY_SIZE +
+					  CAST6_BLOCK_SIZE,
+			.ivsize		= CAST6_BLOCK_SIZE,
+			.setkey		= ablk_set_key,
+			.encrypt	= ablk_encrypt,
+			.decrypt	= ablk_decrypt,
+		},
+	},
+}, {
+	.cra_name		= "xts(cast6)",
+	.cra_driver_name	= "xts-cast6-avx",
+	.cra_priority		= 200,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= CAST6_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= ablk_init,
+	.cra_exit		= ablk_exit,
+	.cra_u = {
+		.ablkcipher = {
+			.min_keysize	= CAST6_MIN_KEY_SIZE * 2,
+			.max_keysize	= CAST6_MAX_KEY_SIZE * 2,
+			.ivsize		= CAST6_BLOCK_SIZE,
+			.setkey		= ablk_set_key,
+			.encrypt	= ablk_encrypt,
+			.decrypt	= ablk_decrypt,
+		},
+	},
+} };
+
+static int __init cast6_init(void)
+{
+	u64 xcr0;
+
+	if (!cpu_has_avx || !cpu_has_osxsave) {
+		pr_info("AVX instructions are not detected.\n");
+		return -ENODEV;
+	}
+
+	xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
+	if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
+		pr_info("AVX detected but unusable.\n");
+		return -ENODEV;
+	}
+
+	return crypto_register_algs(cast6_algs, ARRAY_SIZE(cast6_algs));
+}
+
+static void __exit cast6_exit(void)
+{
+	crypto_unregister_algs(cast6_algs, ARRAY_SIZE(cast6_algs));
+}
+
+module_init(cast6_init);
+module_exit(cast6_exit);
+
+MODULE_DESCRIPTION("Cast6 Cipher Algorithm, AVX optimized");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("cast6");
diff --git a/crypto/Kconfig b/crypto/Kconfig
index cda97fcaa82..fe8ed62efe2 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -713,6 +713,23 @@ config CRYPTO_CAST6
 	  The CAST6 encryption algorithm (synonymous with CAST-256) is
 	  described in RFC2612.
 
+config CRYPTO_CAST6_AVX_X86_64
+	tristate "CAST6 (CAST-256) cipher algorithm (x86_64/AVX)"
+	depends on X86 && 64BIT
+	select CRYPTO_ALGAPI
+	select CRYPTO_CRYPTD
+	select CRYPTO_ABLK_HELPER_X86
+	select CRYPTO_GLUE_HELPER_X86
+	select CRYPTO_CAST6
+	select CRYPTO_LRW
+	select CRYPTO_XTS
+	help
+	  The CAST6 encryption algorithm (synonymous with CAST-256) is
+	  described in RFC2612.
+
+	  This module provides the Cast6 cipher algorithm that processes
+	  eight blocks parallel using the AVX instruction set.
+
 config CRYPTO_DES
 	tristate "DES and Triple DES EDE cipher algorithms"
 	select CRYPTO_ALGAPI
diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index cff3c1c3f83..575b57c3244 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -1548,6 +1548,21 @@ static const struct alg_test_desc alg_test_descs[] = {
 				}
 			}
 		}
+	}, {
+		.alg = "__cbc-cast6-avx",
+		.test = alg_test_null,
+		.suite = {
+			.cipher = {
+				.enc = {
+					.vecs = NULL,
+					.count = 0
+				},
+				.dec = {
+					.vecs = NULL,
+					.count = 0
+				}
+			}
+		}
 	}, {
 		.alg = "__cbc-serpent-avx",
 		.test = alg_test_null,
@@ -1624,6 +1639,21 @@ static const struct alg_test_desc alg_test_descs[] = {
 				}
 			}
 		}
+	}, {
+		.alg = "__driver-cbc-cast6-avx",
+		.test = alg_test_null,
+		.suite = {
+			.cipher = {
+				.enc = {
+					.vecs = NULL,
+					.count = 0
+				},
+				.dec = {
+					.vecs = NULL,
+					.count = 0
+				}
+			}
+		}
 	}, {
 		.alg = "__driver-cbc-serpent-avx",
 		.test = alg_test_null,
@@ -1700,6 +1730,21 @@ static const struct alg_test_desc alg_test_descs[] = {
 				}
 			}
 		}
+	}, {
+		.alg = "__driver-ecb-cast6-avx",
+		.test = alg_test_null,
+		.suite = {
+			.cipher = {
+				.enc = {
+					.vecs = NULL,
+					.count = 0
+				},
+				.dec = {
+					.vecs = NULL,
+					.count = 0
+				}
+			}
+		}
 	}, {
 		.alg = "__driver-ecb-serpent-avx",
 		.test = alg_test_null,
@@ -2026,6 +2071,21 @@ static const struct alg_test_desc alg_test_descs[] = {
 				}
 			}
 		}
+	}, {
+		.alg = "cryptd(__driver-ecb-cast6-avx)",
+		.test = alg_test_null,
+		.suite = {
+			.cipher = {
+				.enc = {
+					.vecs = NULL,
+					.count = 0
+				},
+				.dec = {
+					.vecs = NULL,
+					.count = 0
+				}
+			}
+		}
 	}, {
 		.alg = "cryptd(__driver-ecb-serpent-avx)",
 		.test = alg_test_null,
-- 
cgit v1.2.3-70-g09d2


From e115676e042f4d9268c6b6d8cb7dc962aa6cfd7d Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Wed, 1 Aug 2012 17:01:42 +0300
Subject: KVM: x86: update KVM_SAVE_MSRS_BEGIN to correct value

When MSR_KVM_PV_EOI_EN was added to msrs_to_save array
KVM_SAVE_MSRS_BEGIN was not updated accordingly.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/x86.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 3a53bcc24f2..a87c82aa319 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -806,7 +806,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
  * kvm-specific. Those are put in the beginning of the list.
  */
 
-#define KVM_SAVE_MSRS_BEGIN	9
+#define KVM_SAVE_MSRS_BEGIN	10
 static u32 msrs_to_save[] = {
 	MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
 	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
-- 
cgit v1.2.3-70-g09d2


From ea22571c8fd912f28e2525f7112bbb84b474ff3a Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 19 Jul 2012 13:59:37 -0400
Subject: x86: mce: Disable preemption when calling raise_local()

raise_mce() has a code path which does not disable preemption when the
raise_local() is called. The per cpu variable access in raise_local()
depends on preemption being disabled to be functional. So that code
path was either never tested or never tested with CONFIG_DEBUG_PREEMPT
enabled.

Add the missing preempt_disable/enable() pair around the call.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Chen Gong <gong.chen@linux.intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/x86/kernel/cpu/mcheck/mce-inject.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index fc4beb39357..753746f6dbd 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -194,7 +194,11 @@ static void raise_mce(struct mce *m)
 		put_online_cpus();
 	} else
 #endif
+	{
+		preempt_disable();
 		raise_local();
+		preempt_enable();
+	}
 }
 
 /* Error injection interface */
-- 
cgit v1.2.3-70-g09d2


From b5975917a3e5f93b5d1c95561aab0aa44327baea Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 19 Jul 2012 13:59:38 -0400
Subject: x86: mce: Serialize mce injection

raise_mce() fiddles with global state, but lacks any kind of
serialization.

Add a mutex around the raise_mce() call, so concurrent writers do not
stomp on each other toes.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Chen Gong <gong.chen@linux.intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/x86/kernel/cpu/mcheck/mce-inject.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index 753746f6dbd..ddc72f83933 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -78,6 +78,7 @@ static void raise_exception(struct mce *m, struct pt_regs *pregs)
 }
 
 static cpumask_var_t mce_inject_cpumask;
+static DEFINE_MUTEX(mce_inject_mutex);
 
 static int mce_raise_notify(unsigned int cmd, struct pt_regs *regs)
 {
@@ -229,7 +230,10 @@ static ssize_t mce_write(struct file *filp, const char __user *ubuf,
 	 * so do it a jiffie or two later everywhere.
 	 */
 	schedule_timeout(2);
+
+	mutex_lock(&mce_inject_mutex);
 	raise_mce(&m);
+	mutex_unlock(&mce_inject_mutex);
 	return usize;
 }
 
-- 
cgit v1.2.3-70-g09d2


From 26c3c283c5b08dd250279c06ba3ab5b094bbacc3 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 19 Jul 2012 13:59:39 -0400
Subject: x86: mce: Split timer init

Split timer init function into the init and the start part, so the
start part can replace the open coded version in CPU_DOWN_FAILED.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Chen Gong <gong.chen@linux.intel.com>
Acked-by: Borislav Petkov <borislav.petkov@amd.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/x86/kernel/cpu/mcheck/mce.c | 25 +++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 5e095f873e3..bd3a5e850f4 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1557,23 +1557,28 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
 	}
 }
 
-static void __mcheck_cpu_init_timer(void)
+static void mce_start_timer(unsigned int cpu, struct timer_list *t)
 {
-	struct timer_list *t = &__get_cpu_var(mce_timer);
 	unsigned long iv = check_interval * HZ;
 
-	setup_timer(t, mce_timer_fn, smp_processor_id());
+	__this_cpu_write(mce_next_interval, iv);
 
-	if (mce_ignore_ce)
+	if (mce_ignore_ce || !iv)
 		return;
 
-	__this_cpu_write(mce_next_interval, iv);
-	if (!iv)
-		return;
 	t->expires = round_jiffies(jiffies + iv);
 	add_timer_on(t, smp_processor_id());
 }
 
+static void __mcheck_cpu_init_timer(void)
+{
+	struct timer_list *t = &__get_cpu_var(mce_timer);
+	unsigned int cpu = smp_processor_id();
+
+	setup_timer(t, mce_timer_fn, cpu);
+	mce_start_timer(cpu, t);
+}
+
 /* Handle unconfigured int18 (should never happen) */
 static void unexpected_machine_check(struct pt_regs *regs, long error_code)
 {
@@ -2277,12 +2282,8 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
 		break;
 	case CPU_DOWN_FAILED:
 	case CPU_DOWN_FAILED_FROZEN:
-		if (!mce_ignore_ce && check_interval) {
-			t->expires = round_jiffies(jiffies +
-					per_cpu(mce_next_interval, cpu));
-			add_timer_on(t, cpu);
-		}
 		smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);
+		mce_start_timer(cpu, t);
 		break;
 	case CPU_POST_DEAD:
 		/* intentionally ignoring frozen here */
-- 
cgit v1.2.3-70-g09d2


From 1a65f970d10ace7a1e399f9061a65679c0ae57d0 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 19 Jul 2012 13:59:40 -0400
Subject: x86: mce: Remove the frozen cases in the hotplug code

No point in having double cases if we can simply mask the FROZEN bit
out.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Chen Gong <gong.chen@linux.intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/x86/kernel/cpu/mcheck/mce.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index bd3a5e850f4..b4dde1527ed 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -2262,34 +2262,32 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
 	unsigned int cpu = (unsigned long)hcpu;
 	struct timer_list *t = &per_cpu(mce_timer, cpu);
 
-	switch (action) {
+	switch (action & ~CPU_TASKS_FROZEN) {
 	case CPU_ONLINE:
-	case CPU_ONLINE_FROZEN:
 		mce_device_create(cpu);
 		if (threshold_cpu_callback)
 			threshold_cpu_callback(action, cpu);
 		break;
 	case CPU_DEAD:
-	case CPU_DEAD_FROZEN:
 		if (threshold_cpu_callback)
 			threshold_cpu_callback(action, cpu);
 		mce_device_remove(cpu);
 		break;
 	case CPU_DOWN_PREPARE:
-	case CPU_DOWN_PREPARE_FROZEN:
 		del_timer_sync(t);
 		smp_call_function_single(cpu, mce_disable_cpu, &action, 1);
 		break;
 	case CPU_DOWN_FAILED:
-	case CPU_DOWN_FAILED_FROZEN:
 		smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);
 		mce_start_timer(cpu, t);
 		break;
-	case CPU_POST_DEAD:
+	}
+
+	if (action == CPU_POST_DEAD) {
 		/* intentionally ignoring frozen here */
 		cmci_rediscover(cpu);
-		break;
 	}
+
 	return NOTIFY_OK;
 }
 
-- 
cgit v1.2.3-70-g09d2


From aab2eb7a38e0e510874acca01838f5badbca6c7e Mon Sep 17 00:00:00 2001
From: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
Date: Wed, 1 Aug 2012 18:01:10 +0900
Subject: KVM: Stop checking rmap to see if slot is being created

Instead, check npages consistently.  This helps to make rmap
architecture specific in a later patch.

Signed-off-by: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/x86.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 3ca90d74711..abc039d7842 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6385,7 +6385,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 	 *x86 needs to handle !user_alloc case.
 	 */
 	if (!user_alloc) {
-		if (npages && !old.rmap) {
+		if (npages && !old.npages) {
 			unsigned long userspace_addr;
 
 			userspace_addr = vm_mmap(NULL, 0,
@@ -6413,7 +6413,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
 
 	int nr_mmu_pages = 0, npages = mem->memory_size >> PAGE_SHIFT;
 
-	if (!user_alloc && !old.user_alloc && old.rmap && !npages) {
+	if (!user_alloc && !old.user_alloc && old.npages && !npages) {
 		int ret;
 
 		ret = vm_munmap(old.userspace_addr,
-- 
cgit v1.2.3-70-g09d2


From 65fbe37c42ed75604c9a770639209dcee162ebe7 Mon Sep 17 00:00:00 2001
From: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
Date: Wed, 1 Aug 2012 18:02:01 +0900
Subject: KVM: MMU: Use gfn_to_rmap() instead of directly reading rmap array

This helps to make rmap architecture specific in a later patch.

Signed-off-by: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/mmu.c       | 3 ++-
 arch/x86/kvm/mmu_audit.c | 4 +---
 2 files changed, 3 insertions(+), 4 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index a9a20528e70..ee768bb2367 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1181,7 +1181,8 @@ void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
 	unsigned long *rmapp;
 
 	while (mask) {
-		rmapp = &slot->rmap[gfn_offset + __ffs(mask)];
+		rmapp = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask),
+				      PT_PAGE_TABLE_LEVEL, slot);
 		__rmap_write_protect(kvm, rmapp, PT_PAGE_TABLE_LEVEL, false);
 
 		/* clear the first set bit */
diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c
index 7d7d0b9e23e..ca403f9bb0f 100644
--- a/arch/x86/kvm/mmu_audit.c
+++ b/arch/x86/kvm/mmu_audit.c
@@ -190,7 +190,6 @@ static void check_mappings_rmap(struct kvm *kvm, struct kvm_mmu_page *sp)
 
 static void audit_write_protection(struct kvm *kvm, struct kvm_mmu_page *sp)
 {
-	struct kvm_memory_slot *slot;
 	unsigned long *rmapp;
 	u64 *sptep;
 	struct rmap_iterator iter;
@@ -198,8 +197,7 @@ static void audit_write_protection(struct kvm *kvm, struct kvm_mmu_page *sp)
 	if (sp->role.direct || sp->unsync || sp->role.invalid)
 		return;
 
-	slot = gfn_to_memslot(kvm, sp->gfn);
-	rmapp = &slot->rmap[sp->gfn - slot->base_gfn];
+	rmapp = gfn_to_rmap(kvm, sp->gfn, PT_PAGE_TABLE_LEVEL);
 
 	for (sptep = rmap_get_first(*rmapp, &iter); sptep;
 	     sptep = rmap_get_next(&iter)) {
-- 
cgit v1.2.3-70-g09d2


From d89cc617b954aff4030fce178f7d86f59aaf713d Mon Sep 17 00:00:00 2001
From: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
Date: Wed, 1 Aug 2012 18:03:28 +0900
Subject: KVM: Push rmap into kvm_arch_memory_slot

Two reasons:
 - x86 can integrate rmap and rmap_pde and remove heuristics in
   __gfn_to_rmap().
 - Some architectures do not need rmap.

Since rmap is one of the most memory consuming stuff in KVM, ppc'd
better restrict the allocation to Book3S HV.

Signed-off-by: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
Acked-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/powerpc/include/asm/kvm_host.h |  1 +
 arch/powerpc/kvm/book3s_64_mmu_hv.c |  6 ++--
 arch/powerpc/kvm/book3s_hv_rm_mmu.c |  4 +--
 arch/powerpc/kvm/powerpc.c          |  8 ++++++
 arch/x86/include/asm/kvm_host.h     |  2 +-
 arch/x86/kvm/mmu.c                  |  5 +---
 arch/x86/kvm/x86.c                  | 55 +++++++++++++++++++++----------------
 include/linux/kvm_host.h            |  1 -
 virt/kvm/kvm_main.c                 | 11 +-------
 9 files changed, 49 insertions(+), 44 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 572ad014126..a29e0918172 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -221,6 +221,7 @@ struct revmap_entry {
 #define KVMPPC_GOT_PAGE		0x80
 
 struct kvm_arch_memory_slot {
+	unsigned long *rmap;
 };
 
 struct kvm_arch {
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 3c635c0616b..d95d11322a1 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -705,7 +705,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		goto out_unlock;
 	hpte[0] = (hpte[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID;
 
-	rmap = &memslot->rmap[gfn - memslot->base_gfn];
+	rmap = &memslot->arch.rmap[gfn - memslot->base_gfn];
 	lock_rmap(rmap);
 
 	/* Check if we might have been invalidated; let the guest retry if so */
@@ -788,7 +788,7 @@ static int kvm_handle_hva_range(struct kvm *kvm,
 		for (; gfn < gfn_end; ++gfn) {
 			gfn_t gfn_offset = gfn - memslot->base_gfn;
 
-			ret = handler(kvm, &memslot->rmap[gfn_offset], gfn);
+			ret = handler(kvm, &memslot->arch.rmap[gfn_offset], gfn);
 			retval |= ret;
 		}
 	}
@@ -1036,7 +1036,7 @@ long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
 	unsigned long *rmapp, *map;
 
 	preempt_disable();
-	rmapp = memslot->rmap;
+	rmapp = memslot->arch.rmap;
 	map = memslot->dirty_bitmap;
 	for (i = 0; i < memslot->npages; ++i) {
 		if (kvm_test_clear_dirty(kvm, rmapp))
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 5c70d19494f..56ac1a5d991 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -84,7 +84,7 @@ static void remove_revmap_chain(struct kvm *kvm, long pte_index,
 	if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
 		return;
 
-	rmap = real_vmalloc_addr(&memslot->rmap[gfn - memslot->base_gfn]);
+	rmap = real_vmalloc_addr(&memslot->arch.rmap[gfn - memslot->base_gfn]);
 	lock_rmap(rmap);
 
 	head = *rmap & KVMPPC_RMAP_INDEX;
@@ -180,7 +180,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
 	if (!slot_is_aligned(memslot, psize))
 		return H_PARAMETER;
 	slot_fn = gfn - memslot->base_gfn;
-	rmap = &memslot->rmap[slot_fn];
+	rmap = &memslot->arch.rmap[slot_fn];
 
 	if (!kvm->arch.using_mmu_notifiers) {
 		physp = kvm->arch.slot_phys[memslot->id];
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 87f4dc88607..879b14a6140 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -302,10 +302,18 @@ long kvm_arch_dev_ioctl(struct file *filp,
 void kvm_arch_free_memslot(struct kvm_memory_slot *free,
 			   struct kvm_memory_slot *dont)
 {
+	if (!dont || free->arch.rmap != dont->arch.rmap) {
+		vfree(free->arch.rmap);
+		free->arch.rmap = NULL;
+	}
 }
 
 int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
 {
+	slot->arch.rmap = vzalloc(npages * sizeof(*slot->arch.rmap));
+	if (!slot->arch.rmap)
+		return -ENOMEM;
+
 	return 0;
 }
 
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 48e71318846..1309e69b57f 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -504,7 +504,7 @@ struct kvm_lpage_info {
 };
 
 struct kvm_arch_memory_slot {
-	unsigned long *rmap_pde[KVM_NR_PAGE_SIZES - 1];
+	unsigned long *rmap[KVM_NR_PAGE_SIZES];
 	struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1];
 };
 
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index ee768bb2367..aa9a987ddef 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -970,11 +970,8 @@ static unsigned long *__gfn_to_rmap(gfn_t gfn, int level,
 {
 	unsigned long idx;
 
-	if (likely(level == PT_PAGE_TABLE_LEVEL))
-		return &slot->rmap[gfn - slot->base_gfn];
-
 	idx = gfn_to_index(gfn, slot->base_gfn, level);
-	return &slot->arch.rmap_pde[level - PT_DIRECTORY_LEVEL][idx];
+	return &slot->arch.rmap[level - PT_PAGE_TABLE_LEVEL][idx];
 }
 
 /*
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index abc039d7842..ebf2109318e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6303,14 +6303,18 @@ void kvm_arch_free_memslot(struct kvm_memory_slot *free,
 {
 	int i;
 
-	for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) {
-		if (!dont || free->arch.rmap_pde[i] != dont->arch.rmap_pde[i]) {
-			kvm_kvfree(free->arch.rmap_pde[i]);
-			free->arch.rmap_pde[i] = NULL;
+	for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
+		if (!dont || free->arch.rmap[i] != dont->arch.rmap[i]) {
+			kvm_kvfree(free->arch.rmap[i]);
+			free->arch.rmap[i] = NULL;
 		}
-		if (!dont || free->arch.lpage_info[i] != dont->arch.lpage_info[i]) {
-			kvm_kvfree(free->arch.lpage_info[i]);
-			free->arch.lpage_info[i] = NULL;
+		if (i == 0)
+			continue;
+
+		if (!dont || free->arch.lpage_info[i - 1] !=
+			     dont->arch.lpage_info[i - 1]) {
+			kvm_kvfree(free->arch.lpage_info[i - 1]);
+			free->arch.lpage_info[i - 1] = NULL;
 		}
 	}
 }
@@ -6319,28 +6323,30 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
 {
 	int i;
 
-	for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) {
+	for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
 		unsigned long ugfn;
 		int lpages;
-		int level = i + 2;
+		int level = i + 1;
 
 		lpages = gfn_to_index(slot->base_gfn + npages - 1,
 				      slot->base_gfn, level) + 1;
 
-		slot->arch.rmap_pde[i] =
-			kvm_kvzalloc(lpages * sizeof(*slot->arch.rmap_pde[i]));
-		if (!slot->arch.rmap_pde[i])
+		slot->arch.rmap[i] =
+			kvm_kvzalloc(lpages * sizeof(*slot->arch.rmap[i]));
+		if (!slot->arch.rmap[i])
 			goto out_free;
+		if (i == 0)
+			continue;
 
-		slot->arch.lpage_info[i] =
-			kvm_kvzalloc(lpages * sizeof(*slot->arch.lpage_info[i]));
-		if (!slot->arch.lpage_info[i])
+		slot->arch.lpage_info[i - 1] = kvm_kvzalloc(lpages *
+					sizeof(*slot->arch.lpage_info[i - 1]));
+		if (!slot->arch.lpage_info[i - 1])
 			goto out_free;
 
 		if (slot->base_gfn & (KVM_PAGES_PER_HPAGE(level) - 1))
-			slot->arch.lpage_info[i][0].write_count = 1;
+			slot->arch.lpage_info[i - 1][0].write_count = 1;
 		if ((slot->base_gfn + npages) & (KVM_PAGES_PER_HPAGE(level) - 1))
-			slot->arch.lpage_info[i][lpages - 1].write_count = 1;
+			slot->arch.lpage_info[i - 1][lpages - 1].write_count = 1;
 		ugfn = slot->userspace_addr >> PAGE_SHIFT;
 		/*
 		 * If the gfn and userspace address are not aligned wrt each
@@ -6352,18 +6358,21 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
 			unsigned long j;
 
 			for (j = 0; j < lpages; ++j)
-				slot->arch.lpage_info[i][j].write_count = 1;
+				slot->arch.lpage_info[i - 1][j].write_count = 1;
 		}
 	}
 
 	return 0;
 
 out_free:
-	for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) {
-		kvm_kvfree(slot->arch.rmap_pde[i]);
-		kvm_kvfree(slot->arch.lpage_info[i]);
-		slot->arch.rmap_pde[i] = NULL;
-		slot->arch.lpage_info[i] = NULL;
+	for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
+		kvm_kvfree(slot->arch.rmap[i]);
+		slot->arch.rmap[i] = NULL;
+		if (i == 0)
+			continue;
+
+		kvm_kvfree(slot->arch.lpage_info[i - 1]);
+		slot->arch.lpage_info[i - 1] = NULL;
 	}
 	return -ENOMEM;
 }
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index dbc65f9d6a2..3c16f0f1fe3 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -213,7 +213,6 @@ struct kvm_memory_slot {
 	gfn_t base_gfn;
 	unsigned long npages;
 	unsigned long flags;
-	unsigned long *rmap;
 	unsigned long *dirty_bitmap;
 	struct kvm_arch_memory_slot arch;
 	unsigned long userspace_addr;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index bcf973ec98f..14ec567816a 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -550,16 +550,12 @@ static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot)
 static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
 				  struct kvm_memory_slot *dont)
 {
-	if (!dont || free->rmap != dont->rmap)
-		vfree(free->rmap);
-
 	if (!dont || free->dirty_bitmap != dont->dirty_bitmap)
 		kvm_destroy_dirty_bitmap(free);
 
 	kvm_arch_free_memslot(free, dont);
 
 	free->npages = 0;
-	free->rmap = NULL;
 }
 
 void kvm_free_physmem(struct kvm *kvm)
@@ -768,11 +764,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
 	if (npages && !old.npages) {
 		new.user_alloc = user_alloc;
 		new.userspace_addr = mem->userspace_addr;
-#ifndef CONFIG_S390
-		new.rmap = vzalloc(npages * sizeof(*new.rmap));
-		if (!new.rmap)
-			goto out_free;
-#endif /* not defined CONFIG_S390 */
+
 		if (kvm_arch_create_memslot(&new, npages))
 			goto out_free;
 	}
@@ -831,7 +823,6 @@ int __kvm_set_memory_region(struct kvm *kvm,
 
 	/* actual memory is freed via old in kvm_free_physmem_slot below */
 	if (!npages) {
-		new.rmap = NULL;
 		new.dirty_bitmap = NULL;
 		memset(&new.arch, 0, sizeof(new.arch));
 	}
-- 
cgit v1.2.3-70-g09d2


From 6c8ee57be9350c5c2cafdd6a99d0462d528676e2 Mon Sep 17 00:00:00 2001
From: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Date: Fri, 3 Aug 2012 15:37:54 +0800
Subject: KVM: introduce KVM_PFN_ERR_FAULT

After that, the exported and un-inline function, get_fault_pfn,
can be removed

Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/mmu.c       |  2 +-
 include/linux/kvm_host.h |  3 ++-
 virt/kvm/kvm_main.c      | 12 +++---------
 3 files changed, 6 insertions(+), 11 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index aa9a987ddef..9cf90c8d584 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2512,7 +2512,7 @@ static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn,
 
 	slot = gfn_to_memslot_dirty_bitmap(vcpu, gfn, no_dirty_log);
 	if (!slot)
-		return get_fault_pfn();
+		return KVM_PFN_ERR_FAULT;
 
 	hva = gfn_to_hva_memslot(slot, gfn);
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 3c16f0f1fe3..ef5554f4748 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -48,6 +48,8 @@
 #define KVM_MAX_MMIO_FRAGMENTS \
 	(KVM_MMIO_SIZE / KVM_USER_MMIO_SIZE + KVM_EXTRA_MMIO_FRAGMENTS)
 
+#define KVM_PFN_ERR_FAULT	(-EFAULT)
+
 /*
  * vcpu->requests bit members
  */
@@ -443,7 +445,6 @@ void kvm_release_pfn_clean(pfn_t pfn);
 void kvm_set_pfn_dirty(pfn_t pfn);
 void kvm_set_pfn_accessed(pfn_t pfn);
 void kvm_get_pfn(pfn_t pfn);
-pfn_t get_fault_pfn(void);
 
 int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
 			int len);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 14ec567816a..ef0491645a1 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -939,12 +939,6 @@ static pfn_t get_bad_pfn(void)
 	return -ENOENT;
 }
 
-pfn_t get_fault_pfn(void)
-{
-	return -EFAULT;
-}
-EXPORT_SYMBOL_GPL(get_fault_pfn);
-
 static pfn_t get_hwpoison_pfn(void)
 {
 	return -EHWPOISON;
@@ -1115,7 +1109,7 @@ static pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async,
 		struct vm_area_struct *vma;
 
 		if (atomic)
-			return get_fault_pfn();
+			return KVM_PFN_ERR_FAULT;
 
 		down_read(&current->mm->mmap_sem);
 		if (npages == -EHWPOISON ||
@@ -1127,7 +1121,7 @@ static pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async,
 		vma = find_vma_intersection(current->mm, addr, addr+1);
 
 		if (vma == NULL)
-			pfn = get_fault_pfn();
+			pfn = KVM_PFN_ERR_FAULT;
 		else if ((vma->vm_flags & VM_PFNMAP)) {
 			pfn = ((addr - vma->vm_start) >> PAGE_SHIFT) +
 				vma->vm_pgoff;
@@ -1135,7 +1129,7 @@ static pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async,
 		} else {
 			if (async && (vma->vm_flags & VM_WRITE))
 				*async = true;
-			pfn = get_fault_pfn();
+			pfn = KVM_PFN_ERR_FAULT;
 		}
 		up_read(&current->mm->mmap_sem);
 	} else
-- 
cgit v1.2.3-70-g09d2


From e6c1502b3f933ace20c711ce34ab696f5a67086d Mon Sep 17 00:00:00 2001
From: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Date: Fri, 3 Aug 2012 15:38:36 +0800
Subject: KVM: introduce KVM_PFN_ERR_HWPOISON

Then, get_hwpoison_pfn and is_hwpoison_pfn can be removed

Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/mmu.c       |  2 +-
 include/linux/kvm_host.h |  2 +-
 virt/kvm/kvm_main.c      | 13 +------------
 3 files changed, 3 insertions(+), 14 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 9cf90c8d584..d3cdf69da51 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2649,7 +2649,7 @@ static void kvm_send_hwpoison_signal(unsigned long address, struct task_struct *
 static int kvm_handle_bad_page(struct kvm_vcpu *vcpu, gfn_t gfn, pfn_t pfn)
 {
 	kvm_release_pfn_clean(pfn);
-	if (is_hwpoison_pfn(pfn)) {
+	if (pfn == KVM_PFN_ERR_HWPOISON) {
 		kvm_send_hwpoison_signal(gfn_to_hva(vcpu->kvm, gfn), current);
 		return 0;
 	}
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index ef5554f4748..840f44a096c 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -49,6 +49,7 @@
 	(KVM_MMIO_SIZE / KVM_USER_MMIO_SIZE + KVM_EXTRA_MMIO_FRAGMENTS)
 
 #define KVM_PFN_ERR_FAULT	(-EFAULT)
+#define KVM_PFN_ERR_HWPOISON	(-EHWPOISON)
 
 /*
  * vcpu->requests bit members
@@ -395,7 +396,6 @@ extern struct page *bad_page;
 
 int is_error_page(struct page *page);
 int is_error_pfn(pfn_t pfn);
-int is_hwpoison_pfn(pfn_t pfn);
 int is_noslot_pfn(pfn_t pfn);
 int is_invalid_pfn(pfn_t pfn);
 int kvm_is_error_hva(unsigned long addr);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index ef0491645a1..7fce2d5787a 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -939,17 +939,6 @@ static pfn_t get_bad_pfn(void)
 	return -ENOENT;
 }
 
-static pfn_t get_hwpoison_pfn(void)
-{
-	return -EHWPOISON;
-}
-
-int is_hwpoison_pfn(pfn_t pfn)
-{
-	return pfn == -EHWPOISON;
-}
-EXPORT_SYMBOL_GPL(is_hwpoison_pfn);
-
 int is_noslot_pfn(pfn_t pfn)
 {
 	return pfn == -ENOENT;
@@ -1115,7 +1104,7 @@ static pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async,
 		if (npages == -EHWPOISON ||
 			(!async && check_user_page_hwpoison(addr))) {
 			up_read(&current->mm->mmap_sem);
-			return get_hwpoison_pfn();
+			return KVM_PFN_ERR_HWPOISON;
 		}
 
 		vma = find_vma_intersection(current->mm, addr, addr+1);
-- 
cgit v1.2.3-70-g09d2


From cb9aaa30b133574b646d9d4766ef08a843211393 Mon Sep 17 00:00:00 2001
From: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Date: Fri, 3 Aug 2012 15:42:10 +0800
Subject: KVM: do not release the error pfn

After commit a2766325cf9f9, the error pfn is replaced by the
error code, it need not be released anymore

[ The patch has been compiling tested for powerpc ]

Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/powerpc/kvm/e500_tlb.c |  1 -
 arch/x86/kvm/mmu.c          |  7 +++----
 arch/x86/kvm/mmu_audit.c    |  4 +---
 arch/x86/kvm/paging_tmpl.h  |  8 ++------
 virt/kvm/iommu.c            |  1 -
 virt/kvm/kvm_main.c         | 14 ++++++++------
 6 files changed, 14 insertions(+), 21 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c
index c8f6c582674..09ce5ac128f 100644
--- a/arch/powerpc/kvm/e500_tlb.c
+++ b/arch/powerpc/kvm/e500_tlb.c
@@ -524,7 +524,6 @@ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
 		if (is_error_pfn(pfn)) {
 			printk(KERN_ERR "Couldn't get real page for gfn %lx!\n",
 					(long)gfn);
-			kvm_release_pfn_clean(pfn);
 			return;
 		}
 
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index d3cdf69da51..9651c2cd000 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2496,7 +2496,9 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 				rmap_recycle(vcpu, sptep, gfn);
 		}
 	}
-	kvm_release_pfn_clean(pfn);
+
+	if (!is_error_pfn(pfn))
+		kvm_release_pfn_clean(pfn);
 }
 
 static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
@@ -2648,7 +2650,6 @@ static void kvm_send_hwpoison_signal(unsigned long address, struct task_struct *
 
 static int kvm_handle_bad_page(struct kvm_vcpu *vcpu, gfn_t gfn, pfn_t pfn)
 {
-	kvm_release_pfn_clean(pfn);
 	if (pfn == KVM_PFN_ERR_HWPOISON) {
 		kvm_send_hwpoison_signal(gfn_to_hva(vcpu->kvm, gfn), current);
 		return 0;
@@ -3273,8 +3274,6 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
 	if (!async)
 		return false; /* *pfn has correct page already */
 
-	kvm_release_pfn_clean(*pfn);
-
 	if (!prefault && can_do_async_pf(vcpu)) {
 		trace_kvm_try_async_get_page(gva, gfn);
 		if (kvm_find_async_pf_gfn(vcpu, gfn)) {
diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c
index ca403f9bb0f..daff69e2115 100644
--- a/arch/x86/kvm/mmu_audit.c
+++ b/arch/x86/kvm/mmu_audit.c
@@ -116,10 +116,8 @@ static void audit_mappings(struct kvm_vcpu *vcpu, u64 *sptep, int level)
 	gfn = kvm_mmu_page_get_gfn(sp, sptep - sp->spt);
 	pfn = gfn_to_pfn_atomic(vcpu->kvm, gfn);
 
-	if (is_error_pfn(pfn)) {
-		kvm_release_pfn_clean(pfn);
+	if (is_error_pfn(pfn))
 		return;
-	}
 
 	hpa =  pfn << PAGE_SHIFT;
 	if ((*sptep & PT64_BASE_ADDR_MASK) != hpa)
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index bb7cf01cae7..bf8c42bf50f 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -370,10 +370,8 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
 	pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte);
 	pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte, true);
 	pfn = gfn_to_pfn_atomic(vcpu->kvm, gpte_to_gfn(gpte));
-	if (mmu_invalid_pfn(pfn)) {
-		kvm_release_pfn_clean(pfn);
+	if (mmu_invalid_pfn(pfn))
 		return;
-	}
 
 	/*
 	 * we call mmu_set_spte() with host_writable = true because that
@@ -448,10 +446,8 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw,
 		gfn = gpte_to_gfn(gpte);
 		pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn,
 				      pte_access & ACC_WRITE_MASK);
-		if (mmu_invalid_pfn(pfn)) {
-			kvm_release_pfn_clean(pfn);
+		if (mmu_invalid_pfn(pfn))
 			break;
-		}
 
 		mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, 0,
 			     NULL, PT_PAGE_TABLE_LEVEL, gfn,
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index 6a67bea4019..037cb6730e6 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -107,7 +107,6 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
 		 */
 		pfn = kvm_pin_pages(slot, gfn, page_size);
 		if (is_error_pfn(pfn)) {
-			kvm_release_pfn_clean(pfn);
 			gfn += 1;
 			continue;
 		}
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 93d3c6e063c..eafba99d107 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -102,9 +102,6 @@ static bool largepages_enabled = true;
 
 bool kvm_is_mmio_pfn(pfn_t pfn)
 {
-	if (is_error_pfn(pfn))
-		return false;
-
 	if (pfn_valid(pfn)) {
 		int reserved;
 		struct page *tail = pfn_to_page(pfn);
@@ -1165,10 +1162,13 @@ EXPORT_SYMBOL_GPL(gfn_to_page_many_atomic);
 
 static struct page *kvm_pfn_to_page(pfn_t pfn)
 {
-	WARN_ON(kvm_is_mmio_pfn(pfn));
+	if (is_error_pfn(pfn))
+		return KVM_ERR_PTR_BAD_PAGE;
 
-	if (is_error_pfn(pfn) || kvm_is_mmio_pfn(pfn))
+	if (kvm_is_mmio_pfn(pfn)) {
+		WARN_ON(1);
 		return KVM_ERR_PTR_BAD_PAGE;
+	}
 
 	return pfn_to_page(pfn);
 }
@@ -1193,7 +1193,9 @@ EXPORT_SYMBOL_GPL(kvm_release_page_clean);
 
 void kvm_release_pfn_clean(pfn_t pfn)
 {
-	if (!is_error_pfn(pfn) && !kvm_is_mmio_pfn(pfn))
+	WARN_ON(is_error_pfn(pfn));
+
+	if (!kvm_is_mmio_pfn(pfn))
 		put_page(pfn_to_page(pfn));
 }
 EXPORT_SYMBOL_GPL(kvm_release_pfn_clean);
-- 
cgit v1.2.3-70-g09d2


From 32cad84f44d186654492f1a50a1424c8906ccbd9 Mon Sep 17 00:00:00 2001
From: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Date: Fri, 3 Aug 2012 15:42:52 +0800
Subject: KVM: do not release the error page

After commit a2766325cf9f9, the error page is replaced by the
error code, it need not be released anymore

[ The patch has been compiling tested for powerpc ]

Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/powerpc/kvm/44x_tlb.c   | 1 -
 arch/powerpc/kvm/book3s_pr.c | 4 +---
 arch/x86/kvm/svm.c           | 1 -
 arch/x86/kvm/vmx.c           | 5 ++---
 arch/x86/kvm/x86.c           | 9 +++------
 include/linux/kvm_host.h     | 2 +-
 virt/kvm/async_pf.c          | 4 ++--
 virt/kvm/kvm_main.c          | 5 +++--
 8 files changed, 12 insertions(+), 19 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c
index 33aa715dab2..5dd3ab46997 100644
--- a/arch/powerpc/kvm/44x_tlb.c
+++ b/arch/powerpc/kvm/44x_tlb.c
@@ -319,7 +319,6 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr,
 	if (is_error_page(new_page)) {
 		printk(KERN_ERR "Couldn't get guest page for gfn %llx!\n",
 			(unsigned long long)gfn);
-		kvm_release_page_clean(new_page);
 		return;
 	}
 	hpaddr = page_to_phys(new_page);
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index a1baec340f7..05c28f59f77 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -242,10 +242,8 @@ static void kvmppc_patch_dcbz(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte)
 	int i;
 
 	hpage = gfn_to_page(vcpu->kvm, pte->raddr >> PAGE_SHIFT);
-	if (is_error_page(hpage)) {
-		kvm_release_page_clean(hpage);
+	if (is_error_page(hpage))
 		return;
-	}
 
 	hpage_offset = pte->raddr & ~PAGE_MASK;
 	hpage_offset &= ~0xFFFULL;
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 687d0c30e55..31be4a55744 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2105,7 +2105,6 @@ static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, struct page **_page)
 	return kmap(page);
 
 error:
-	kvm_release_page_clean(page);
 	kvm_inject_gp(&svm->vcpu, 0);
 
 	return NULL;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index d6e4cbc42b8..cc8ad983692 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -596,10 +596,9 @@ static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu)
 static struct page *nested_get_page(struct kvm_vcpu *vcpu, gpa_t addr)
 {
 	struct page *page = gfn_to_page(vcpu->kvm, addr >> PAGE_SHIFT);
-	if (is_error_page(page)) {
-		kvm_release_page_clean(page);
+	if (is_error_page(page))
 		return NULL;
-	}
+
 	return page;
 }
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ebf2109318e..7953a9e7cb1 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1639,10 +1639,9 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 		vcpu->arch.time_page =
 				gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT);
 
-		if (is_error_page(vcpu->arch.time_page)) {
-			kvm_release_page_clean(vcpu->arch.time_page);
+		if (is_error_page(vcpu->arch.time_page))
 			vcpu->arch.time_page = NULL;
-		}
+
 		break;
 	}
 	case MSR_KVM_ASYNC_PF_EN:
@@ -3945,10 +3944,8 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
 		goto emul_write;
 
 	page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
-	if (is_error_page(page)) {
-		kvm_release_page_clean(page);
+	if (is_error_page(page))
 		goto emul_write;
-	}
 
 	kaddr = kmap_atomic(page);
 	kaddr += offset_in_page(gpa);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index ce7c32950f4..07226f820e6 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -457,7 +457,7 @@ pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn);
 pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
 		      bool *writable);
 pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn);
-void kvm_release_pfn_dirty(pfn_t);
+void kvm_release_pfn_dirty(pfn_t pfn);
 void kvm_release_pfn_clean(pfn_t pfn);
 void kvm_set_pfn_dirty(pfn_t pfn);
 void kvm_set_pfn_accessed(pfn_t pfn);
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index 56f55339189..ea475cd0351 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -111,7 +111,7 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
 			list_entry(vcpu->async_pf.done.next,
 				   typeof(*work), link);
 		list_del(&work->link);
-		if (work->page)
+		if (!is_error_page(work->page))
 			kvm_release_page_clean(work->page);
 		kmem_cache_free(async_pf_cache, work);
 	}
@@ -138,7 +138,7 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
 
 		list_del(&work->queue);
 		vcpu->async_pf.queued--;
-		if (work->page)
+		if (!is_error_page(work->page))
 			kvm_release_page_clean(work->page);
 		kmem_cache_free(async_pf_cache, work);
 	}
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index eafba99d107..a2e85af847c 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1186,8 +1186,9 @@ EXPORT_SYMBOL_GPL(gfn_to_page);
 
 void kvm_release_page_clean(struct page *page)
 {
-	if (!is_error_page(page))
-		kvm_release_pfn_clean(page_to_pfn(page));
+	WARN_ON(is_error_page(page));
+
+	kvm_release_pfn_clean(page_to_pfn(page));
 }
 EXPORT_SYMBOL_GPL(kvm_release_page_clean);
 
-- 
cgit v1.2.3-70-g09d2


From 8a5a87d9b7aef24bcdba763d8ee14982477b0a2e Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Sun, 5 Aug 2012 15:58:26 +0300
Subject: KVM: clean up kvm_(set|get)_apic_base

kvm_get_apic_base() needlessly checks irqchip_in_kernel although it does
the same no matter what result of the check is. kvm_set_apic_base() also
checks for irqchip_in_kernel, but kvm_lapic_set_base() can handle this
case.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/x86.c | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7953a9e7cb1..3cafbb12ae0 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -246,20 +246,14 @@ static void drop_user_return_notifiers(void *ignore)
 
 u64 kvm_get_apic_base(struct kvm_vcpu *vcpu)
 {
-	if (irqchip_in_kernel(vcpu->kvm))
-		return vcpu->arch.apic_base;
-	else
-		return vcpu->arch.apic_base;
+	return vcpu->arch.apic_base;
 }
 EXPORT_SYMBOL_GPL(kvm_get_apic_base);
 
 void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data)
 {
 	/* TODO: reserve bits check */
-	if (irqchip_in_kernel(vcpu->kvm))
-		kvm_lapic_set_base(vcpu, data);
-	else
-		vcpu->arch.apic_base = data;
+	kvm_lapic_set_base(vcpu, data);
 }
 EXPORT_SYMBOL_GPL(kvm_set_apic_base);
 
-- 
cgit v1.2.3-70-g09d2


From 5dbc8f3fed0b4cb04dfb276150294f21c5f0fc66 Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Sun, 5 Aug 2012 15:58:27 +0300
Subject: KVM: use kvm_lapic_set_base() to change apic_base

Do not change apic_base directly. Use kvm_lapic_set_base() instead.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/lapic.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 0cd431c85d3..49f4ac047f6 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1185,7 +1185,8 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
 	update_divide_count(apic);
 	atomic_set(&apic->lapic_timer.pending, 0);
 	if (kvm_vcpu_is_bsp(vcpu))
-		vcpu->arch.apic_base |= MSR_IA32_APICBASE_BSP;
+		kvm_lapic_set_base(vcpu,
+				vcpu->arch.apic_base | MSR_IA32_APICBASE_BSP);
 	vcpu->arch.pv_eoi.msr_val = 0;
 	apic_update_ppr(apic);
 
@@ -1310,8 +1311,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
 		     HRTIMER_MODE_ABS);
 	apic->lapic_timer.timer.function = apic_timer_fn;
 
-	apic->base_address = APIC_DEFAULT_PHYS_BASE;
-	vcpu->arch.apic_base = APIC_DEFAULT_PHYS_BASE;
+	kvm_lapic_set_base(vcpu, APIC_DEFAULT_PHYS_BASE);
 
 	kvm_lapic_reset(vcpu);
 	kvm_iodevice_init(&apic->dev, &apic_mmio_ops);
@@ -1380,8 +1380,7 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu)
 {
 	struct kvm_lapic *apic = vcpu->arch.apic;
 
-	apic->base_address = vcpu->arch.apic_base &
-			     MSR_IA32_APICBASE_BASE;
+	kvm_lapic_set_base(vcpu, vcpu->arch.apic_base);
 	kvm_apic_set_version(vcpu);
 
 	apic_update_ppr(apic);
-- 
cgit v1.2.3-70-g09d2


From 6aed64a8a440360be875f31eabeacaddb83ef25f Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Sun, 5 Aug 2012 15:58:28 +0300
Subject: KVM: mark apic enabled on start up

According to SDM apic is enabled on start up.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/lapic.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 49f4ac047f6..c3f14fe51e9 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1311,7 +1311,8 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
 		     HRTIMER_MODE_ABS);
 	apic->lapic_timer.timer.function = apic_timer_fn;
 
-	kvm_lapic_set_base(vcpu, APIC_DEFAULT_PHYS_BASE);
+	kvm_lapic_set_base(vcpu,
+			APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE);
 
 	kvm_lapic_reset(vcpu);
 	kvm_iodevice_init(&apic->dev, &apic_mmio_ops);
-- 
cgit v1.2.3-70-g09d2


From c5cc421ba3219b90f11d151bc55f1608c12830fa Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Sun, 5 Aug 2012 15:58:30 +0300
Subject: KVM: use jump label to optimize checking for HW enabled APIC in
 APIC_BASE MSR

Usually all APICs are HW enabled so the check can be optimized out.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/lapic.c | 29 ++++++++++++++++++++++++++++-
 arch/x86/kvm/lapic.h |  1 +
 arch/x86/kvm/x86.c   |  1 +
 3 files changed, 30 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index c3f14fe51e9..5b46cab044a 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -34,6 +34,7 @@
 #include <asm/current.h>
 #include <asm/apicdef.h>
 #include <linux/atomic.h>
+#include <linux/jump_label.h>
 #include "kvm_cache_regs.h"
 #include "irq.h"
 #include "trace.h"
@@ -117,9 +118,13 @@ static inline int __apic_test_and_clear_vector(int vec, void *bitmap)
 	return __test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
 }
 
+struct static_key_deferred apic_hw_disabled __read_mostly;
+
 static inline int apic_hw_enabled(struct kvm_lapic *apic)
 {
-	return (apic)->vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE;
+	if (static_key_false(&apic_hw_disabled.key))
+		return apic->vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE;
+	return MSR_IA32_APICBASE_ENABLE;
 }
 
 static inline int  apic_sw_enabled(struct kvm_lapic *apic)
@@ -1055,6 +1060,9 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu)
 
 	hrtimer_cancel(&vcpu->arch.apic->lapic_timer.timer);
 
+	if (!(vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE))
+		static_key_slow_dec_deferred(&apic_hw_disabled);
+
 	if (vcpu->arch.apic->regs)
 		free_page((unsigned long)vcpu->arch.apic->regs);
 
@@ -1125,6 +1133,14 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
 		return;
 	}
 
+	/* update jump label if enable bit changes */
+	if ((vcpu->arch.apic_base ^ value) & MSR_IA32_APICBASE_ENABLE) {
+		if (value & MSR_IA32_APICBASE_ENABLE)
+			static_key_slow_dec_deferred(&apic_hw_disabled);
+		else
+			static_key_slow_inc(&apic_hw_disabled.key);
+	}
+
 	if (!kvm_vcpu_is_bsp(apic->vcpu))
 		value &= ~MSR_IA32_APICBASE_BSP;
 
@@ -1311,6 +1327,11 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
 		     HRTIMER_MODE_ABS);
 	apic->lapic_timer.timer.function = apic_timer_fn;
 
+	/*
+	 * APIC is created enabled. This will prevent kvm_lapic_set_base from
+	 * thinking that APIC satet has changed.
+	 */
+	vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE;
 	kvm_lapic_set_base(vcpu,
 			APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE);
 
@@ -1598,3 +1619,9 @@ int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data)
 	return kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.pv_eoi.data,
 					 addr);
 }
+
+void kvm_lapic_init(void)
+{
+	/* do not patch jump label more than once per second */
+	jump_label_rate_limit(&apic_hw_disabled, HZ);
+}
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 166766fffd9..73fa299b68e 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -78,4 +78,5 @@ static inline bool kvm_hv_vapic_assist_page_enabled(struct kvm_vcpu *vcpu)
 }
 
 int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data);
+void kvm_lapic_init(void);
 #endif
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 3cafbb12ae0..29fa18d27e6 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4903,6 +4903,7 @@ int kvm_arch_init(void *opaque)
 	if (cpu_has_xsave)
 		host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
 
+	kvm_lapic_init();
 	return 0;
 
 out:
-- 
cgit v1.2.3-70-g09d2


From f8c1ea103947038b7197bdd4c8451886a58af0c0 Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Sun, 5 Aug 2012 15:58:31 +0300
Subject: KVM: use jump label to optimize checking for SW enabled apic in
 spurious interrupt register

Usually all APICs are SW enabled so the check can be optimized out.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/lapic.c | 39 +++++++++++++++++++++++++++++++--------
 1 file changed, 31 insertions(+), 8 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 5b46cab044a..7f77e96ac5e 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -127,9 +127,24 @@ static inline int apic_hw_enabled(struct kvm_lapic *apic)
 	return MSR_IA32_APICBASE_ENABLE;
 }
 
-static inline int  apic_sw_enabled(struct kvm_lapic *apic)
+struct static_key_deferred apic_sw_disabled __read_mostly;
+
+static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
+{
+	if ((apic_get_reg(apic, APIC_SPIV) ^ val) & APIC_SPIV_APIC_ENABLED) {
+		if (val & APIC_SPIV_APIC_ENABLED)
+			static_key_slow_dec_deferred(&apic_sw_disabled);
+		else
+			static_key_slow_inc(&apic_sw_disabled.key);
+	}
+	apic_set_reg(apic, APIC_SPIV, val);
+}
+
+static inline int apic_sw_enabled(struct kvm_lapic *apic)
 {
-	return apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_APIC_ENABLED;
+	if (static_key_false(&apic_sw_disabled.key))
+		return apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_APIC_ENABLED;
+	return APIC_SPIV_APIC_ENABLED;
 }
 
 static inline int apic_enabled(struct kvm_lapic *apic)
@@ -918,7 +933,7 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
 		u32 mask = 0x3ff;
 		if (apic_get_reg(apic, APIC_LVR) & APIC_LVR_DIRECTED_EOI)
 			mask |= APIC_SPIV_DIRECTED_EOI;
-		apic_set_reg(apic, APIC_SPIV, val & mask);
+		apic_set_spiv(apic, val & mask);
 		if (!(val & APIC_SPIV_APIC_ENABLED)) {
 			int i;
 			u32 lvt_val;
@@ -1055,18 +1070,23 @@ EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi);
 
 void kvm_free_lapic(struct kvm_vcpu *vcpu)
 {
+	struct kvm_lapic *apic = vcpu->arch.apic;
+
 	if (!vcpu->arch.apic)
 		return;
 
-	hrtimer_cancel(&vcpu->arch.apic->lapic_timer.timer);
+	hrtimer_cancel(&apic->lapic_timer.timer);
 
 	if (!(vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE))
 		static_key_slow_dec_deferred(&apic_hw_disabled);
 
-	if (vcpu->arch.apic->regs)
-		free_page((unsigned long)vcpu->arch.apic->regs);
+	if (!(apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_APIC_ENABLED))
+		static_key_slow_dec_deferred(&apic_sw_disabled);
 
-	kfree(vcpu->arch.apic);
+	if (apic->regs)
+		free_page((unsigned long)apic->regs);
+
+	kfree(apic);
 }
 
 /*
@@ -1182,7 +1202,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
 		     SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT));
 
 	apic_set_reg(apic, APIC_DFR, 0xffffffffU);
-	apic_set_reg(apic, APIC_SPIV, 0xff);
+	apic_set_spiv(apic, 0xff);
 	apic_set_reg(apic, APIC_TASKPRI, 0);
 	apic_set_reg(apic, APIC_LDR, 0);
 	apic_set_reg(apic, APIC_ESR, 0);
@@ -1335,6 +1355,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
 	kvm_lapic_set_base(vcpu,
 			APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE);
 
+	static_key_slow_inc(&apic_sw_disabled.key); /* sw disabled at reset */
 	kvm_lapic_reset(vcpu);
 	kvm_iodevice_init(&apic->dev, &apic_mmio_ops);
 
@@ -1404,6 +1425,7 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu)
 
 	kvm_lapic_set_base(vcpu, vcpu->arch.apic_base);
 	kvm_apic_set_version(vcpu);
+	apic_set_spiv(apic, apic_get_reg(apic, APIC_SPIV));
 
 	apic_update_ppr(apic);
 	hrtimer_cancel(&apic->lapic_timer.timer);
@@ -1624,4 +1646,5 @@ void kvm_lapic_init(void)
 {
 	/* do not patch jump label more than once per second */
 	jump_label_rate_limit(&apic_hw_disabled, HZ);
+	jump_label_rate_limit(&apic_sw_disabled, HZ);
 }
-- 
cgit v1.2.3-70-g09d2


From 54e9818f3903902a4ea3046035739b8770880092 Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Sun, 5 Aug 2012 15:58:32 +0300
Subject: KVM: use jump label to optimize checking for in kernel local apic
 presence

Usually all vcpus have local apic pointer initialized, so the check may
be completely skipped.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/lapic.c | 62 ++++++++++++++++++++++++++++------------------------
 arch/x86/kvm/x86.c   |  7 +++++-
 arch/x86/kvm/x86.h   |  1 +
 3 files changed, 41 insertions(+), 29 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 7f77e96ac5e..650379ba73a 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -152,6 +152,13 @@ static inline int apic_enabled(struct kvm_lapic *apic)
 	return apic_sw_enabled(apic) &&	apic_hw_enabled(apic);
 }
 
+static inline bool vcpu_has_lapic(struct kvm_vcpu *vcpu)
+{
+	if (static_key_false(&kvm_no_apic_vcpu))
+		return vcpu->arch.apic;
+	return true;
+}
+
 #define LVT_MASK	\
 	(APIC_LVT_MASKED | APIC_SEND_PENDING | APIC_VECTOR_MASK)
 
@@ -204,7 +211,7 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu)
 	struct kvm_cpuid_entry2 *feat;
 	u32 v = APIC_VERSION;
 
-	if (!irqchip_in_kernel(vcpu->kvm))
+	if (!vcpu_has_lapic(vcpu))
 		return;
 
 	feat = kvm_find_cpuid_entry(apic->vcpu, 0x1, 0);
@@ -305,7 +312,6 @@ static inline void apic_clear_isr(int vec, struct kvm_lapic *apic)
 
 int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
 {
-	struct kvm_lapic *apic = vcpu->arch.apic;
 	int highest_irr;
 
 	/* This may race with setting of irr in __apic_accept_irq() and
@@ -313,9 +319,9 @@ int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
 	 * will cause vmexit immediately and the value will be recalculated
 	 * on the next vmentry.
 	 */
-	if (!apic)
+	if (!vcpu_has_lapic(vcpu))
 		return 0;
-	highest_irr = apic_find_highest_irr(apic);
+	highest_irr = apic_find_highest_irr(vcpu->arch.apic);
 
 	return highest_irr;
 }
@@ -1061,9 +1067,7 @@ static int apic_mmio_write(struct kvm_io_device *this,
 
 void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu)
 {
-	struct kvm_lapic *apic = vcpu->arch.apic;
-
-	if (apic)
+	if (vcpu_has_lapic(vcpu))
 		apic_reg_write(vcpu->arch.apic, APIC_EOI, 0);
 }
 EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi);
@@ -1098,10 +1102,9 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu)
 u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu)
 {
 	struct kvm_lapic *apic = vcpu->arch.apic;
-	if (!apic)
-		return 0;
 
-	if (apic_lvtt_oneshot(apic) || apic_lvtt_period(apic))
+	if (!vcpu_has_lapic(vcpu) || apic_lvtt_oneshot(apic) ||
+			apic_lvtt_period(apic))
 		return 0;
 
 	return apic->lapic_timer.tscdeadline;
@@ -1110,10 +1113,9 @@ u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu)
 void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data)
 {
 	struct kvm_lapic *apic = vcpu->arch.apic;
-	if (!apic)
-		return;
 
-	if (apic_lvtt_oneshot(apic) || apic_lvtt_period(apic))
+	if (!vcpu_has_lapic(vcpu) || apic_lvtt_oneshot(apic) ||
+			apic_lvtt_period(apic))
 		return;
 
 	hrtimer_cancel(&apic->lapic_timer.timer);
@@ -1125,20 +1127,21 @@ void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8)
 {
 	struct kvm_lapic *apic = vcpu->arch.apic;
 
-	if (!apic)
+	if (!vcpu_has_lapic(vcpu))
 		return;
+
 	apic_set_tpr(apic, ((cr8 & 0x0f) << 4)
 		     | (apic_get_reg(apic, APIC_TASKPRI) & 4));
 }
 
 u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)
 {
-	struct kvm_lapic *apic = vcpu->arch.apic;
 	u64 tpr;
 
-	if (!apic)
+	if (!vcpu_has_lapic(vcpu))
 		return 0;
-	tpr = (u64) apic_get_reg(apic, APIC_TASKPRI);
+
+	tpr = (u64) apic_get_reg(vcpu->arch.apic, APIC_TASKPRI);
 
 	return (tpr & 0xf0) >> 4;
 }
@@ -1237,7 +1240,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
 
 bool kvm_apic_present(struct kvm_vcpu *vcpu)
 {
-	return vcpu->arch.apic && apic_hw_enabled(vcpu->arch.apic);
+	return vcpu_has_lapic(vcpu) && apic_hw_enabled(vcpu->arch.apic);
 }
 
 int kvm_lapic_enabled(struct kvm_vcpu *vcpu)
@@ -1258,10 +1261,11 @@ static bool lapic_is_periodic(struct kvm_lapic *apic)
 
 int apic_has_pending_timer(struct kvm_vcpu *vcpu)
 {
-	struct kvm_lapic *lapic = vcpu->arch.apic;
+	struct kvm_lapic *apic = vcpu->arch.apic;
 
-	if (lapic && apic_enabled(lapic) && apic_lvt_enabled(lapic, APIC_LVTT))
-		return atomic_read(&lapic->lapic_timer.pending);
+	if (vcpu_has_lapic(vcpu) && apic_enabled(apic) &&
+			apic_lvt_enabled(apic, APIC_LVTT))
+		return atomic_read(&apic->lapic_timer.pending);
 
 	return 0;
 }
@@ -1371,7 +1375,7 @@ int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)
 	struct kvm_lapic *apic = vcpu->arch.apic;
 	int highest_irr;
 
-	if (!apic || !apic_enabled(apic))
+	if (!vcpu_has_lapic(vcpu) || !apic_enabled(apic))
 		return -1;
 
 	apic_update_ppr(apic);
@@ -1399,7 +1403,10 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
 {
 	struct kvm_lapic *apic = vcpu->arch.apic;
 
-	if (apic && atomic_read(&apic->lapic_timer.pending) > 0) {
+	if (!vcpu_has_lapic(vcpu))
+		return;
+
+	if (atomic_read(&apic->lapic_timer.pending) > 0) {
 		if (kvm_apic_local_deliver(apic, APIC_LVTT))
 			atomic_dec(&apic->lapic_timer.pending);
 	}
@@ -1439,13 +1446,12 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu)
 
 void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
 {
-	struct kvm_lapic *apic = vcpu->arch.apic;
 	struct hrtimer *timer;
 
-	if (!apic)
+	if (!vcpu_has_lapic(vcpu))
 		return;
 
-	timer = &apic->lapic_timer.timer;
+	timer = &vcpu->arch.apic->lapic_timer.timer;
 	if (hrtimer_cancel(timer))
 		hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
 }
@@ -1602,7 +1608,7 @@ int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 reg, u64 data)
 {
 	struct kvm_lapic *apic = vcpu->arch.apic;
 
-	if (!irqchip_in_kernel(vcpu->kvm))
+	if (!vcpu_has_lapic(vcpu))
 		return 1;
 
 	/* if this is ICR write vector before command */
@@ -1616,7 +1622,7 @@ int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data)
 	struct kvm_lapic *apic = vcpu->arch.apic;
 	u32 low, high = 0;
 
-	if (!irqchip_in_kernel(vcpu->kvm))
+	if (!vcpu_has_lapic(vcpu))
 		return 1;
 
 	if (apic_reg_read(apic, reg, 4, &low))
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 29fa18d27e6..8ebf65c349e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6152,6 +6152,8 @@ bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu)
 	return irqchip_in_kernel(vcpu->kvm) == (vcpu->arch.apic != NULL);
 }
 
+struct static_key kvm_no_apic_vcpu __read_mostly;
+
 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 {
 	struct page *page;
@@ -6184,7 +6186,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 		r = kvm_create_lapic(vcpu);
 		if (r < 0)
 			goto fail_mmu_destroy;
-	}
+	} else
+		static_key_slow_inc(&kvm_no_apic_vcpu);
 
 	vcpu->arch.mce_banks = kzalloc(KVM_MAX_MCE_BANKS * sizeof(u64) * 4,
 				       GFP_KERNEL);
@@ -6224,6 +6227,8 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 	kvm_mmu_destroy(vcpu);
 	srcu_read_unlock(&vcpu->kvm->srcu, idx);
 	free_page((unsigned long)vcpu->arch.pio_data);
+	if (!irqchip_in_kernel(vcpu->kvm))
+		static_key_slow_dec(&kvm_no_apic_vcpu);
 }
 
 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 3d1134ddb88..2b5219c12ac 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -124,4 +124,5 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
 
 extern u64 host_xcr0;
 
+extern struct static_key kvm_no_apic_vcpu;
 #endif
-- 
cgit v1.2.3-70-g09d2


From c48f14966cc41957d88c66dfe49a439e708ab7b8 Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Sun, 5 Aug 2012 15:58:33 +0300
Subject: KVM: inline kvm_apic_present() and kvm_lapic_enabled()

Those functions are used during interrupt injection. When inlined they
become nops on the fast path.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/lapic.c | 143 +++++++++++++++++++--------------------------------
 arch/x86/kvm/lapic.h |  45 +++++++++++++++-
 2 files changed, 96 insertions(+), 92 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 650379ba73a..333c27fa6e9 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -73,11 +73,6 @@
 static unsigned int min_timer_period_us = 500;
 module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR);
 
-static inline u32 apic_get_reg(struct kvm_lapic *apic, int reg_off)
-{
-	return *((u32 *) (apic->regs + reg_off));
-}
-
 static inline void apic_set_reg(struct kvm_lapic *apic, int reg_off, u32 val)
 {
 	*((u32 *) (apic->regs + reg_off)) = val;
@@ -119,19 +114,11 @@ static inline int __apic_test_and_clear_vector(int vec, void *bitmap)
 }
 
 struct static_key_deferred apic_hw_disabled __read_mostly;
-
-static inline int apic_hw_enabled(struct kvm_lapic *apic)
-{
-	if (static_key_false(&apic_hw_disabled.key))
-		return apic->vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE;
-	return MSR_IA32_APICBASE_ENABLE;
-}
-
 struct static_key_deferred apic_sw_disabled __read_mostly;
 
 static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
 {
-	if ((apic_get_reg(apic, APIC_SPIV) ^ val) & APIC_SPIV_APIC_ENABLED) {
+	if ((kvm_apic_get_reg(apic, APIC_SPIV) ^ val) & APIC_SPIV_APIC_ENABLED) {
 		if (val & APIC_SPIV_APIC_ENABLED)
 			static_key_slow_dec_deferred(&apic_sw_disabled);
 		else
@@ -140,23 +127,9 @@ static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
 	apic_set_reg(apic, APIC_SPIV, val);
 }
 
-static inline int apic_sw_enabled(struct kvm_lapic *apic)
-{
-	if (static_key_false(&apic_sw_disabled.key))
-		return apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_APIC_ENABLED;
-	return APIC_SPIV_APIC_ENABLED;
-}
-
 static inline int apic_enabled(struct kvm_lapic *apic)
 {
-	return apic_sw_enabled(apic) &&	apic_hw_enabled(apic);
-}
-
-static inline bool vcpu_has_lapic(struct kvm_vcpu *vcpu)
-{
-	if (static_key_false(&kvm_no_apic_vcpu))
-		return vcpu->arch.apic;
-	return true;
+	return kvm_apic_sw_enabled(apic) &&	kvm_apic_hw_enabled(apic);
 }
 
 #define LVT_MASK	\
@@ -168,34 +141,34 @@ static inline bool vcpu_has_lapic(struct kvm_vcpu *vcpu)
 
 static inline int kvm_apic_id(struct kvm_lapic *apic)
 {
-	return (apic_get_reg(apic, APIC_ID) >> 24) & 0xff;
+	return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff;
 }
 
 static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type)
 {
-	return !(apic_get_reg(apic, lvt_type) & APIC_LVT_MASKED);
+	return !(kvm_apic_get_reg(apic, lvt_type) & APIC_LVT_MASKED);
 }
 
 static inline int apic_lvt_vector(struct kvm_lapic *apic, int lvt_type)
 {
-	return apic_get_reg(apic, lvt_type) & APIC_VECTOR_MASK;
+	return kvm_apic_get_reg(apic, lvt_type) & APIC_VECTOR_MASK;
 }
 
 static inline int apic_lvtt_oneshot(struct kvm_lapic *apic)
 {
-	return ((apic_get_reg(apic, APIC_LVTT) &
+	return ((kvm_apic_get_reg(apic, APIC_LVTT) &
 		apic->lapic_timer.timer_mode_mask) == APIC_LVT_TIMER_ONESHOT);
 }
 
 static inline int apic_lvtt_period(struct kvm_lapic *apic)
 {
-	return ((apic_get_reg(apic, APIC_LVTT) &
+	return ((kvm_apic_get_reg(apic, APIC_LVTT) &
 		apic->lapic_timer.timer_mode_mask) == APIC_LVT_TIMER_PERIODIC);
 }
 
 static inline int apic_lvtt_tscdeadline(struct kvm_lapic *apic)
 {
-	return ((apic_get_reg(apic, APIC_LVTT) &
+	return ((kvm_apic_get_reg(apic, APIC_LVTT) &
 		apic->lapic_timer.timer_mode_mask) ==
 			APIC_LVT_TIMER_TSCDEADLINE);
 }
@@ -211,7 +184,7 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu)
 	struct kvm_cpuid_entry2 *feat;
 	u32 v = APIC_VERSION;
 
-	if (!vcpu_has_lapic(vcpu))
+	if (!kvm_vcpu_has_lapic(vcpu))
 		return;
 
 	feat = kvm_find_cpuid_entry(apic->vcpu, 0x1, 0);
@@ -319,7 +292,7 @@ int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
 	 * will cause vmexit immediately and the value will be recalculated
 	 * on the next vmentry.
 	 */
-	if (!vcpu_has_lapic(vcpu))
+	if (!kvm_vcpu_has_lapic(vcpu))
 		return 0;
 	highest_irr = apic_find_highest_irr(vcpu->arch.apic);
 
@@ -404,8 +377,8 @@ static void apic_update_ppr(struct kvm_lapic *apic)
 	u32 tpr, isrv, ppr, old_ppr;
 	int isr;
 
-	old_ppr = apic_get_reg(apic, APIC_PROCPRI);
-	tpr = apic_get_reg(apic, APIC_TASKPRI);
+	old_ppr = kvm_apic_get_reg(apic, APIC_PROCPRI);
+	tpr = kvm_apic_get_reg(apic, APIC_TASKPRI);
 	isr = apic_find_highest_isr(apic);
 	isrv = (isr != -1) ? isr : 0;
 
@@ -441,13 +414,13 @@ int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda)
 	u32 logical_id;
 
 	if (apic_x2apic_mode(apic)) {
-		logical_id = apic_get_reg(apic, APIC_LDR);
+		logical_id = kvm_apic_get_reg(apic, APIC_LDR);
 		return logical_id & mda;
 	}
 
-	logical_id = GET_APIC_LOGICAL_ID(apic_get_reg(apic, APIC_LDR));
+	logical_id = GET_APIC_LOGICAL_ID(kvm_apic_get_reg(apic, APIC_LDR));
 
-	switch (apic_get_reg(apic, APIC_DFR)) {
+	switch (kvm_apic_get_reg(apic, APIC_DFR)) {
 	case APIC_DFR_FLAT:
 		if (logical_id & mda)
 			result = 1;
@@ -459,7 +432,7 @@ int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda)
 		break;
 	default:
 		apic_debug("Bad DFR vcpu %d: %08x\n",
-			   apic->vcpu->vcpu_id, apic_get_reg(apic, APIC_DFR));
+			   apic->vcpu->vcpu_id, kvm_apic_get_reg(apic, APIC_DFR));
 		break;
 	}
 
@@ -617,7 +590,7 @@ static int apic_set_eoi(struct kvm_lapic *apic)
 	apic_clear_isr(vector, apic);
 	apic_update_ppr(apic);
 
-	if (!(apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) &&
+	if (!(kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) &&
 	    kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) {
 		int trigger_mode;
 		if (apic_test_vector(vector, apic->regs + APIC_TMR))
@@ -632,8 +605,8 @@ static int apic_set_eoi(struct kvm_lapic *apic)
 
 static void apic_send_ipi(struct kvm_lapic *apic)
 {
-	u32 icr_low = apic_get_reg(apic, APIC_ICR);
-	u32 icr_high = apic_get_reg(apic, APIC_ICR2);
+	u32 icr_low = kvm_apic_get_reg(apic, APIC_ICR);
+	u32 icr_high = kvm_apic_get_reg(apic, APIC_ICR2);
 	struct kvm_lapic_irq irq;
 
 	irq.vector = icr_low & APIC_VECTOR_MASK;
@@ -668,7 +641,7 @@ static u32 apic_get_tmcct(struct kvm_lapic *apic)
 	ASSERT(apic != NULL);
 
 	/* if initial count is 0, current count should also be 0 */
-	if (apic_get_reg(apic, APIC_TMICT) == 0)
+	if (kvm_apic_get_reg(apic, APIC_TMICT) == 0)
 		return 0;
 
 	remaining = hrtimer_get_remaining(&apic->lapic_timer.timer);
@@ -724,13 +697,13 @@ static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset)
 		break;
 	case APIC_PROCPRI:
 		apic_update_ppr(apic);
-		val = apic_get_reg(apic, offset);
+		val = kvm_apic_get_reg(apic, offset);
 		break;
 	case APIC_TASKPRI:
 		report_tpr_access(apic, false);
 		/* fall thru */
 	default:
-		val = apic_get_reg(apic, offset);
+		val = kvm_apic_get_reg(apic, offset);
 		break;
 	}
 
@@ -782,7 +755,7 @@ static int apic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
 
 static int apic_mmio_in_range(struct kvm_lapic *apic, gpa_t addr)
 {
-	return apic_hw_enabled(apic) &&
+	return kvm_apic_hw_enabled(apic) &&
 	    addr >= apic->base_address &&
 	    addr < apic->base_address + LAPIC_MMIO_LENGTH;
 }
@@ -805,7 +778,7 @@ static void update_divide_count(struct kvm_lapic *apic)
 {
 	u32 tmp1, tmp2, tdcr;
 
-	tdcr = apic_get_reg(apic, APIC_TDCR);
+	tdcr = kvm_apic_get_reg(apic, APIC_TDCR);
 	tmp1 = tdcr & 0xf;
 	tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1;
 	apic->divide_count = 0x1 << (tmp2 & 0x7);
@@ -822,7 +795,7 @@ static void start_apic_timer(struct kvm_lapic *apic)
 	if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) {
 		/* lapic timer in oneshot or periodic mode */
 		now = apic->lapic_timer.timer.base->get_time();
-		apic->lapic_timer.period = (u64)apic_get_reg(apic, APIC_TMICT)
+		apic->lapic_timer.period = (u64)kvm_apic_get_reg(apic, APIC_TMICT)
 			    * APIC_BUS_CYCLE_NS * apic->divide_count;
 
 		if (!apic->lapic_timer.period)
@@ -854,7 +827,7 @@ static void start_apic_timer(struct kvm_lapic *apic)
 			   "timer initial count 0x%x, period %lldns, "
 			   "expire @ 0x%016" PRIx64 ".\n", __func__,
 			   APIC_BUS_CYCLE_NS, ktime_to_ns(now),
-			   apic_get_reg(apic, APIC_TMICT),
+			   kvm_apic_get_reg(apic, APIC_TMICT),
 			   apic->lapic_timer.period,
 			   ktime_to_ns(ktime_add_ns(now,
 					apic->lapic_timer.period)));
@@ -886,7 +859,7 @@ static void start_apic_timer(struct kvm_lapic *apic)
 
 static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
 {
-	int nmi_wd_enabled = apic_lvt_nmi_mode(apic_get_reg(apic, APIC_LVT0));
+	int nmi_wd_enabled = apic_lvt_nmi_mode(kvm_apic_get_reg(apic, APIC_LVT0));
 
 	if (apic_lvt_nmi_mode(lvt0_val)) {
 		if (!nmi_wd_enabled) {
@@ -937,7 +910,7 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
 
 	case APIC_SPIV: {
 		u32 mask = 0x3ff;
-		if (apic_get_reg(apic, APIC_LVR) & APIC_LVR_DIRECTED_EOI)
+		if (kvm_apic_get_reg(apic, APIC_LVR) & APIC_LVR_DIRECTED_EOI)
 			mask |= APIC_SPIV_DIRECTED_EOI;
 		apic_set_spiv(apic, val & mask);
 		if (!(val & APIC_SPIV_APIC_ENABLED)) {
@@ -945,7 +918,7 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
 			u32 lvt_val;
 
 			for (i = 0; i < APIC_LVT_NUM; i++) {
-				lvt_val = apic_get_reg(apic,
+				lvt_val = kvm_apic_get_reg(apic,
 						       APIC_LVTT + 0x10 * i);
 				apic_set_reg(apic, APIC_LVTT + 0x10 * i,
 					     lvt_val | APIC_LVT_MASKED);
@@ -974,7 +947,7 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
 	case APIC_LVT1:
 	case APIC_LVTERR:
 		/* TODO: Check vector */
-		if (!apic_sw_enabled(apic))
+		if (!kvm_apic_sw_enabled(apic))
 			val |= APIC_LVT_MASKED;
 
 		val &= apic_lvt_mask[(reg - APIC_LVTT) >> 4];
@@ -983,12 +956,12 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
 		break;
 
 	case APIC_LVTT:
-		if ((apic_get_reg(apic, APIC_LVTT) &
+		if ((kvm_apic_get_reg(apic, APIC_LVTT) &
 		    apic->lapic_timer.timer_mode_mask) !=
 		   (val & apic->lapic_timer.timer_mode_mask))
 			hrtimer_cancel(&apic->lapic_timer.timer);
 
-		if (!apic_sw_enabled(apic))
+		if (!kvm_apic_sw_enabled(apic))
 			val |= APIC_LVT_MASKED;
 		val &= (apic_lvt_mask[0] | apic->lapic_timer.timer_mode_mask);
 		apic_set_reg(apic, APIC_LVTT, val);
@@ -1067,7 +1040,7 @@ static int apic_mmio_write(struct kvm_io_device *this,
 
 void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu)
 {
-	if (vcpu_has_lapic(vcpu))
+	if (kvm_vcpu_has_lapic(vcpu))
 		apic_reg_write(vcpu->arch.apic, APIC_EOI, 0);
 }
 EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi);
@@ -1084,7 +1057,7 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu)
 	if (!(vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE))
 		static_key_slow_dec_deferred(&apic_hw_disabled);
 
-	if (!(apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_APIC_ENABLED))
+	if (!(kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_APIC_ENABLED))
 		static_key_slow_dec_deferred(&apic_sw_disabled);
 
 	if (apic->regs)
@@ -1103,7 +1076,7 @@ u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu)
 {
 	struct kvm_lapic *apic = vcpu->arch.apic;
 
-	if (!vcpu_has_lapic(vcpu) || apic_lvtt_oneshot(apic) ||
+	if (!kvm_vcpu_has_lapic(vcpu) || apic_lvtt_oneshot(apic) ||
 			apic_lvtt_period(apic))
 		return 0;
 
@@ -1114,7 +1087,7 @@ void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data)
 {
 	struct kvm_lapic *apic = vcpu->arch.apic;
 
-	if (!vcpu_has_lapic(vcpu) || apic_lvtt_oneshot(apic) ||
+	if (!kvm_vcpu_has_lapic(vcpu) || apic_lvtt_oneshot(apic) ||
 			apic_lvtt_period(apic))
 		return;
 
@@ -1127,21 +1100,21 @@ void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8)
 {
 	struct kvm_lapic *apic = vcpu->arch.apic;
 
-	if (!vcpu_has_lapic(vcpu))
+	if (!kvm_vcpu_has_lapic(vcpu))
 		return;
 
 	apic_set_tpr(apic, ((cr8 & 0x0f) << 4)
-		     | (apic_get_reg(apic, APIC_TASKPRI) & 4));
+		     | (kvm_apic_get_reg(apic, APIC_TASKPRI) & 4));
 }
 
 u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)
 {
 	u64 tpr;
 
-	if (!vcpu_has_lapic(vcpu))
+	if (!kvm_vcpu_has_lapic(vcpu))
 		return 0;
 
-	tpr = (u64) apic_get_reg(vcpu->arch.apic, APIC_TASKPRI);
+	tpr = (u64) kvm_apic_get_reg(vcpu->arch.apic, APIC_TASKPRI);
 
 	return (tpr & 0xf0) >> 4;
 }
@@ -1238,16 +1211,6 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
 		   vcpu->arch.apic_base, apic->base_address);
 }
 
-bool kvm_apic_present(struct kvm_vcpu *vcpu)
-{
-	return vcpu_has_lapic(vcpu) && apic_hw_enabled(vcpu->arch.apic);
-}
-
-int kvm_lapic_enabled(struct kvm_vcpu *vcpu)
-{
-	return kvm_apic_present(vcpu) && apic_sw_enabled(vcpu->arch.apic);
-}
-
 /*
  *----------------------------------------------------------------------
  * timer interface
@@ -1263,7 +1226,7 @@ int apic_has_pending_timer(struct kvm_vcpu *vcpu)
 {
 	struct kvm_lapic *apic = vcpu->arch.apic;
 
-	if (vcpu_has_lapic(vcpu) && apic_enabled(apic) &&
+	if (kvm_vcpu_has_lapic(vcpu) && apic_enabled(apic) &&
 			apic_lvt_enabled(apic, APIC_LVTT))
 		return atomic_read(&apic->lapic_timer.pending);
 
@@ -1272,10 +1235,10 @@ int apic_has_pending_timer(struct kvm_vcpu *vcpu)
 
 int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type)
 {
-	u32 reg = apic_get_reg(apic, lvt_type);
+	u32 reg = kvm_apic_get_reg(apic, lvt_type);
 	int vector, mode, trig_mode;
 
-	if (apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) {
+	if (kvm_apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) {
 		vector = reg & APIC_VECTOR_MASK;
 		mode = reg & APIC_MODE_MASK;
 		trig_mode = reg & APIC_LVT_LEVEL_TRIGGER;
@@ -1375,23 +1338,23 @@ int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)
 	struct kvm_lapic *apic = vcpu->arch.apic;
 	int highest_irr;
 
-	if (!vcpu_has_lapic(vcpu) || !apic_enabled(apic))
+	if (!kvm_vcpu_has_lapic(vcpu) || !apic_enabled(apic))
 		return -1;
 
 	apic_update_ppr(apic);
 	highest_irr = apic_find_highest_irr(apic);
 	if ((highest_irr == -1) ||
-	    ((highest_irr & 0xF0) <= apic_get_reg(apic, APIC_PROCPRI)))
+	    ((highest_irr & 0xF0) <= kvm_apic_get_reg(apic, APIC_PROCPRI)))
 		return -1;
 	return highest_irr;
 }
 
 int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu)
 {
-	u32 lvt0 = apic_get_reg(vcpu->arch.apic, APIC_LVT0);
+	u32 lvt0 = kvm_apic_get_reg(vcpu->arch.apic, APIC_LVT0);
 	int r = 0;
 
-	if (!apic_hw_enabled(vcpu->arch.apic))
+	if (!kvm_apic_hw_enabled(vcpu->arch.apic))
 		r = 1;
 	if ((lvt0 & APIC_LVT_MASKED) == 0 &&
 	    GET_APIC_DELIVERY_MODE(lvt0) == APIC_MODE_EXTINT)
@@ -1403,7 +1366,7 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
 {
 	struct kvm_lapic *apic = vcpu->arch.apic;
 
-	if (!vcpu_has_lapic(vcpu))
+	if (!kvm_vcpu_has_lapic(vcpu))
 		return;
 
 	if (atomic_read(&apic->lapic_timer.pending) > 0) {
@@ -1432,7 +1395,7 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu)
 
 	kvm_lapic_set_base(vcpu, vcpu->arch.apic_base);
 	kvm_apic_set_version(vcpu);
-	apic_set_spiv(apic, apic_get_reg(apic, APIC_SPIV));
+	apic_set_spiv(apic, kvm_apic_get_reg(apic, APIC_SPIV));
 
 	apic_update_ppr(apic);
 	hrtimer_cancel(&apic->lapic_timer.timer);
@@ -1448,7 +1411,7 @@ void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
 {
 	struct hrtimer *timer;
 
-	if (!vcpu_has_lapic(vcpu))
+	if (!kvm_vcpu_has_lapic(vcpu))
 		return;
 
 	timer = &vcpu->arch.apic->lapic_timer.timer;
@@ -1549,7 +1512,7 @@ void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu)
 	if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
 		return;
 
-	tpr = apic_get_reg(apic, APIC_TASKPRI) & 0xff;
+	tpr = kvm_apic_get_reg(apic, APIC_TASKPRI) & 0xff;
 	max_irr = apic_find_highest_irr(apic);
 	if (max_irr < 0)
 		max_irr = 0;
@@ -1608,7 +1571,7 @@ int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 reg, u64 data)
 {
 	struct kvm_lapic *apic = vcpu->arch.apic;
 
-	if (!vcpu_has_lapic(vcpu))
+	if (!kvm_vcpu_has_lapic(vcpu))
 		return 1;
 
 	/* if this is ICR write vector before command */
@@ -1622,7 +1585,7 @@ int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data)
 	struct kvm_lapic *apic = vcpu->arch.apic;
 	u32 low, high = 0;
 
-	if (!vcpu_has_lapic(vcpu))
+	if (!kvm_vcpu_has_lapic(vcpu))
 		return 1;
 
 	if (apic_reg_read(apic, reg, 4, &low))
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 73fa299b68e..2ad9caa06f9 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -55,8 +55,6 @@ int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type);
 u64 kvm_get_apic_base(struct kvm_vcpu *vcpu);
 void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data);
 void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu);
-int kvm_lapic_enabled(struct kvm_vcpu *vcpu);
-bool kvm_apic_present(struct kvm_vcpu *vcpu);
 int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu);
 
 u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu);
@@ -79,4 +77,47 @@ static inline bool kvm_hv_vapic_assist_page_enabled(struct kvm_vcpu *vcpu)
 
 int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data);
 void kvm_lapic_init(void);
+
+static inline u32 kvm_apic_get_reg(struct kvm_lapic *apic, int reg_off)
+{
+	        return *((u32 *) (apic->regs + reg_off));
+}
+
+extern struct static_key kvm_no_apic_vcpu;
+
+static inline bool kvm_vcpu_has_lapic(struct kvm_vcpu *vcpu)
+{
+	if (static_key_false(&kvm_no_apic_vcpu))
+		return vcpu->arch.apic;
+	return true;
+}
+
+extern struct static_key_deferred apic_hw_disabled;
+
+static inline int kvm_apic_hw_enabled(struct kvm_lapic *apic)
+{
+	if (static_key_false(&apic_hw_disabled.key))
+		return apic->vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE;
+	return MSR_IA32_APICBASE_ENABLE;
+}
+
+extern struct static_key_deferred apic_sw_disabled;
+
+static inline int kvm_apic_sw_enabled(struct kvm_lapic *apic)
+{
+	if (static_key_false(&apic_sw_disabled.key))
+		return kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_APIC_ENABLED;
+	return APIC_SPIV_APIC_ENABLED;
+}
+
+static inline bool kvm_apic_present(struct kvm_vcpu *vcpu)
+{
+	return kvm_vcpu_has_lapic(vcpu) && kvm_apic_hw_enabled(vcpu->arch.apic);
+}
+
+static inline int kvm_lapic_enabled(struct kvm_vcpu *vcpu)
+{
+	return kvm_apic_present(vcpu) && kvm_apic_sw_enabled(vcpu->arch.apic);
+}
+
 #endif
-- 
cgit v1.2.3-70-g09d2


From a9ad773e0dd833651f0831020a0ea0265c29f2ea Mon Sep 17 00:00:00 2001
From: Borislav Petkov <borislav.petkov@amd.com>
Date: Mon, 6 Aug 2012 19:00:36 +0200
Subject: x86, cpu: Fixup tlb_flushall_shift formatting

The TLB characteristics appeared like this in dmesg:

[    0.065817] Last level iTLB entries: 4KB 512, 2MB 1024, 4MB 512
[    0.065817] Last level dTLB entries: 4KB 1024, 2MB 1024, 4MB 512
[    0.065817] tlb_flushall_shift is 0xffffffff

where tlb_flushall_shift is actually -1 but dumped as a hex number.
However, the Kconfig option CONFIG_DEBUG_TLBFLUSH and the rest of the
code treats this as a signed decimal and states "If you set it to -1,
the code flushes the whole TLB unconditionally."

So, fix its formatting in accordance with the other references to it.

Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
Link: http://lkml.kernel.org/r/1344272439-29080-2-git-send-email-bp@amd64.org
Acked-by: Alex Shi <alex.shi@intel.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/kernel/cpu/common.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 46d8786d655..d239977f361 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -474,7 +474,7 @@ void __cpuinit cpu_detect_tlb(struct cpuinfo_x86 *c)
 
 	printk(KERN_INFO "Last level iTLB entries: 4KB %d, 2MB %d, 4MB %d\n" \
 		"Last level dTLB entries: 4KB %d, 2MB %d, 4MB %d\n"	     \
-		"tlb_flushall_shift is 0x%x\n",
+		"tlb_flushall_shift: %d\n",
 		tlb_lli_4k[ENTRIES], tlb_lli_2m[ENTRIES],
 		tlb_lli_4m[ENTRIES], tlb_lld_4k[ENTRIES],
 		tlb_lld_2m[ENTRIES], tlb_lld_4m[ENTRIES],
-- 
cgit v1.2.3-70-g09d2


From 5b556332c3ab19e6375836d35ca658776e9ba0f6 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <borislav.petkov@amd.com>
Date: Mon, 6 Aug 2012 19:00:37 +0200
Subject: x86, cpu: Push TLB detection CPUID check down

Push the max CPUID leaf check into the ->detect_tlb function and remove
general test case from the generic path.

Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
Link: http://lkml.kernel.org/r/1344272439-29080-3-git-send-email-bp@amd64.org
Acked-by: Alex Shi <alex.shi@intel.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/kernel/cpu/common.c | 3 +--
 arch/x86/kernel/cpu/intel.c  | 4 ++++
 2 files changed, 5 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index d239977f361..080f4a737e3 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -940,8 +940,7 @@ void __init identify_boot_cpu(void)
 #else
 	vgetcpu_set_mode();
 #endif
-	if (boot_cpu_data.cpuid_level >= 2)
-		cpu_detect_tlb(&boot_cpu_data);
+	cpu_detect_tlb(&boot_cpu_data);
 }
 
 void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 0a4ce2980a5..198e019a531 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -648,6 +648,10 @@ static void __cpuinit intel_detect_tlb(struct cpuinfo_x86 *c)
 	int i, j, n;
 	unsigned int regs[4];
 	unsigned char *desc = (unsigned char *)regs;
+
+	if (c->cpuid_level < 2)
+		return;
+
 	/* Number of times to iterate */
 	n = cpuid_eax(2) & 0xFF;
 
-- 
cgit v1.2.3-70-g09d2


From b46882e4c4de4813947fce940fe74af794a1eb72 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <borislav.petkov@amd.com>
Date: Mon, 6 Aug 2012 19:00:38 +0200
Subject: x86, cpu: Add AMD TLB size detection

Read I- and DTLB entries count from CPUID on AMD. Handle all the
different family-specific cases.

Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
Link: http://lkml.kernel.org/r/1344272439-29080-4-git-send-email-bp@amd64.org
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/kernel/cpu/amd.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 54 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 9d92e19039f..bcd200839c9 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -737,6 +737,59 @@ static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c,
 }
 #endif
 
+static void __cpuinit cpu_detect_tlb_amd(struct cpuinfo_x86 *c)
+{
+	u32 ebx, eax, ecx, edx;
+	u16 mask = 0xfff;
+
+	if (c->x86 < 0xf)
+		return;
+
+	if (c->extended_cpuid_level < 0x80000006)
+		return;
+
+	cpuid(0x80000006, &eax, &ebx, &ecx, &edx);
+
+	tlb_lld_4k[ENTRIES] = (ebx >> 16) & mask;
+	tlb_lli_4k[ENTRIES] = ebx & mask;
+
+	/*
+	 * K8 doesn't have 2M/4M entries in the L2 TLB so read out the L1 TLB
+	 * characteristics from the CPUID function 0x80000005 instead.
+	 */
+	if (c->x86 == 0xf) {
+		cpuid(0x80000005, &eax, &ebx, &ecx, &edx);
+		mask = 0xff;
+	}
+
+	/* Handle DTLB 2M and 4M sizes, fall back to L1 if L2 is disabled */
+	if (!((eax >> 16) & mask)) {
+		u32 a, b, c, d;
+
+		cpuid(0x80000005, &a, &b, &c, &d);
+		tlb_lld_2m[ENTRIES] = (a >> 16) & 0xff;
+	} else {
+		tlb_lld_2m[ENTRIES] = (eax >> 16) & mask;
+	}
+
+	/* a 4M entry uses two 2M entries */
+	tlb_lld_4m[ENTRIES] = tlb_lld_2m[ENTRIES] >> 1;
+
+	/* Handle ITLB 2M and 4M sizes, fall back to L1 if L2 is disabled */
+	if (!(eax & mask)) {
+		/* Erratum 658 */
+		if (c->x86 == 0x15 && c->x86_model <= 0x1f) {
+			tlb_lli_2m[ENTRIES] = 1024;
+		} else {
+			cpuid(0x80000005, &eax, &ebx, &ecx, &edx);
+			tlb_lli_2m[ENTRIES] = eax & 0xff;
+		}
+	} else
+		tlb_lli_2m[ENTRIES] = eax & mask;
+
+	tlb_lli_4m[ENTRIES] = tlb_lli_2m[ENTRIES] >> 1;
+}
+
 static const struct cpu_dev __cpuinitconst amd_cpu_dev = {
 	.c_vendor	= "AMD",
 	.c_ident	= { "AuthenticAMD" },
@@ -756,6 +809,7 @@ static const struct cpu_dev __cpuinitconst amd_cpu_dev = {
 	.c_size_cache	= amd_size_cache,
 #endif
 	.c_early_init   = early_init_amd,
+	.c_detect_tlb	= cpu_detect_tlb_amd,
 	.c_bsp_init	= bsp_init_amd,
 	.c_init		= init_amd,
 	.c_x86_vendor	= X86_VENDOR_AMD,
-- 
cgit v1.2.3-70-g09d2


From 057237bb35a605d795fd787868a1088705f26ee5 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <borislav.petkov@amd.com>
Date: Mon, 6 Aug 2012 19:00:39 +0200
Subject: x86, cpu: Preset default tlb_flushall_shift on AMD

Run the mprotect.c microbenchmark on all our families >= K8 and preset
the flushall shift variable accordingly.

Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
Link: http://lkml.kernel.org/r/1344272439-29080-5-git-send-email-bp@amd64.org
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/kernel/cpu/amd.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index bcd200839c9..f7e98a2c0d1 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -737,6 +737,17 @@ static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c,
 }
 #endif
 
+static void __cpuinit cpu_set_tlb_flushall_shift(struct cpuinfo_x86 *c)
+{
+	if (!cpu_has_invlpg)
+		return;
+
+	tlb_flushall_shift = 5;
+
+	if (c->x86 <= 0x11)
+		tlb_flushall_shift = 4;
+}
+
 static void __cpuinit cpu_detect_tlb_amd(struct cpuinfo_x86 *c)
 {
 	u32 ebx, eax, ecx, edx;
@@ -788,6 +799,8 @@ static void __cpuinit cpu_detect_tlb_amd(struct cpuinfo_x86 *c)
 		tlb_lli_2m[ENTRIES] = eax & mask;
 
 	tlb_lli_4m[ENTRIES] = tlb_lli_2m[ENTRIES] >> 1;
+
+	cpu_set_tlb_flushall_shift(c);
 }
 
 static const struct cpu_dev __cpuinitconst amd_cpu_dev = {
-- 
cgit v1.2.3-70-g09d2


From 64eb0620296f924d5fded755c5ed95fb73649e06 Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Wed, 8 Aug 2012 15:24:36 +0300
Subject: KVM: correctly detect APIC SW state in kvm_apic_post_state_restore()

For apic_set_spiv() to track APIC SW state correctly it needs to see
previous and next values of the spurious vector register, but currently
memset() overwrite the old value before apic_set_spiv() get a chance to
do tracking. Fix it by calling apic_set_spiv() before overwriting old
value.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/lapic.c | 7 +++++--
 arch/x86/kvm/lapic.h | 3 ++-
 arch/x86/kvm/x86.c   | 3 +--
 3 files changed, 8 insertions(+), 5 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 333c27fa6e9..18d149d8020 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1389,13 +1389,16 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
 	return vector;
 }
 
-void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu)
+void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
+		struct kvm_lapic_state *s)
 {
 	struct kvm_lapic *apic = vcpu->arch.apic;
 
 	kvm_lapic_set_base(vcpu, vcpu->arch.apic_base);
+	/* set SPIV separately to get count of SW disabled APICs right */
+	apic_set_spiv(apic, *((u32 *)(s->regs + APIC_SPIV)));
+	memcpy(vcpu->arch.apic->regs, s->regs, sizeof *s);
 	kvm_apic_set_version(vcpu);
-	apic_set_spiv(apic, kvm_apic_get_reg(apic, APIC_SPIV));
 
 	apic_update_ppr(apic);
 	hrtimer_cancel(&apic->lapic_timer.timer);
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 2ad9caa06f9..615a8b03016 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -54,7 +54,8 @@ int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type);
 
 u64 kvm_get_apic_base(struct kvm_vcpu *vcpu);
 void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data);
-void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu);
+void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
+		struct kvm_lapic_state *s);
 int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu);
 
 u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 8ebf65c349e..91a595827de 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2348,8 +2348,7 @@ static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
 static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
 				    struct kvm_lapic_state *s)
 {
-	memcpy(vcpu->arch.apic->regs, s->regs, sizeof *s);
-	kvm_apic_post_state_restore(vcpu);
+	kvm_apic_post_state_restore(vcpu, s);
 	update_cr8_intercept(vcpu);
 
 	return 0;
-- 
cgit v1.2.3-70-g09d2


From 4670a300a2169e1e922593c5d35b0cdaee112901 Mon Sep 17 00:00:00 2001
From: Tony Luck <tony.luck@intel.com>
Date: Thu, 9 Aug 2012 10:59:21 -0700
Subject: x86/mce: Make cmci_discover() quiet

cmci_discover() works out which machine check banks support CMCI, and
which of those are shared by multiple logical processors. It uses this
information to ensure that exactly one cpu is designated the owner of
each bank so that when interrupts are broadcast to multiple cpus, only one
of them will look in a shared bank to log the error and clear the bank.

At boot time cmci_discover() performs this task silently. But during
certain cpu hotplug operations it prints out a set of summary lines
like this:

CPU 35 MCA banks CMCI:0 CMCI:1 CMCI:3 CMCI:5 CMCI:6 CMCI:7 CMCI:8 CMCI:9 CMCI:10 CMCI:11
CPU 1 MCA banks CMCI:0 CMCI:1 CMCI:3
CPU 39 MCA banks CMCI:0 CMCI:1 CMCI:3
CPU 38 MCA banks CMCI:0 CMCI:1 CMCI:3
CPU 32 MCA banks CMCI:0 CMCI:1 CMCI:3
CPU 37 MCA banks CMCI:0 CMCI:1 CMCI:3
CPU 36 MCA banks CMCI:0 CMCI:1 CMCI:3
CPU 34 MCA banks CMCI:0 CMCI:1 CMCI:3

The value of these messages seems very low. A user might painstakingly
cross-check against the data sheet for a processor to ensure that all
CMCI supported banks are correctly reported, but this seems improbable.
If users really wanted to do this, we should print the information at
boot time too.

Remove the messages.

Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/x86/kernel/cpu/mcheck/mce_intel.c | 25 ++++++-------------------
 1 file changed, 6 insertions(+), 19 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index 38e49bc95ff..59648e48a14 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -65,24 +65,15 @@ static void intel_threshold_interrupt(void)
 	mce_notify_irq();
 }
 
-static void print_update(char *type, int *hdr, int num)
-{
-	if (*hdr == 0)
-		printk(KERN_INFO "CPU %d MCA banks", smp_processor_id());
-	*hdr = 1;
-	printk(KERN_CONT " %s:%d", type, num);
-}
-
 /*
  * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks
  * on this CPU. Use the algorithm recommended in the SDM to discover shared
  * banks.
  */
-static void cmci_discover(int banks, int boot)
+static void cmci_discover(int banks)
 {
 	unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned);
 	unsigned long flags;
-	int hdr = 0;
 	int i;
 
 	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
@@ -96,8 +87,7 @@ static void cmci_discover(int banks, int boot)
 
 		/* Already owned by someone else? */
 		if (val & MCI_CTL2_CMCI_EN) {
-			if (test_and_clear_bit(i, owned) && !boot)
-				print_update("SHD", &hdr, i);
+			clear_bit(i, owned);
 			__clear_bit(i, __get_cpu_var(mce_poll_banks));
 			continue;
 		}
@@ -109,16 +99,13 @@ static void cmci_discover(int banks, int boot)
 
 		/* Did the enable bit stick? -- the bank supports CMCI */
 		if (val & MCI_CTL2_CMCI_EN) {
-			if (!test_and_set_bit(i, owned) && !boot)
-				print_update("CMCI", &hdr, i);
+			set_bit(i, owned);
 			__clear_bit(i, __get_cpu_var(mce_poll_banks));
 		} else {
 			WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks)));
 		}
 	}
 	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
-	if (hdr)
-		printk(KERN_CONT "\n");
 }
 
 /*
@@ -186,7 +173,7 @@ void cmci_rediscover(int dying)
 			continue;
 		/* Recheck banks in case CPUs don't all have the same */
 		if (cmci_supported(&banks))
-			cmci_discover(banks, 0);
+			cmci_discover(banks);
 	}
 
 	set_cpus_allowed_ptr(current, old);
@@ -200,7 +187,7 @@ void cmci_reenable(void)
 {
 	int banks;
 	if (cmci_supported(&banks))
-		cmci_discover(banks, 0);
+		cmci_discover(banks);
 }
 
 static void intel_init_cmci(void)
@@ -211,7 +198,7 @@ static void intel_init_cmci(void)
 		return;
 
 	mce_threshold_vector = intel_threshold_interrupt;
-	cmci_discover(banks, 1);
+	cmci_discover(banks);
 	/*
 	 * For CPU #0 this runs with still disabled APIC, but that's
 	 * ok because only the vector is set up. We still do another
-- 
cgit v1.2.3-70-g09d2


From 55babd8f41f122f5f4c7cebf520c766c983282c6 Mon Sep 17 00:00:00 2001
From: Chen Gong <gong.chen@linux.intel.com>
Date: Thu, 9 Aug 2012 11:44:51 -0700
Subject: x86/mce: Add CMCI poll mode

On Intel systems corrected machine check interrupts (CMCI) may be sent to
multiple logical processors; possibly to all processors on the affected
socket (SDM Volume 3B "15.5.1 CMCI Local APIC Interface").  This means
that a persistent error (such as a stuck bit in ECC memory) may cause
a storm of interrupts that greatly hinders or prevents forward progress
(probably on many processors).

To solve this we keep track of the rate at which each processor sees
CMCI. If we exceed a threshold, we disable CMCI delivery and switch to
polling the machine check banks. If the storm subsides (none of the
affected processors see any more errors for a complete poll interval) we
re-enable CMCI.

[Tony: Added console messages when storm begins/ends and increased storm
threshold from 5 to 15 so we have a few more logged entries before we
disable interrupts and start dropping reports]

Signed-off-by: Chen Gong <gong.chen@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Chen Gong <gong.chen@linux.intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/x86/kernel/cpu/mcheck/mce-internal.h |  12 ++++
 arch/x86/kernel/cpu/mcheck/mce.c          |  47 +++++++++++--
 arch/x86/kernel/cpu/mcheck/mce_intel.c    | 108 +++++++++++++++++++++++++++++-
 3 files changed, 160 insertions(+), 7 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index ed44c8a6585..6a05c1d327a 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -28,6 +28,18 @@ extern int mce_ser;
 
 extern struct mce_bank *mce_banks;
 
+#ifdef CONFIG_X86_MCE_INTEL
+unsigned long mce_intel_adjust_timer(unsigned long interval);
+void mce_intel_cmci_poll(void);
+void mce_intel_hcpu_update(unsigned long cpu);
+#else
+# define mce_intel_adjust_timer mce_adjust_timer_default
+static inline void mce_intel_cmci_poll(void) { }
+static inline void mce_intel_hcpu_update(unsigned long cpu) { }
+#endif
+
+void mce_timer_kick(unsigned long interval);
+
 #ifdef CONFIG_ACPI_APEI
 int apei_write_mce(struct mce *m);
 ssize_t apei_read_mce(struct mce *m, u64 *record_id);
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index b4dde1527ed..8c1beea6cab 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1260,6 +1260,14 @@ static unsigned long check_interval = 5 * 60; /* 5 minutes */
 static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */
 static DEFINE_PER_CPU(struct timer_list, mce_timer);
 
+static unsigned long mce_adjust_timer_default(unsigned long interval)
+{
+	return interval;
+}
+
+static unsigned long (*mce_adjust_timer)(unsigned long interval) =
+	mce_adjust_timer_default;
+
 static void mce_timer_fn(unsigned long data)
 {
 	struct timer_list *t = &__get_cpu_var(mce_timer);
@@ -1270,6 +1278,7 @@ static void mce_timer_fn(unsigned long data)
 	if (mce_available(__this_cpu_ptr(&cpu_info))) {
 		machine_check_poll(MCP_TIMESTAMP,
 				&__get_cpu_var(mce_poll_banks));
+		mce_intel_cmci_poll();
 	}
 
 	/*
@@ -1277,14 +1286,38 @@ static void mce_timer_fn(unsigned long data)
 	 * polling interval, otherwise increase the polling interval.
 	 */
 	iv = __this_cpu_read(mce_next_interval);
-	if (mce_notify_irq())
+	if (mce_notify_irq()) {
 		iv = max(iv / 2, (unsigned long) HZ/100);
-	else
+	} else {
 		iv = min(iv * 2, round_jiffies_relative(check_interval * HZ));
+		iv = mce_adjust_timer(iv);
+	}
 	__this_cpu_write(mce_next_interval, iv);
+	/* Might have become 0 after CMCI storm subsided */
+	if (iv) {
+		t->expires = jiffies + iv;
+		add_timer_on(t, smp_processor_id());
+	}
+}
 
-	t->expires = jiffies + iv;
-	add_timer_on(t, smp_processor_id());
+/*
+ * Ensure that the timer is firing in @interval from now.
+ */
+void mce_timer_kick(unsigned long interval)
+{
+	struct timer_list *t = &__get_cpu_var(mce_timer);
+	unsigned long when = jiffies + interval;
+	unsigned long iv = __this_cpu_read(mce_next_interval);
+
+	if (timer_pending(t)) {
+		if (time_before(when, t->expires))
+			mod_timer_pinned(t, when);
+	} else {
+		t->expires = round_jiffies(when);
+		add_timer_on(t, smp_processor_id());
+	}
+	if (interval < iv)
+		__this_cpu_write(mce_next_interval, interval);
 }
 
 /* Must not be called in IRQ context where del_timer_sync() can deadlock */
@@ -1548,6 +1581,7 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
 	switch (c->x86_vendor) {
 	case X86_VENDOR_INTEL:
 		mce_intel_feature_init(c);
+		mce_adjust_timer = mce_intel_adjust_timer;
 		break;
 	case X86_VENDOR_AMD:
 		mce_amd_feature_init(c);
@@ -1559,7 +1593,7 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
 
 static void mce_start_timer(unsigned int cpu, struct timer_list *t)
 {
-	unsigned long iv = check_interval * HZ;
+	unsigned long iv = mce_adjust_timer(check_interval * HZ);
 
 	__this_cpu_write(mce_next_interval, iv);
 
@@ -2272,10 +2306,11 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
 		if (threshold_cpu_callback)
 			threshold_cpu_callback(action, cpu);
 		mce_device_remove(cpu);
+		mce_intel_hcpu_update(cpu);
 		break;
 	case CPU_DOWN_PREPARE:
-		del_timer_sync(t);
 		smp_call_function_single(cpu, mce_disable_cpu, &action, 1);
+		del_timer_sync(t);
 		break;
 	case CPU_DOWN_FAILED:
 		smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index 59648e48a14..098386fed48 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -15,6 +15,8 @@
 #include <asm/msr.h>
 #include <asm/mce.h>
 
+#include "mce-internal.h"
+
 /*
  * Support for Intel Correct Machine Check Interrupts. This allows
  * the CPU to raise an interrupt when a corrected machine check happened.
@@ -30,7 +32,22 @@ static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);
  */
 static DEFINE_RAW_SPINLOCK(cmci_discover_lock);
 
-#define CMCI_THRESHOLD 1
+#define CMCI_THRESHOLD		1
+#define CMCI_POLL_INTERVAL	(30 * HZ)
+#define CMCI_STORM_INTERVAL	(1 * HZ)
+#define CMCI_STORM_THRESHOLD	15
+
+static DEFINE_PER_CPU(unsigned long, cmci_time_stamp);
+static DEFINE_PER_CPU(unsigned int, cmci_storm_cnt);
+static DEFINE_PER_CPU(unsigned int, cmci_storm_state);
+
+enum {
+	CMCI_STORM_NONE,
+	CMCI_STORM_ACTIVE,
+	CMCI_STORM_SUBSIDED,
+};
+
+static atomic_t cmci_storm_on_cpus;
 
 static int cmci_supported(int *banks)
 {
@@ -53,6 +70,93 @@ static int cmci_supported(int *banks)
 	return !!(cap & MCG_CMCI_P);
 }
 
+void mce_intel_cmci_poll(void)
+{
+	if (__this_cpu_read(cmci_storm_state) == CMCI_STORM_NONE)
+		return;
+	machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
+}
+
+void mce_intel_hcpu_update(unsigned long cpu)
+{
+	if (per_cpu(cmci_storm_state, cpu) == CMCI_STORM_ACTIVE)
+		atomic_dec(&cmci_storm_on_cpus);
+
+	per_cpu(cmci_storm_state, cpu) = CMCI_STORM_NONE;
+}
+
+unsigned long mce_intel_adjust_timer(unsigned long interval)
+{
+	int r;
+
+	if (interval < CMCI_POLL_INTERVAL)
+		return interval;
+
+	switch (__this_cpu_read(cmci_storm_state)) {
+	case CMCI_STORM_ACTIVE:
+		/*
+		 * We switch back to interrupt mode once the poll timer has
+		 * silenced itself. That means no events recorded and the
+		 * timer interval is back to our poll interval.
+		 */
+		__this_cpu_write(cmci_storm_state, CMCI_STORM_SUBSIDED);
+		r = atomic_sub_return(1, &cmci_storm_on_cpus);
+		if (r == 0)
+			pr_notice("CMCI storm subsided: switching to interrupt mode\n");
+		/* FALLTHROUGH */
+
+	case CMCI_STORM_SUBSIDED:
+		/*
+		 * We wait for all cpus to go back to SUBSIDED
+		 * state. When that happens we switch back to
+		 * interrupt mode.
+		 */
+		if (!atomic_read(&cmci_storm_on_cpus)) {
+			__this_cpu_write(cmci_storm_state, CMCI_STORM_NONE);
+			cmci_reenable();
+			cmci_recheck();
+		}
+		return CMCI_POLL_INTERVAL;
+	default:
+		/*
+		 * We have shiny weather. Let the poll do whatever it
+		 * thinks.
+		 */
+		return interval;
+	}
+}
+
+static bool cmci_storm_detect(void)
+{
+	unsigned int cnt = __this_cpu_read(cmci_storm_cnt);
+	unsigned long ts = __this_cpu_read(cmci_time_stamp);
+	unsigned long now = jiffies;
+	int r;
+
+	if (__this_cpu_read(cmci_storm_state) != CMCI_STORM_NONE)
+		return true;
+
+	if (time_before_eq(now, ts + CMCI_STORM_INTERVAL)) {
+		cnt++;
+	} else {
+		cnt = 1;
+		__this_cpu_write(cmci_time_stamp, now);
+	}
+	__this_cpu_write(cmci_storm_cnt, cnt);
+
+	if (cnt <= CMCI_STORM_THRESHOLD)
+		return false;
+
+	cmci_clear();
+	__this_cpu_write(cmci_storm_state, CMCI_STORM_ACTIVE);
+	r = atomic_add_return(1, &cmci_storm_on_cpus);
+	mce_timer_kick(CMCI_POLL_INTERVAL);
+
+	if (r == 1)
+		pr_notice("CMCI storm detected: switching to poll mode\n");
+	return true;
+}
+
 /*
  * The interrupt handler. This is called on every event.
  * Just call the poller directly to log any events.
@@ -61,6 +165,8 @@ static int cmci_supported(int *banks)
  */
 static void intel_threshold_interrupt(void)
 {
+	if (cmci_storm_detect())
+		return;
 	machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
 	mce_notify_irq();
 }
-- 
cgit v1.2.3-70-g09d2


From c5e63197db519bae1c33e41ea0342a50f39e7a93 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Tue, 7 Aug 2012 15:20:36 +0200
Subject: perf: Unified API to record selective sets of arch registers

This brings a new API to help the selective dump of registers on event
sampling, and its implementation for x86 arch.

Added HAVE_PERF_REGS config option to determine if the architecture
provides perf registers ABI.

The information about desired registers will be passed in u64 mask.
It's up to the architecture to map the registers into the mask bits.

For the x86 arch implementation, both 32 and 64 bit registers bits are
defined within single enum to ensure 64 bit system can provide register
dump for compat task if needed in the future.

Original-patch-by: Frederic Weisbecker <fweisbec@gmail.com>
[ Added missing linux/errno.h include ]
Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Cc: "Frank Ch. Eigler" <fche@redhat.com>
Cc: Arun Sharma <asharma@fb.com>
Cc: Benjamin Redelings <benjamin.redelings@nescent.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Frank Ch. Eigler <fche@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Robert Richter <robert.richter@amd.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: Tom Zanussi <tzanussi@gmail.com>
Cc: Ulrich Drepper <drepper@gmail.com>
Link: http://lkml.kernel.org/r/1344345647-11536-2-git-send-email-jolsa@redhat.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 arch/Kconfig                     |  6 +++
 arch/x86/Kconfig                 |  1 +
 arch/x86/include/asm/perf_regs.h | 33 +++++++++++++++
 arch/x86/kernel/Makefile         |  2 +
 arch/x86/kernel/perf_regs.c      | 90 ++++++++++++++++++++++++++++++++++++++++
 include/linux/perf_regs.h        | 19 +++++++++
 6 files changed, 151 insertions(+)
 create mode 100644 arch/x86/include/asm/perf_regs.h
 create mode 100644 arch/x86/kernel/perf_regs.c
 create mode 100644 include/linux/perf_regs.h

(limited to 'arch/x86')

diff --git a/arch/Kconfig b/arch/Kconfig
index 72f2fa189cc..68d827b7ae8 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -222,6 +222,12 @@ config HAVE_PERF_EVENTS_NMI
 	  subsystem.  Also has support for calculating CPU cycle events
 	  to determine how many clock cycles in a given period.
 
+config HAVE_PERF_REGS
+	bool
+	help
+	  Support selective register dumps for perf events. This includes
+	  bit-mapping of each registers and a unique architecture id.
+
 config HAVE_ARCH_JUMP_LABEL
 	bool
 
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 8ec3a1aa4ab..3fab6ec9edc 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -60,6 +60,7 @@ config X86
 	select HAVE_MIXED_BREAKPOINTS_REGS
 	select PERF_EVENTS
 	select HAVE_PERF_EVENTS_NMI
+	select HAVE_PERF_REGS
 	select ANON_INODES
 	select HAVE_ALIGNED_STRUCT_PAGE if SLUB && !M386
 	select HAVE_CMPXCHG_LOCAL if !M386
diff --git a/arch/x86/include/asm/perf_regs.h b/arch/x86/include/asm/perf_regs.h
new file mode 100644
index 00000000000..3f2207bfd17
--- /dev/null
+++ b/arch/x86/include/asm/perf_regs.h
@@ -0,0 +1,33 @@
+#ifndef _ASM_X86_PERF_REGS_H
+#define _ASM_X86_PERF_REGS_H
+
+enum perf_event_x86_regs {
+	PERF_REG_X86_AX,
+	PERF_REG_X86_BX,
+	PERF_REG_X86_CX,
+	PERF_REG_X86_DX,
+	PERF_REG_X86_SI,
+	PERF_REG_X86_DI,
+	PERF_REG_X86_BP,
+	PERF_REG_X86_SP,
+	PERF_REG_X86_IP,
+	PERF_REG_X86_FLAGS,
+	PERF_REG_X86_CS,
+	PERF_REG_X86_SS,
+	PERF_REG_X86_DS,
+	PERF_REG_X86_ES,
+	PERF_REG_X86_FS,
+	PERF_REG_X86_GS,
+	PERF_REG_X86_R8,
+	PERF_REG_X86_R9,
+	PERF_REG_X86_R10,
+	PERF_REG_X86_R11,
+	PERF_REG_X86_R12,
+	PERF_REG_X86_R13,
+	PERF_REG_X86_R14,
+	PERF_REG_X86_R15,
+
+	PERF_REG_X86_32_MAX = PERF_REG_X86_GS + 1,
+	PERF_REG_X86_64_MAX = PERF_REG_X86_R15 + 1,
+};
+#endif /* _ASM_X86_PERF_REGS_H */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 8215e5652d9..8d7a619718b 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -100,6 +100,8 @@ obj-$(CONFIG_SWIOTLB)			+= pci-swiotlb.o
 obj-$(CONFIG_OF)			+= devicetree.o
 obj-$(CONFIG_UPROBES)			+= uprobes.o
 
+obj-$(CONFIG_PERF_EVENTS)		+= perf_regs.o
+
 ###
 # 64 bit specific files
 ifeq ($(CONFIG_X86_64),y)
diff --git a/arch/x86/kernel/perf_regs.c b/arch/x86/kernel/perf_regs.c
new file mode 100644
index 00000000000..3d6923528b1
--- /dev/null
+++ b/arch/x86/kernel/perf_regs.c
@@ -0,0 +1,90 @@
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/bug.h>
+#include <linux/stddef.h>
+#include <asm/perf_regs.h>
+#include <asm/ptrace.h>
+
+#ifdef CONFIG_X86_32
+#define PERF_REG_X86_MAX PERF_REG_X86_32_MAX
+#else
+#define PERF_REG_X86_MAX PERF_REG_X86_64_MAX
+#endif
+
+#define PT_REGS_OFFSET(id, r) [id] = offsetof(struct pt_regs, r)
+
+static unsigned int pt_regs_offset[PERF_REG_X86_MAX] = {
+	PT_REGS_OFFSET(PERF_REG_X86_AX, ax),
+	PT_REGS_OFFSET(PERF_REG_X86_BX, bx),
+	PT_REGS_OFFSET(PERF_REG_X86_CX, cx),
+	PT_REGS_OFFSET(PERF_REG_X86_DX, dx),
+	PT_REGS_OFFSET(PERF_REG_X86_SI, si),
+	PT_REGS_OFFSET(PERF_REG_X86_DI, di),
+	PT_REGS_OFFSET(PERF_REG_X86_BP, bp),
+	PT_REGS_OFFSET(PERF_REG_X86_SP, sp),
+	PT_REGS_OFFSET(PERF_REG_X86_IP, ip),
+	PT_REGS_OFFSET(PERF_REG_X86_FLAGS, flags),
+	PT_REGS_OFFSET(PERF_REG_X86_CS, cs),
+	PT_REGS_OFFSET(PERF_REG_X86_SS, ss),
+#ifdef CONFIG_X86_32
+	PT_REGS_OFFSET(PERF_REG_X86_DS, ds),
+	PT_REGS_OFFSET(PERF_REG_X86_ES, es),
+	PT_REGS_OFFSET(PERF_REG_X86_FS, fs),
+	PT_REGS_OFFSET(PERF_REG_X86_GS, gs),
+#else
+	/*
+	 * The pt_regs struct does not store
+	 * ds, es, fs, gs in 64 bit mode.
+	 */
+	(unsigned int) -1,
+	(unsigned int) -1,
+	(unsigned int) -1,
+	(unsigned int) -1,
+#endif
+#ifdef CONFIG_X86_64
+	PT_REGS_OFFSET(PERF_REG_X86_R8, r8),
+	PT_REGS_OFFSET(PERF_REG_X86_R9, r9),
+	PT_REGS_OFFSET(PERF_REG_X86_R10, r10),
+	PT_REGS_OFFSET(PERF_REG_X86_R11, r11),
+	PT_REGS_OFFSET(PERF_REG_X86_R12, r12),
+	PT_REGS_OFFSET(PERF_REG_X86_R13, r13),
+	PT_REGS_OFFSET(PERF_REG_X86_R14, r14),
+	PT_REGS_OFFSET(PERF_REG_X86_R15, r15),
+#endif
+};
+
+u64 perf_reg_value(struct pt_regs *regs, int idx)
+{
+	if (WARN_ON_ONCE(idx > ARRAY_SIZE(pt_regs_offset)))
+		return 0;
+
+	return regs_get_register(regs, pt_regs_offset[idx]);
+}
+
+#define REG_RESERVED (~((1ULL << PERF_REG_X86_MAX) - 1ULL))
+
+#ifdef CONFIG_X86_32
+int perf_reg_validate(u64 mask)
+{
+	if (!mask || mask & REG_RESERVED)
+		return -EINVAL;
+
+	return 0;
+}
+#else /* CONFIG_X86_64 */
+#define REG_NOSUPPORT ((1ULL << PERF_REG_X86_DS) | \
+		       (1ULL << PERF_REG_X86_ES) | \
+		       (1ULL << PERF_REG_X86_FS) | \
+		       (1ULL << PERF_REG_X86_GS))
+
+int perf_reg_validate(u64 mask)
+{
+	if (!mask || mask & REG_RESERVED)
+		return -EINVAL;
+
+	if (mask & REG_NOSUPPORT)
+		return -EINVAL;
+
+	return 0;
+}
+#endif /* CONFIG_X86_32 */
diff --git a/include/linux/perf_regs.h b/include/linux/perf_regs.h
new file mode 100644
index 00000000000..a2f1a98f783
--- /dev/null
+++ b/include/linux/perf_regs.h
@@ -0,0 +1,19 @@
+#ifndef _LINUX_PERF_REGS_H
+#define _LINUX_PERF_REGS_H
+
+#ifdef CONFIG_HAVE_PERF_REGS
+#include <asm/perf_regs.h>
+u64 perf_reg_value(struct pt_regs *regs, int idx);
+int perf_reg_validate(u64 mask);
+#else
+static inline u64 perf_reg_value(struct pt_regs *regs, int idx)
+{
+	return 0;
+}
+
+static inline int perf_reg_validate(u64 mask)
+{
+	return mask ? -ENOSYS : 0;
+}
+#endif /* CONFIG_HAVE_PERF_REGS */
+#endif /* _LINUX_PERF_REGS_H */
-- 
cgit v1.2.3-70-g09d2


From 4018994f3d8785275ef0e7391b75c3462c029e56 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Tue, 7 Aug 2012 15:20:37 +0200
Subject: perf: Add ability to attach user level registers dump to sample

Introducing PERF_SAMPLE_REGS_USER sample type bit to trigger the dump of
user level registers on sample. Registers we want to dump are specified
by sample_regs_user bitmask.

Only user level registers are dumped at the moment. Meaning the register
values of the user space context as it was before the user entered the
kernel for whatever reason (syscall, irq, exception, or a PMI happening
in userspace).

The layout of the sample_regs_user bitmap is described in
asm/perf_regs.h for archs that support register dump.

This is going to be useful to bring Dwarf CFI based stack unwinding on
top of samples.

Original-patch-by: Frederic Weisbecker <fweisbec@gmail.com>
[ Dump registers ABI specification. ]
Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Suggested-by: Stephane Eranian <eranian@google.com>
Cc: "Frank Ch. Eigler" <fche@redhat.com>
Cc: Arun Sharma <asharma@fb.com>
Cc: Benjamin Redelings <benjamin.redelings@nescent.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Frank Ch. Eigler <fche@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Robert Richter <robert.richter@amd.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: Tom Zanussi <tzanussi@gmail.com>
Cc: Ulrich Drepper <drepper@gmail.com>
Link: http://lkml.kernel.org/r/1344345647-11536-3-git-send-email-jolsa@redhat.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 arch/x86/kernel/perf_regs.c | 15 +++++++++++
 include/linux/perf_event.h  | 35 +++++++++++++++++++++---
 include/linux/perf_regs.h   |  6 +++++
 kernel/events/core.c        | 66 +++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 119 insertions(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/perf_regs.c b/arch/x86/kernel/perf_regs.c
index 3d6923528b1..c5a3e5cfe07 100644
--- a/arch/x86/kernel/perf_regs.c
+++ b/arch/x86/kernel/perf_regs.c
@@ -1,5 +1,7 @@
 #include <linux/errno.h>
 #include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/perf_event.h>
 #include <linux/bug.h>
 #include <linux/stddef.h>
 #include <asm/perf_regs.h>
@@ -71,6 +73,11 @@ int perf_reg_validate(u64 mask)
 
 	return 0;
 }
+
+u64 perf_reg_abi(struct task_struct *task)
+{
+	return PERF_SAMPLE_REGS_ABI_32;
+}
 #else /* CONFIG_X86_64 */
 #define REG_NOSUPPORT ((1ULL << PERF_REG_X86_DS) | \
 		       (1ULL << PERF_REG_X86_ES) | \
@@ -87,4 +94,12 @@ int perf_reg_validate(u64 mask)
 
 	return 0;
 }
+
+u64 perf_reg_abi(struct task_struct *task)
+{
+	if (test_tsk_thread_flag(task, TIF_IA32))
+		return PERF_SAMPLE_REGS_ABI_32;
+	else
+		return PERF_SAMPLE_REGS_ABI_64;
+}
 #endif /* CONFIG_X86_32 */
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 7602ccb3f40..3d4d84745f0 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -130,8 +130,9 @@ enum perf_event_sample_format {
 	PERF_SAMPLE_STREAM_ID			= 1U << 9,
 	PERF_SAMPLE_RAW				= 1U << 10,
 	PERF_SAMPLE_BRANCH_STACK		= 1U << 11,
+	PERF_SAMPLE_REGS_USER			= 1U << 12,
 
-	PERF_SAMPLE_MAX = 1U << 12,		/* non-ABI */
+	PERF_SAMPLE_MAX = 1U << 13,		/* non-ABI */
 };
 
 /*
@@ -162,6 +163,15 @@ enum perf_branch_sample_type {
 	 PERF_SAMPLE_BRANCH_KERNEL|\
 	 PERF_SAMPLE_BRANCH_HV)
 
+/*
+ * Values to determine ABI of the registers dump.
+ */
+enum perf_sample_regs_abi {
+	PERF_SAMPLE_REGS_ABI_NONE	= 0,
+	PERF_SAMPLE_REGS_ABI_32		= 1,
+	PERF_SAMPLE_REGS_ABI_64		= 2,
+};
+
 /*
  * The format of the data returned by read() on a perf event fd,
  * as specified by attr.read_format:
@@ -194,6 +204,7 @@ enum perf_event_read_format {
 #define PERF_ATTR_SIZE_VER0	64	/* sizeof first published struct */
 #define PERF_ATTR_SIZE_VER1	72	/* add: config2 */
 #define PERF_ATTR_SIZE_VER2	80	/* add: branch_sample_type */
+#define PERF_ATTR_SIZE_VER3	88	/* add: sample_regs_user */
 
 /*
  * Hardware event_id to monitor via a performance monitoring event:
@@ -271,7 +282,13 @@ struct perf_event_attr {
 		__u64		bp_len;
 		__u64		config2; /* extension of config1 */
 	};
-	__u64	branch_sample_type; /* enum branch_sample_type */
+	__u64	branch_sample_type; /* enum perf_branch_sample_type */
+
+	/*
+	 * Defines set of user regs to dump on samples.
+	 * See asm/perf_regs.h for details.
+	 */
+	__u64	sample_regs_user;
 };
 
 /*
@@ -548,6 +565,9 @@ enum perf_event_type {
 	 *	  char                  data[size];}&& PERF_SAMPLE_RAW
 	 *
 	 *	{ u64 from, to, flags } lbr[nr];} && PERF_SAMPLE_BRANCH_STACK
+	 *
+	 * 	{ u64			abi; # enum perf_sample_regs_abi
+	 * 	  u64			regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER
 	 * };
 	 */
 	PERF_RECORD_SAMPLE			= 9,
@@ -609,6 +629,7 @@ struct perf_guest_info_callbacks {
 #include <linux/static_key.h>
 #include <linux/atomic.h>
 #include <linux/sysfs.h>
+#include <linux/perf_regs.h>
 #include <asm/local.h>
 
 struct perf_callchain_entry {
@@ -654,6 +675,11 @@ struct perf_branch_stack {
 	struct perf_branch_entry	entries[0];
 };
 
+struct perf_regs_user {
+	__u64		abi;
+	struct pt_regs	*regs;
+};
+
 struct task_struct;
 
 /*
@@ -1133,6 +1159,7 @@ struct perf_sample_data {
 	struct perf_callchain_entry	*callchain;
 	struct perf_raw_record		*raw;
 	struct perf_branch_stack	*br_stack;
+	struct perf_regs_user		regs_user;
 };
 
 static inline void perf_sample_data_init(struct perf_sample_data *data,
@@ -1142,7 +1169,9 @@ static inline void perf_sample_data_init(struct perf_sample_data *data,
 	data->addr = addr;
 	data->raw  = NULL;
 	data->br_stack = NULL;
-	data->period	= period;
+	data->period = period;
+	data->regs_user.abi = PERF_SAMPLE_REGS_ABI_NONE;
+	data->regs_user.regs = NULL;
 }
 
 extern void perf_output_sample(struct perf_output_handle *handle,
diff --git a/include/linux/perf_regs.h b/include/linux/perf_regs.h
index a2f1a98f783..3c73d5fe18b 100644
--- a/include/linux/perf_regs.h
+++ b/include/linux/perf_regs.h
@@ -5,6 +5,7 @@
 #include <asm/perf_regs.h>
 u64 perf_reg_value(struct pt_regs *regs, int idx);
 int perf_reg_validate(u64 mask);
+u64 perf_reg_abi(struct task_struct *task);
 #else
 static inline u64 perf_reg_value(struct pt_regs *regs, int idx)
 {
@@ -15,5 +16,10 @@ static inline int perf_reg_validate(u64 mask)
 {
 	return mask ? -ENOSYS : 0;
 }
+
+static inline u64 perf_reg_abi(struct task_struct *task)
+{
+	return PERF_SAMPLE_REGS_ABI_NONE;
+}
 #endif /* CONFIG_HAVE_PERF_REGS */
 #endif /* _LINUX_PERF_REGS_H */
diff --git a/kernel/events/core.c b/kernel/events/core.c
index b7935fcec7d..d3ce97525b9 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3756,6 +3756,37 @@ int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs)
 }
 EXPORT_SYMBOL_GPL(perf_unregister_guest_info_callbacks);
 
+static void
+perf_output_sample_regs(struct perf_output_handle *handle,
+			struct pt_regs *regs, u64 mask)
+{
+	int bit;
+
+	for_each_set_bit(bit, (const unsigned long *) &mask,
+			 sizeof(mask) * BITS_PER_BYTE) {
+		u64 val;
+
+		val = perf_reg_value(regs, bit);
+		perf_output_put(handle, val);
+	}
+}
+
+static void perf_sample_regs_user(struct perf_regs_user *regs_user,
+				  struct pt_regs *regs)
+{
+	if (!user_mode(regs)) {
+		if (current->mm)
+			regs = task_pt_regs(current);
+		else
+			regs = NULL;
+	}
+
+	if (regs) {
+		regs_user->regs = regs;
+		regs_user->abi  = perf_reg_abi(current);
+	}
+}
+
 static void __perf_event_header__init_id(struct perf_event_header *header,
 					 struct perf_sample_data *data,
 					 struct perf_event *event)
@@ -4016,6 +4047,23 @@ void perf_output_sample(struct perf_output_handle *handle,
 			perf_output_put(handle, nr);
 		}
 	}
+
+	if (sample_type & PERF_SAMPLE_REGS_USER) {
+		u64 abi = data->regs_user.abi;
+
+		/*
+		 * If there are no regs to dump, notice it through
+		 * first u64 being zero (PERF_SAMPLE_REGS_ABI_NONE).
+		 */
+		perf_output_put(handle, abi);
+
+		if (abi) {
+			u64 mask = event->attr.sample_regs_user;
+			perf_output_sample_regs(handle,
+						data->regs_user.regs,
+						mask);
+		}
+	}
 }
 
 void perf_prepare_sample(struct perf_event_header *header,
@@ -4067,6 +4115,20 @@ void perf_prepare_sample(struct perf_event_header *header,
 		}
 		header->size += size;
 	}
+
+	if (sample_type & PERF_SAMPLE_REGS_USER) {
+		/* regs dump ABI info */
+		int size = sizeof(u64);
+
+		perf_sample_regs_user(&data->regs_user, regs);
+
+		if (data->regs_user.regs) {
+			u64 mask = event->attr.sample_regs_user;
+			size += hweight64(mask) * sizeof(u64);
+		}
+
+		header->size += size;
+	}
 }
 
 static void perf_event_output(struct perf_event *event,
@@ -6142,6 +6204,10 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
 			attr->branch_sample_type = mask;
 		}
 	}
+
+	if (attr->sample_type & PERF_SAMPLE_REGS_USER)
+		ret = perf_reg_validate(attr->sample_regs_user);
+
 out:
 	return ret;
 
-- 
cgit v1.2.3-70-g09d2


From 91d7753a45f8525dc75b6be01e427dc1c378dc16 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Tue, 7 Aug 2012 15:20:38 +0200
Subject: perf: Factor __output_copy to be usable with specific copy function

Adding a generic way to use __output_copy function with specific copy
function via DEFINE_PERF_OUTPUT_COPY macro.

Using this to add new __output_copy_user function, that provides output
copy from user pointers. For x86 the copy_from_user_nmi function is used
and __copy_from_user_inatomic for the rest of the architectures.

This new function will be used in user stack dump on sample, coming in
next patches.

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Cc: "Frank Ch. Eigler" <fche@redhat.com>
Cc: Arun Sharma <asharma@fb.com>
Cc: Benjamin Redelings <benjamin.redelings@nescent.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Frank Ch. Eigler <fche@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Robert Richter <robert.richter@amd.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: Tom Zanussi <tzanussi@gmail.com>
Cc: Ulrich Drepper <drepper@gmail.com>
Link: http://lkml.kernel.org/r/1344345647-11536-4-git-send-email-jolsa@redhat.com
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 arch/x86/include/asm/perf_event.h |  2 ++
 include/linux/perf_event.h        |  2 +-
 kernel/events/internal.h          | 62 ++++++++++++++++++++++++++-------------
 kernel/events/ring_buffer.c       |  4 +--
 4 files changed, 46 insertions(+), 24 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index cb4e43bce98..4fabcdf1cfa 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -262,4 +262,6 @@ static inline void perf_check_microcode(void) { }
  static inline void amd_pmu_disable_virt(void) { }
 #endif
 
+#define arch_perf_out_copy_user copy_from_user_nmi
+
 #endif /* _ASM_X86_PERF_EVENT_H */
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 3d4d84745f0..d41394a1af3 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1319,7 +1319,7 @@ static inline bool has_branch_stack(struct perf_event *event)
 extern int perf_output_begin(struct perf_output_handle *handle,
 			     struct perf_event *event, unsigned int size);
 extern void perf_output_end(struct perf_output_handle *handle);
-extern void perf_output_copy(struct perf_output_handle *handle,
+extern unsigned int perf_output_copy(struct perf_output_handle *handle,
 			     const void *buf, unsigned int len);
 extern int perf_swevent_get_recursion_context(void);
 extern void perf_swevent_put_recursion_context(int rctx);
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index a096c19f2c2..7fd5408493d 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -2,6 +2,7 @@
 #define _KERNEL_EVENTS_INTERNAL_H
 
 #include <linux/hardirq.h>
+#include <linux/uaccess.h>
 
 /* Buffer handling */
 
@@ -76,30 +77,49 @@ static inline unsigned long perf_data_size(struct ring_buffer *rb)
 	return rb->nr_pages << (PAGE_SHIFT + page_order(rb));
 }
 
-static inline void
-__output_copy(struct perf_output_handle *handle,
-		   const void *buf, unsigned int len)
+#define DEFINE_OUTPUT_COPY(func_name, memcpy_func)			\
+static inline unsigned int						\
+func_name(struct perf_output_handle *handle,				\
+	  const void *buf, unsigned int len)				\
+{									\
+	unsigned long size, written;					\
+									\
+	do {								\
+		size = min_t(unsigned long, handle->size, len);		\
+									\
+		written = memcpy_func(handle->addr, buf, size);		\
+									\
+		len -= written;						\
+		handle->addr += written;				\
+		buf += written;						\
+		handle->size -= written;				\
+		if (!handle->size) {					\
+			struct ring_buffer *rb = handle->rb;		\
+									\
+			handle->page++;					\
+			handle->page &= rb->nr_pages - 1;		\
+			handle->addr = rb->data_pages[handle->page];	\
+			handle->size = PAGE_SIZE << page_order(rb);	\
+		}							\
+	} while (len && written == size);				\
+									\
+	return len;							\
+}
+
+static inline int memcpy_common(void *dst, const void *src, size_t n)
 {
-	do {
-		unsigned long size = min_t(unsigned long, handle->size, len);
-
-		memcpy(handle->addr, buf, size);
-
-		len -= size;
-		handle->addr += size;
-		buf += size;
-		handle->size -= size;
-		if (!handle->size) {
-			struct ring_buffer *rb = handle->rb;
-
-			handle->page++;
-			handle->page &= rb->nr_pages - 1;
-			handle->addr = rb->data_pages[handle->page];
-			handle->size = PAGE_SIZE << page_order(rb);
-		}
-	} while (len);
+	memcpy(dst, src, n);
+	return n;
 }
 
+DEFINE_OUTPUT_COPY(__output_copy, memcpy_common)
+
+#ifndef arch_perf_out_copy_user
+#define arch_perf_out_copy_user __copy_from_user_inatomic
+#endif
+
+DEFINE_OUTPUT_COPY(__output_copy_user, arch_perf_out_copy_user)
+
 /* Callchain handling */
 extern struct perf_callchain_entry *
 perf_callchain(struct perf_event *event, struct pt_regs *regs);
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 6ddaba43fb7..b4c2ad3dee7 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -182,10 +182,10 @@ out:
 	return -ENOSPC;
 }
 
-void perf_output_copy(struct perf_output_handle *handle,
+unsigned int perf_output_copy(struct perf_output_handle *handle,
 		      const void *buf, unsigned int len)
 {
-	__output_copy(handle, buf, len);
+	return __output_copy(handle, buf, len);
 }
 
 void perf_output_end(struct perf_output_handle *handle)
-- 
cgit v1.2.3-70-g09d2


From c5ebcedb566ef17bda7b02686e0d658a7bb42ee7 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Tue, 7 Aug 2012 15:20:40 +0200
Subject: perf: Add ability to attach user stack dump to sample

Introducing PERF_SAMPLE_STACK_USER sample type bit to trigger the dump
of the user level stack on sample. The size of the dump is specified by
sample_stack_user value.

Being able to dump parts of the user stack, starting from the stack
pointer, will be useful to make a post mortem dwarf CFI based stack
unwinding.

Added HAVE_PERF_USER_STACK_DUMP config option to determine if the
architecture provides user stack dump on perf event samples.  This needs
access to the user stack pointer which is not unified across
architectures. Enabling this for x86 architecture.

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Original-patch-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: "Frank Ch. Eigler" <fche@redhat.com>
Cc: Arun Sharma <asharma@fb.com>
Cc: Benjamin Redelings <benjamin.redelings@nescent.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Frank Ch. Eigler <fche@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Robert Richter <robert.richter@amd.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: Tom Zanussi <tzanussi@gmail.com>
Cc: Ulrich Drepper <drepper@gmail.com>
Link: http://lkml.kernel.org/r/1344345647-11536-6-git-send-email-jolsa@redhat.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 arch/Kconfig               |   7 +++
 arch/x86/Kconfig           |   1 +
 include/linux/perf_event.h |  18 +++++-
 kernel/events/core.c       | 150 ++++++++++++++++++++++++++++++++++++++++++++-
 kernel/events/internal.h   |  16 +++++
 5 files changed, 190 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/Kconfig b/arch/Kconfig
index 68d827b7ae8..2a83a3f6a61 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -228,6 +228,13 @@ config HAVE_PERF_REGS
 	  Support selective register dumps for perf events. This includes
 	  bit-mapping of each registers and a unique architecture id.
 
+config HAVE_PERF_USER_STACK_DUMP
+	bool
+	help
+	  Support user stack dumps for perf event samples. This needs
+	  access to the user stack pointer which is not unified across
+	  architectures.
+
 config HAVE_ARCH_JUMP_LABEL
 	bool
 
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 3fab6ec9edc..a2d19ee750c 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -61,6 +61,7 @@ config X86
 	select PERF_EVENTS
 	select HAVE_PERF_EVENTS_NMI
 	select HAVE_PERF_REGS
+	select HAVE_PERF_USER_STACK_DUMP
 	select ANON_INODES
 	select HAVE_ALIGNED_STRUCT_PAGE if SLUB && !M386
 	select HAVE_CMPXCHG_LOCAL if !M386
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 8a73f75beb1..d1d25f6a5e2 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -131,8 +131,9 @@ enum perf_event_sample_format {
 	PERF_SAMPLE_RAW				= 1U << 10,
 	PERF_SAMPLE_BRANCH_STACK		= 1U << 11,
 	PERF_SAMPLE_REGS_USER			= 1U << 12,
+	PERF_SAMPLE_STACK_USER			= 1U << 13,
 
-	PERF_SAMPLE_MAX = 1U << 13,		/* non-ABI */
+	PERF_SAMPLE_MAX = 1U << 14,		/* non-ABI */
 };
 
 /*
@@ -205,6 +206,7 @@ enum perf_event_read_format {
 #define PERF_ATTR_SIZE_VER1	72	/* add: config2 */
 #define PERF_ATTR_SIZE_VER2	80	/* add: branch_sample_type */
 #define PERF_ATTR_SIZE_VER3	88	/* add: sample_regs_user */
+#define PERF_ATTR_SIZE_VER4	96	/* add: sample_stack_user */
 
 /*
  * Hardware event_id to monitor via a performance monitoring event:
@@ -289,6 +291,14 @@ struct perf_event_attr {
 	 * See asm/perf_regs.h for details.
 	 */
 	__u64	sample_regs_user;
+
+	/*
+	 * Defines size of the user stack to dump on samples.
+	 */
+	__u32	sample_stack_user;
+
+	/* Align to u64. */
+	__u32	__reserved_2;
 };
 
 /*
@@ -568,6 +578,10 @@ enum perf_event_type {
 	 *
 	 * 	{ u64			abi; # enum perf_sample_regs_abi
 	 * 	  u64			regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER
+	 *
+	 * 	{ u64			size;
+	 * 	  char			data[size];
+	 * 	  u64			dyn_size; } && PERF_SAMPLE_STACK_USER
 	 * };
 	 */
 	PERF_RECORD_SAMPLE			= 9,
@@ -1160,6 +1174,7 @@ struct perf_sample_data {
 	struct perf_raw_record		*raw;
 	struct perf_branch_stack	*br_stack;
 	struct perf_regs_user		regs_user;
+	u64				stack_user_size;
 };
 
 static inline void perf_sample_data_init(struct perf_sample_data *data,
@@ -1172,6 +1187,7 @@ static inline void perf_sample_data_init(struct perf_sample_data *data,
 	data->period = period;
 	data->regs_user.abi = PERF_SAMPLE_REGS_ABI_NONE;
 	data->regs_user.regs = NULL;
+	data->stack_user_size = 0;
 }
 
 extern void perf_output_sample(struct perf_output_handle *handle,
diff --git a/kernel/events/core.c b/kernel/events/core.c
index d3ce97525b9..2ba890450d1 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -36,6 +36,7 @@
 #include <linux/perf_event.h>
 #include <linux/ftrace_event.h>
 #include <linux/hw_breakpoint.h>
+#include <linux/mm_types.h>
 
 #include "internal.h"
 
@@ -3787,6 +3788,101 @@ static void perf_sample_regs_user(struct perf_regs_user *regs_user,
 	}
 }
 
+/*
+ * Get remaining task size from user stack pointer.
+ *
+ * It'd be better to take stack vma map and limit this more
+ * precisly, but there's no way to get it safely under interrupt,
+ * so using TASK_SIZE as limit.
+ */
+static u64 perf_ustack_task_size(struct pt_regs *regs)
+{
+	unsigned long addr = perf_user_stack_pointer(regs);
+
+	if (!addr || addr >= TASK_SIZE)
+		return 0;
+
+	return TASK_SIZE - addr;
+}
+
+static u16
+perf_sample_ustack_size(u16 stack_size, u16 header_size,
+			struct pt_regs *regs)
+{
+	u64 task_size;
+
+	/* No regs, no stack pointer, no dump. */
+	if (!regs)
+		return 0;
+
+	/*
+	 * Check if we fit in with the requested stack size into the:
+	 * - TASK_SIZE
+	 *   If we don't, we limit the size to the TASK_SIZE.
+	 *
+	 * - remaining sample size
+	 *   If we don't, we customize the stack size to
+	 *   fit in to the remaining sample size.
+	 */
+
+	task_size  = min((u64) USHRT_MAX, perf_ustack_task_size(regs));
+	stack_size = min(stack_size, (u16) task_size);
+
+	/* Current header size plus static size and dynamic size. */
+	header_size += 2 * sizeof(u64);
+
+	/* Do we fit in with the current stack dump size? */
+	if ((u16) (header_size + stack_size) < header_size) {
+		/*
+		 * If we overflow the maximum size for the sample,
+		 * we customize the stack dump size to fit in.
+		 */
+		stack_size = USHRT_MAX - header_size - sizeof(u64);
+		stack_size = round_up(stack_size, sizeof(u64));
+	}
+
+	return stack_size;
+}
+
+static void
+perf_output_sample_ustack(struct perf_output_handle *handle, u64 dump_size,
+			  struct pt_regs *regs)
+{
+	/* Case of a kernel thread, nothing to dump */
+	if (!regs) {
+		u64 size = 0;
+		perf_output_put(handle, size);
+	} else {
+		unsigned long sp;
+		unsigned int rem;
+		u64 dyn_size;
+
+		/*
+		 * We dump:
+		 * static size
+		 *   - the size requested by user or the best one we can fit
+		 *     in to the sample max size
+		 * data
+		 *   - user stack dump data
+		 * dynamic size
+		 *   - the actual dumped size
+		 */
+
+		/* Static size. */
+		perf_output_put(handle, dump_size);
+
+		/* Data. */
+		sp = perf_user_stack_pointer(regs);
+		rem = __output_copy_user(handle, (void *) sp, dump_size);
+		dyn_size = dump_size - rem;
+
+		perf_output_skip(handle, rem);
+
+		/* Dynamic size. */
+		perf_output_put(handle, dyn_size);
+	}
+}
+
 static void __perf_event_header__init_id(struct perf_event_header *header,
 					 struct perf_sample_data *data,
 					 struct perf_event *event)
@@ -4064,6 +4160,11 @@ void perf_output_sample(struct perf_output_handle *handle,
 						mask);
 		}
 	}
+
+	if (sample_type & PERF_SAMPLE_STACK_USER)
+		perf_output_sample_ustack(handle,
+					  data->stack_user_size,
+					  data->regs_user.regs);
 }
 
 void perf_prepare_sample(struct perf_event_header *header,
@@ -4129,6 +4230,35 @@ void perf_prepare_sample(struct perf_event_header *header,
 
 		header->size += size;
 	}
+
+	if (sample_type & PERF_SAMPLE_STACK_USER) {
+		/*
+		 * Either we need PERF_SAMPLE_STACK_USER bit to be allways
+		 * processed as the last one or have additional check added
+		 * in case new sample type is added, because we could eat
+		 * up the rest of the sample size.
+		 */
+		struct perf_regs_user *uregs = &data->regs_user;
+		u16 stack_size = event->attr.sample_stack_user;
+		u16 size = sizeof(u64);
+
+		if (!uregs->abi)
+			perf_sample_regs_user(uregs, regs);
+
+		stack_size = perf_sample_ustack_size(stack_size, header->size,
+						     uregs->regs);
+
+		/*
+		 * If there is something to dump, add space for the dump
+		 * itself and for the field that tells the dynamic size,
+		 * which is how many have been actually dumped.
+		 */
+		if (stack_size)
+			size += sizeof(u64) + stack_size;
+
+		data->stack_user_size = stack_size;
+		header->size += size;
+	}
 }
 
 static void perf_event_output(struct perf_event *event,
@@ -6205,8 +6335,26 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
 		}
 	}
 
-	if (attr->sample_type & PERF_SAMPLE_REGS_USER)
+	if (attr->sample_type & PERF_SAMPLE_REGS_USER) {
 		ret = perf_reg_validate(attr->sample_regs_user);
+		if (ret)
+			return ret;
+	}
+
+	if (attr->sample_type & PERF_SAMPLE_STACK_USER) {
+		if (!arch_perf_have_user_stack_dump())
+			return -ENOSYS;
+
+		/*
+		 * We have __u32 type for the size, but so far
+		 * we can only use __u16 as maximum due to the
+		 * __u16 sample size limit.
+		 */
+		if (attr->sample_stack_user >= USHRT_MAX)
+			ret = -EINVAL;
+		else if (!IS_ALIGNED(attr->sample_stack_user, sizeof(u64)))
+			ret = -EINVAL;
+	}
 
 out:
 	return ret;
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index ce7bdfc1d04..d56a64c99a8 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -158,4 +158,20 @@ static inline void put_recursion_context(int *recursion, int rctx)
 	recursion[rctx]--;
 }
 
+#ifdef CONFIG_HAVE_PERF_USER_STACK_DUMP
+static inline bool arch_perf_have_user_stack_dump(void)
+{
+	return true;
+}
+
+#define perf_user_stack_pointer(regs) user_stack_pointer(regs)
+#else
+static inline bool arch_perf_have_user_stack_dump(void)
+{
+	return false;
+}
+
+#define perf_user_stack_pointer(regs) 0
+#endif /* CONFIG_HAVE_PERF_USER_STACK_DUMP */
+
 #endif /* _KERNEL_EVENTS_INTERNAL_H */
-- 
cgit v1.2.3-70-g09d2


From 51d59c6b422f3f95940ae4e5b42f165595906aee Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Fri, 3 Aug 2012 15:57:49 -0300
Subject: KVM: x86: fix pvclock guest stopped flag reporting

kvm_guest_time_update unconditionally clears hv_clock.flags field,
so the notification never reaches the guest.

Fix it by allowing PVCLOCK_GUEST_STOPPED to passthrough.

Reviewed-by: Eric B Munson <emunson@mgebm.net>
Reviewed-by: Amit Shah <amit.shah@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  2 ++
 arch/x86/kvm/x86.c              | 13 ++++++++++---
 2 files changed, 12 insertions(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 1309e69b57f..fc0e752e756 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -420,6 +420,8 @@ struct kvm_vcpu_arch {
 	unsigned int hw_tsc_khz;
 	unsigned int time_offset;
 	struct page *time_page;
+	/* set guest stopped flag in pvclock flags field */
+	bool pvclock_set_guest_stopped_request;
 
 	struct {
 		u64 msr_val;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 91a595827de..fb0d93788bf 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1134,6 +1134,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
 	unsigned long this_tsc_khz;
 	s64 kernel_ns, max_kernel_ns;
 	u64 tsc_timestamp;
+	u8 pvclock_flags;
 
 	/* Keep irq disabled to prevent changes to the clock */
 	local_irq_save(flags);
@@ -1215,7 +1216,14 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
 	vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
 	vcpu->last_kernel_ns = kernel_ns;
 	vcpu->last_guest_tsc = tsc_timestamp;
-	vcpu->hv_clock.flags = 0;
+
+	pvclock_flags = 0;
+	if (vcpu->pvclock_set_guest_stopped_request) {
+		pvclock_flags |= PVCLOCK_GUEST_STOPPED;
+		vcpu->pvclock_set_guest_stopped_request = false;
+	}
+
+	vcpu->hv_clock.flags = pvclock_flags;
 
 	/*
 	 * The interface expects us to write an even number signaling that the
@@ -2624,10 +2632,9 @@ static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu,
  */
 static int kvm_set_guest_paused(struct kvm_vcpu *vcpu)
 {
-	struct pvclock_vcpu_time_info *src = &vcpu->arch.hv_clock;
 	if (!vcpu->arch.time_page)
 		return -EINVAL;
-	src->flags |= PVCLOCK_GUEST_STOPPED;
+	vcpu->arch.pvclock_set_guest_stopped_request = true;
 	kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
 	return 0;
 }
-- 
cgit v1.2.3-70-g09d2


From e423ca155d3f5f16b46e30de9c818875b1fd617d Mon Sep 17 00:00:00 2001
From: Raghavendra K T <raghavendra.kt@linux.vnet.ibm.com>
Date: Tue, 7 Aug 2012 13:10:13 +0530
Subject: KVM: Correct vmrun to vmcall typo

Signed-off-by: Raghavendra K T <raghavendra.kt@linux.vnet.ibm.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/include/asm/kvm_para.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index 2f7712e08b1..20f5697888b 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -116,7 +116,7 @@ static inline bool kvm_check_and_clear_guest_paused(void)
  */
 #define KVM_HYPERCALL ".byte 0x0f,0x01,0xc1"
 
-/* For KVM hypercalls, a three-byte sequence of either the vmrun or the vmmrun
+/* For KVM hypercalls, a three-byte sequence of either the vmcall or the vmmcall
  * instruction.  The hypervisor may replace it with something else but only the
  * instructions are guaranteed to be supported.
  *
-- 
cgit v1.2.3-70-g09d2


From 2a7921b7a033d5cfc176690d6297c82846c582b2 Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Sun, 12 Aug 2012 16:12:29 +0300
Subject: KVM: VMX: restore MSR_IA32_DEBUGCTLMSR after VMEXIT

MSR_IA32_DEBUGCTLMSR is zeroed on VMEXIT. Restore it to the correct
value.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/vmx.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index cc8ad983692..d0f4bec9fc6 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -6222,6 +6222,7 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
 static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	unsigned long debugctlmsr;
 
 	if (is_guest_mode(vcpu) && !vmx->nested.nested_run_pending) {
 		struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
@@ -6261,6 +6262,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 		vmx_set_interrupt_shadow(vcpu, 0);
 
 	atomic_switch_perf_msrs(vmx);
+	debugctlmsr = get_debugctlmsr();
 
 	vmx->__launched = vmx->loaded_vmcs->launched;
 	asm(
@@ -6362,6 +6364,10 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 #endif
 	      );
 
+	/* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
+	if (debugctlmsr)
+		update_debugctlmsr(debugctlmsr);
+
 #ifndef CONFIG_X86_64
 	/*
 	 * The sysexit path does not restore ds/es, so we must set them to
-- 
cgit v1.2.3-70-g09d2


From dbcb4e798072d114fe68813f39a9efd239ab99c0 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Mon, 13 Aug 2012 15:38:22 +0300
Subject: KVM: VMX: Advertize RDTSC exiting to nested guests

All processors that support VMX have that feature, and guests (Xen) depend on
it.  As we already implement it, advertize it to the guest.

Signed-off-by: Avi Kivity <avi@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/vmx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index d0f4bec9fc6..13e0296cea4 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1990,7 +1990,7 @@ static __init void nested_vmx_setup_ctls_msrs(void)
 #endif
 		CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING |
 		CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_EXITING |
-		CPU_BASED_RDPMC_EXITING |
+		CPU_BASED_RDPMC_EXITING | CPU_BASED_RDTSC_EXITING |
 		CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
 	/*
 	 * We can allow some features even when not supported by the
-- 
cgit v1.2.3-70-g09d2


From 28a6fdabb3ea775d3d707afd9d2728b3ced2c34d Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Tue, 14 Aug 2012 19:20:28 +0300
Subject: KVM: x86: drop parameter validation in ioapic/pic

We validate irq pin number when routing is setup, so
code handling illegal irq # in pic and ioapic on each injection
is never called.
Drop it, replace with BUG_ON to catch out of bounds access bugs.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/i8259.c | 18 +++++++++---------
 virt/kvm/ioapic.c    | 37 +++++++++++++++++++------------------
 2 files changed, 28 insertions(+), 27 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index e498b18f010..90c84f947d4 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -190,17 +190,17 @@ void kvm_pic_update_irq(struct kvm_pic *s)
 
 int kvm_pic_set_irq(struct kvm_pic *s, int irq, int irq_source_id, int level)
 {
-	int ret = -1;
+	int ret, irq_level;
+
+	BUG_ON(irq < 0 || irq >= PIC_NUM_PINS);
 
 	pic_lock(s);
-	if (irq >= 0 && irq < PIC_NUM_PINS) {
-		int irq_level = __kvm_irq_line_state(&s->irq_states[irq],
-						     irq_source_id, level);
-		ret = pic_set_irq1(&s->pics[irq >> 3], irq & 7, irq_level);
-		pic_update_irq(s);
-		trace_kvm_pic_set_irq(irq >> 3, irq & 7, s->pics[irq >> 3].elcr,
-				      s->pics[irq >> 3].imr, ret == 0);
-	}
+	irq_level = __kvm_irq_line_state(&s->irq_states[irq],
+					 irq_source_id, level);
+	ret = pic_set_irq1(&s->pics[irq >> 3], irq & 7, irq_level);
+	pic_update_irq(s);
+	trace_kvm_pic_set_irq(irq >> 3, irq & 7, s->pics[irq >> 3].elcr,
+			      s->pics[irq >> 3].imr, ret == 0);
 	pic_unlock(s);
 
 	return ret;
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index ef61d529a6c..cfb7e4d52dc 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -197,28 +197,29 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id,
 	u32 old_irr;
 	u32 mask = 1 << irq;
 	union kvm_ioapic_redirect_entry entry;
-	int ret = 1;
+	int ret, irq_level;
+
+	BUG_ON(irq < 0 || irq >= IOAPIC_NUM_PINS);
 
 	spin_lock(&ioapic->lock);
 	old_irr = ioapic->irr;
-	if (irq >= 0 && irq < IOAPIC_NUM_PINS) {
-		int irq_level = __kvm_irq_line_state(&ioapic->irq_states[irq],
-						     irq_source_id, level);
-		entry = ioapic->redirtbl[irq];
-		irq_level ^= entry.fields.polarity;
-		if (!irq_level)
-			ioapic->irr &= ~mask;
-		else {
-			int edge = (entry.fields.trig_mode == IOAPIC_EDGE_TRIG);
-			ioapic->irr |= mask;
-			if ((edge && old_irr != ioapic->irr) ||
-			    (!edge && !entry.fields.remote_irr))
-				ret = ioapic_service(ioapic, irq);
-			else
-				ret = 0; /* report coalesced interrupt */
-		}
-		trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0);
+	irq_level = __kvm_irq_line_state(&ioapic->irq_states[irq],
+					 irq_source_id, level);
+	entry = ioapic->redirtbl[irq];
+	irq_level ^= entry.fields.polarity;
+	if (!irq_level) {
+		ioapic->irr &= ~mask;
+		ret = 1;
+	} else {
+		int edge = (entry.fields.trig_mode == IOAPIC_EDGE_TRIG);
+		ioapic->irr |= mask;
+		if ((edge && old_irr != ioapic->irr) ||
+		    (!edge && !entry.fields.remote_irr))
+			ret = ioapic_service(ioapic, irq);
+		else
+			ret = 0; /* report coalesced interrupt */
 	}
+	trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0);
 	spin_unlock(&ioapic->lock);
 
 	return ret;
-- 
cgit v1.2.3-70-g09d2


From 8fbe6a541f50eeec5e3e49bd92db23ade9496673 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 15 Aug 2012 16:00:40 +0200
Subject: KVM guest: disable stealtime on reboot to avoid mem corruption

else, host continues to update stealtime after reboot,
which can corrupt e.g. initramfs area.
found when tracking down initramfs unpack error on initial reboot
(with qemu-kvm -smp 2, no problem with single-core).

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kernel/kvm.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index c1d61ee4b4f..1596cc8fd79 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -354,6 +354,7 @@ static void kvm_pv_guest_cpu_reboot(void *unused)
 	if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
 		wrmsrl(MSR_KVM_PV_EOI_EN, 0);
 	kvm_pv_disable_apf();
+	kvm_disable_steal_time();
 }
 
 static int kvm_pv_reboot_notify(struct notifier_block *nb,
-- 
cgit v1.2.3-70-g09d2


From 023af608254add7ba037cd634cc5f2fb21ff6420 Mon Sep 17 00:00:00 2001
From: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
Date: Sun, 22 Jul 2012 18:18:37 +0300
Subject: crypto: aesni_intel - improve lrw and xts performance by utilizing
 parallel AES-NI hardware pipelines

Use parallel LRW and XTS encryption facilities to better utilize AES-NI
hardware pipelines and gain extra performance.

Tcrypt benchmark results (async), old vs new ratios:

Intel Core i5-2450M CPU (fam: 6, model: 42, step: 7)

aes:128bit
        lrw:256bit      xts:256bit
size    lrw-enc lrw-dec xts-dec xts-dec
16B     0.99x   1.00x   1.22x   1.19x
64B     1.38x   1.50x   1.58x   1.61x
256B    2.04x   2.02x   2.27x   2.29x
1024B   2.56x   2.54x   2.89x   2.92x
8192B   2.85x   2.99x   3.40x   3.23x

aes:192bit
        lrw:320bit      xts:384bit
size    lrw-enc lrw-dec xts-dec xts-dec
16B     1.08x   1.08x   1.16x   1.17x
64B     1.48x   1.54x   1.59x   1.65x
256B    2.18x   2.17x   2.29x   2.28x
1024B   2.67x   2.67x   2.87x   3.05x
8192B   2.93x   2.84x   3.28x   3.33x

aes:256bit
        lrw:348bit      xts:512bit
size    lrw-enc lrw-dec xts-dec xts-dec
16B     1.07x   1.07x   1.18x   1.19x
64B     1.56x   1.56x   1.70x   1.71x
256B    2.22x   2.24x   2.46x   2.46x
1024B   2.76x   2.77x   3.13x   3.05x
8192B   2.99x   3.05x   3.40x   3.30x

Cc: Huang Ying <ying.huang@intel.com>
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
Reviewed-by: Kim Phillips <kim.phillips@freescale.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/x86/crypto/aesni-intel_glue.c | 253 ++++++++++++++++++++++++++++++++-----
 crypto/Kconfig                     |   2 +
 2 files changed, 220 insertions(+), 35 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 648347a0577..7c04d0da709 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -28,6 +28,9 @@
 #include <crypto/aes.h>
 #include <crypto/cryptd.h>
 #include <crypto/ctr.h>
+#include <crypto/b128ops.h>
+#include <crypto/lrw.h>
+#include <crypto/xts.h>
 #include <asm/cpu_device_id.h>
 #include <asm/i387.h>
 #include <asm/crypto/aes.h>
@@ -41,18 +44,10 @@
 #define HAS_CTR
 #endif
 
-#if defined(CONFIG_CRYPTO_LRW) || defined(CONFIG_CRYPTO_LRW_MODULE)
-#define HAS_LRW
-#endif
-
 #if defined(CONFIG_CRYPTO_PCBC) || defined(CONFIG_CRYPTO_PCBC_MODULE)
 #define HAS_PCBC
 #endif
 
-#if defined(CONFIG_CRYPTO_XTS) || defined(CONFIG_CRYPTO_XTS_MODULE)
-#define HAS_XTS
-#endif
-
 /* This data is stored at the end of the crypto_tfm struct.
  * It's a type of per "session" data storage location.
  * This needs to be 16 byte aligned.
@@ -79,6 +74,16 @@ struct aesni_hash_subkey_req_data {
 #define AES_BLOCK_MASK	(~(AES_BLOCK_SIZE-1))
 #define RFC4106_HASH_SUBKEY_SIZE 16
 
+struct aesni_lrw_ctx {
+	struct lrw_table_ctx lrw_table;
+	u8 raw_aes_ctx[sizeof(struct crypto_aes_ctx) + AESNI_ALIGN - 1];
+};
+
+struct aesni_xts_ctx {
+	u8 raw_tweak_ctx[sizeof(struct crypto_aes_ctx) + AESNI_ALIGN - 1];
+	u8 raw_crypt_ctx[sizeof(struct crypto_aes_ctx) + AESNI_ALIGN - 1];
+};
+
 asmlinkage int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
 			     unsigned int key_len);
 asmlinkage void aesni_enc(struct crypto_aes_ctx *ctx, u8 *out,
@@ -398,13 +403,6 @@ static int ablk_rfc3686_ctr_init(struct crypto_tfm *tfm)
 #endif
 #endif
 
-#ifdef HAS_LRW
-static int ablk_lrw_init(struct crypto_tfm *tfm)
-{
-	return ablk_init_common(tfm, "fpu(lrw(__driver-aes-aesni))");
-}
-#endif
-
 #ifdef HAS_PCBC
 static int ablk_pcbc_init(struct crypto_tfm *tfm)
 {
@@ -412,12 +410,160 @@ static int ablk_pcbc_init(struct crypto_tfm *tfm)
 }
 #endif
 
-#ifdef HAS_XTS
-static int ablk_xts_init(struct crypto_tfm *tfm)
+static void lrw_xts_encrypt_callback(void *ctx, u8 *blks, unsigned int nbytes)
 {
-	return ablk_init_common(tfm, "fpu(xts(__driver-aes-aesni))");
+	aesni_ecb_enc(ctx, blks, blks, nbytes);
+}
+
+static void lrw_xts_decrypt_callback(void *ctx, u8 *blks, unsigned int nbytes)
+{
+	aesni_ecb_dec(ctx, blks, blks, nbytes);
+}
+
+static int lrw_aesni_setkey(struct crypto_tfm *tfm, const u8 *key,
+			    unsigned int keylen)
+{
+	struct aesni_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
+	int err;
+
+	err = aes_set_key_common(tfm, ctx->raw_aes_ctx, key,
+				 keylen - AES_BLOCK_SIZE);
+	if (err)
+		return err;
+
+	return lrw_init_table(&ctx->lrw_table, key + keylen - AES_BLOCK_SIZE);
+}
+
+static void lrw_aesni_exit_tfm(struct crypto_tfm *tfm)
+{
+	struct aesni_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	lrw_free_table(&ctx->lrw_table);
+}
+
+static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct aesni_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	be128 buf[8];
+	struct lrw_crypt_req req = {
+		.tbuf = buf,
+		.tbuflen = sizeof(buf),
+
+		.table_ctx = &ctx->lrw_table,
+		.crypt_ctx = aes_ctx(ctx->raw_aes_ctx),
+		.crypt_fn = lrw_xts_encrypt_callback,
+	};
+	int ret;
+
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+
+	kernel_fpu_begin();
+	ret = lrw_crypt(desc, dst, src, nbytes, &req);
+	kernel_fpu_end();
+
+	return ret;
+}
+
+static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct aesni_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	be128 buf[8];
+	struct lrw_crypt_req req = {
+		.tbuf = buf,
+		.tbuflen = sizeof(buf),
+
+		.table_ctx = &ctx->lrw_table,
+		.crypt_ctx = aes_ctx(ctx->raw_aes_ctx),
+		.crypt_fn = lrw_xts_decrypt_callback,
+	};
+	int ret;
+
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+
+	kernel_fpu_begin();
+	ret = lrw_crypt(desc, dst, src, nbytes, &req);
+	kernel_fpu_end();
+
+	return ret;
+}
+
+static int xts_aesni_setkey(struct crypto_tfm *tfm, const u8 *key,
+			    unsigned int keylen)
+{
+	struct aesni_xts_ctx *ctx = crypto_tfm_ctx(tfm);
+	u32 *flags = &tfm->crt_flags;
+	int err;
+
+	/* key consists of keys of equal size concatenated, therefore
+	 * the length must be even
+	 */
+	if (keylen % 2) {
+		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+		return -EINVAL;
+	}
+
+	/* first half of xts-key is for crypt */
+	err = aes_set_key_common(tfm, ctx->raw_crypt_ctx, key, keylen / 2);
+	if (err)
+		return err;
+
+	/* second half of xts-key is for tweak */
+	return aes_set_key_common(tfm, ctx->raw_tweak_ctx, key + keylen / 2,
+				  keylen / 2);
+}
+
+
+static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct aesni_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	be128 buf[8];
+	struct xts_crypt_req req = {
+		.tbuf = buf,
+		.tbuflen = sizeof(buf),
+
+		.tweak_ctx = aes_ctx(ctx->raw_tweak_ctx),
+		.tweak_fn = XTS_TWEAK_CAST(aesni_enc),
+		.crypt_ctx = aes_ctx(ctx->raw_crypt_ctx),
+		.crypt_fn = lrw_xts_encrypt_callback,
+	};
+	int ret;
+
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+
+	kernel_fpu_begin();
+	ret = xts_crypt(desc, dst, src, nbytes, &req);
+	kernel_fpu_end();
+
+	return ret;
+}
+
+static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct aesni_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	be128 buf[8];
+	struct xts_crypt_req req = {
+		.tbuf = buf,
+		.tbuflen = sizeof(buf),
+
+		.tweak_ctx = aes_ctx(ctx->raw_tweak_ctx),
+		.tweak_fn = XTS_TWEAK_CAST(aesni_enc),
+		.crypt_ctx = aes_ctx(ctx->raw_crypt_ctx),
+		.crypt_fn = lrw_xts_decrypt_callback,
+	};
+	int ret;
+
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+
+	kernel_fpu_begin();
+	ret = xts_crypt(desc, dst, src, nbytes, &req);
+	kernel_fpu_end();
+
+	return ret;
 }
-#endif
 
 #ifdef CONFIG_X86_64
 static int rfc4106_init(struct crypto_tfm *tfm)
@@ -1035,10 +1181,10 @@ static struct crypto_alg aesni_algs[] = { {
 	},
 #endif
 #endif
-#ifdef HAS_LRW
+#ifdef HAS_PCBC
 }, {
-	.cra_name		= "lrw(aes)",
-	.cra_driver_name	= "lrw-aes-aesni",
+	.cra_name		= "pcbc(aes)",
+	.cra_driver_name	= "pcbc-aes-aesni",
 	.cra_priority		= 400,
 	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
 	.cra_blocksize		= AES_BLOCK_SIZE,
@@ -1046,12 +1192,12 @@ static struct crypto_alg aesni_algs[] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_lrw_init,
+	.cra_init		= ablk_pcbc_init,
 	.cra_exit		= ablk_exit,
 	.cra_u = {
 		.ablkcipher = {
-			.min_keysize	= AES_MIN_KEY_SIZE + AES_BLOCK_SIZE,
-			.max_keysize	= AES_MAX_KEY_SIZE + AES_BLOCK_SIZE,
+			.min_keysize	= AES_MIN_KEY_SIZE,
+			.max_keysize	= AES_MAX_KEY_SIZE,
 			.ivsize		= AES_BLOCK_SIZE,
 			.setkey		= ablk_set_key,
 			.encrypt	= ablk_encrypt,
@@ -1059,10 +1205,50 @@ static struct crypto_alg aesni_algs[] = { {
 		},
 	},
 #endif
-#ifdef HAS_PCBC
 }, {
-	.cra_name		= "pcbc(aes)",
-	.cra_driver_name	= "pcbc-aes-aesni",
+	.cra_name		= "__lrw-aes-aesni",
+	.cra_driver_name	= "__driver-lrw-aes-aesni",
+	.cra_priority		= 0,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= AES_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct aesni_lrw_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_exit		= lrw_aesni_exit_tfm,
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= AES_MIN_KEY_SIZE + AES_BLOCK_SIZE,
+			.max_keysize	= AES_MAX_KEY_SIZE + AES_BLOCK_SIZE,
+			.ivsize		= AES_BLOCK_SIZE,
+			.setkey		= lrw_aesni_setkey,
+			.encrypt	= lrw_encrypt,
+			.decrypt	= lrw_decrypt,
+		},
+	},
+}, {
+	.cra_name		= "__xts-aes-aesni",
+	.cra_driver_name	= "__driver-xts-aes-aesni",
+	.cra_priority		= 0,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= AES_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct aesni_xts_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= 2 * AES_MIN_KEY_SIZE,
+			.max_keysize	= 2 * AES_MAX_KEY_SIZE,
+			.ivsize		= AES_BLOCK_SIZE,
+			.setkey		= xts_aesni_setkey,
+			.encrypt	= xts_encrypt,
+			.decrypt	= xts_decrypt,
+		},
+	},
+}, {
+	.cra_name		= "lrw(aes)",
+	.cra_driver_name	= "lrw-aes-aesni",
 	.cra_priority		= 400,
 	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
 	.cra_blocksize		= AES_BLOCK_SIZE,
@@ -1070,20 +1256,18 @@ static struct crypto_alg aesni_algs[] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_pcbc_init,
+	.cra_init		= ablk_init,
 	.cra_exit		= ablk_exit,
 	.cra_u = {
 		.ablkcipher = {
-			.min_keysize	= AES_MIN_KEY_SIZE,
-			.max_keysize	= AES_MAX_KEY_SIZE,
+			.min_keysize	= AES_MIN_KEY_SIZE + AES_BLOCK_SIZE,
+			.max_keysize	= AES_MAX_KEY_SIZE + AES_BLOCK_SIZE,
 			.ivsize		= AES_BLOCK_SIZE,
 			.setkey		= ablk_set_key,
 			.encrypt	= ablk_encrypt,
 			.decrypt	= ablk_decrypt,
 		},
 	},
-#endif
-#ifdef HAS_XTS
 }, {
 	.cra_name		= "xts(aes)",
 	.cra_driver_name	= "xts-aes-aesni",
@@ -1094,7 +1278,7 @@ static struct crypto_alg aesni_algs[] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_xts_init,
+	.cra_init		= ablk_init,
 	.cra_exit		= ablk_exit,
 	.cra_u = {
 		.ablkcipher = {
@@ -1106,7 +1290,6 @@ static struct crypto_alg aesni_algs[] = { {
 			.decrypt	= ablk_decrypt,
 		},
 	},
-#endif
 } };
 
 
diff --git a/crypto/Kconfig b/crypto/Kconfig
index cbcc0e2eeda..213fb37be51 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -564,6 +564,8 @@ config CRYPTO_AES_NI_INTEL
 	select CRYPTO_CRYPTD
 	select CRYPTO_ABLK_HELPER_X86
 	select CRYPTO_ALGAPI
+	select CRYPTO_LRW
+	select CRYPTO_XTS
 	help
 	  Use Intel AES-NI instructions for AES algorithm.
 
-- 
cgit v1.2.3-70-g09d2


From 0570a365a6b8ccbfe7baa459de2b7396ddf2de90 Mon Sep 17 00:00:00 2001
From: Paul Bolle <pebolle@tiscali.nl>
Date: Sun, 19 Aug 2012 19:06:43 +0200
Subject: x86, boot: Remove obsolete and unused constant RAMDISK

The named constant RAMDISK is unused. It used to set the (obsolete)
kernel boot header field ram_size, but its usage for that purpose got
dropped in commit 5e47c478b0b69bc9bc3ba544e4b1ca3268f98fef ("x86: remove
zImage support"). Now remove this constant too.

Signed-off-by: Paul Bolle <pebolle@tiscali.nl>
Link: http://lkml.kernel.org/r/1345396003.1771.9.camel@x61.thuisdomein
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/boot/header.S | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index b4e15dd6786..2a017441b8b 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -32,10 +32,6 @@ SYSSEG		= 0x1000		/* historical load address >> 4 */
 #define SVGA_MODE ASK_VGA
 #endif
 
-#ifndef RAMDISK
-#define RAMDISK 0
-#endif
-
 #ifndef ROOT_RDONLY
 #define ROOT_RDONLY 1
 #endif
-- 
cgit v1.2.3-70-g09d2


From ece3234a77ebcd5bbeea6b829c9798328d290cae Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <sebastian@breakpoint.cc>
Date: Mon, 13 Aug 2012 22:23:33 +0200
Subject: x86: dt: Use linear irq domain for ioapic(s)

The former conversion to irq_domain_add_legacy() did not fully work
since we miss the irq decs for NR_IRQS_LEGACY+.

Ideally we could use irq_domain_add_simple() or the no-map variant (and
program the virq <-> line mapping directly into ioapic) but this would
require a different irq lookup in "do_IRQ()" and won't work with ACPI
without changes. So this is probably easiest for everyone.

Tested-by: Thierry Reding <thierry.reding@avionic-design.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Grant Likely <grant.likely@secretlab.ca>
Link: http://lkml.kernel.org/r/20120813202304.GA3529@breakpoint.cc
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/devicetree.c | 51 ++++++++++++++++++++++++++++++++++++--------
 1 file changed, 42 insertions(+), 9 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c
index 3ae2ced4a87..b1581527a23 100644
--- a/arch/x86/kernel/devicetree.c
+++ b/arch/x86/kernel/devicetree.c
@@ -342,6 +342,47 @@ const struct irq_domain_ops ioapic_irq_domain_ops = {
 	.xlate = ioapic_xlate,
 };
 
+static void dt_add_ioapic_domain(unsigned int ioapic_num,
+		struct device_node *np)
+{
+	struct irq_domain *id;
+	struct mp_ioapic_gsi *gsi_cfg;
+	int ret;
+	int num;
+
+	gsi_cfg = mp_ioapic_gsi_routing(ioapic_num);
+	num = gsi_cfg->gsi_end - gsi_cfg->gsi_base + 1;
+
+	id = irq_domain_add_linear(np, num, &ioapic_irq_domain_ops,
+			(void *)ioapic_num);
+	BUG_ON(!id);
+	if (gsi_cfg->gsi_base == 0) {
+		/*
+		 * The first NR_IRQS_LEGACY irq descs are allocated in
+		 * early_irq_init() and need just a mapping. The
+		 * remaining irqs need both. All of them are preallocated
+		 * and assigned so we can keep the 1:1 mapping which the ioapic
+		 * is having.
+		 */
+		ret = irq_domain_associate_many(id, 0, 0, NR_IRQS_LEGACY);
+		if (ret)
+			pr_err("Error mapping legacy IRQs: %d\n", ret);
+
+		if (num > NR_IRQS_LEGACY) {
+			ret = irq_create_strict_mappings(id, NR_IRQS_LEGACY,
+					NR_IRQS_LEGACY, num - NR_IRQS_LEGACY);
+			if (ret)
+				pr_err("Error creating mapping for the "
+						"remaining IRQs: %d\n", ret);
+		}
+		irq_set_default_host(id);
+	} else {
+		ret = irq_create_strict_mappings(id, gsi_cfg->gsi_base, 0, num);
+		if (ret)
+			pr_err("Error creating IRQ mapping: %d\n", ret);
+	}
+}
+
 static void __init ioapic_add_ofnode(struct device_node *np)
 {
 	struct resource r;
@@ -356,15 +397,7 @@ static void __init ioapic_add_ofnode(struct device_node *np)
 
 	for (i = 0; i < nr_ioapics; i++) {
 		if (r.start == mpc_ioapic_addr(i)) {
-			struct irq_domain *id;
-			struct mp_ioapic_gsi *gsi_cfg;
-
-			gsi_cfg = mp_ioapic_gsi_routing(i);
-
-			id = irq_domain_add_legacy(np, 32, gsi_cfg->gsi_base, 0,
-						   &ioapic_irq_domain_ops,
-						   (void*)i);
-			BUG_ON(!id);
+			dt_add_ioapic_domain(i, np);
 			return;
 		}
 	}
-- 
cgit v1.2.3-70-g09d2


From d3a8009b1731abb7026a840d1c2701f877f9429f Mon Sep 17 00:00:00 2001
From: Yuanhan Liu <yliu.null@gmail.com>
Date: Mon, 6 Aug 2012 22:13:00 +0800
Subject: x86/irq/i8259: Fix incorrect comment

Signed-off-by: Yuanhan Liu <yliu.null@gmail.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/i8259.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
index 36d1853e91a..9a5c460404d 100644
--- a/arch/x86/kernel/i8259.c
+++ b/arch/x86/kernel/i8259.c
@@ -263,7 +263,7 @@ static void i8259A_shutdown(void)
 	 * out of.
 	 */
 	outb(0xff, PIC_MASTER_IMR);	/* mask all of 8259A-1 */
-	outb(0xff, PIC_SLAVE_IMR);	/* mask all of 8259A-1 */
+	outb(0xff, PIC_SLAVE_IMR);	/* mask all of 8259A-2 */
 }
 
 static struct syscore_ops i8259_syscore_ops = {
-- 
cgit v1.2.3-70-g09d2


From 8e3d9d061b5d132217629e7b5635ff0c02488e65 Mon Sep 17 00:00:00 2001
From: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Date: Tue, 21 Aug 2012 10:57:42 +0800
Subject: KVM: x86: fix possible infinite loop caused by reexecute_instruction

Currently, we reexecute all unhandleable instructions if they do not
access on the mmio, however, it can not work if host map the readonly
memory to guest. If the instruction try to write this kind of memory,
it will fault again when guest retry it, then we will goto a infinite
loop: retry instruction -> write #PF -> emulation fail ->
retry instruction -> ...

Fix it by retrying the instruction only when it faults on the writable
memory

Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/x86.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index fb0d93788bf..704680d0fa3 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4473,6 +4473,7 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu)
 static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva)
 {
 	gpa_t gpa;
+	pfn_t pfn;
 
 	if (tdp_enabled)
 		return false;
@@ -4490,8 +4491,17 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva)
 	if (gpa == UNMAPPED_GVA)
 		return true; /* let cpu generate fault */
 
-	if (!kvm_is_error_hva(gfn_to_hva(vcpu->kvm, gpa >> PAGE_SHIFT)))
+	/*
+	 * Do not retry the unhandleable instruction if it faults on the
+	 * readonly host memory, otherwise it will goto a infinite loop:
+	 * retry instruction -> write #PF -> emulation fail -> retry
+	 * instruction -> ...
+	 */
+	pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(gpa));
+	if (!is_error_pfn(pfn)) {
+		kvm_release_pfn_clean(pfn);
 		return true;
+	}
 
 	return false;
 }
-- 
cgit v1.2.3-70-g09d2


From 037d92dc5d4691ae7cf44699c55ca83b1b441992 Mon Sep 17 00:00:00 2001
From: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Date: Tue, 21 Aug 2012 10:59:12 +0800
Subject: KVM: introduce gfn_to_pfn_memslot_atomic

It can instead of hva_to_pfn_atomic

Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/mmu.c       |  5 +----
 include/linux/kvm_host.h |  3 ++-
 virt/kvm/kvm_main.c      | 14 ++++++++------
 3 files changed, 11 insertions(+), 11 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 9651c2cd000..5548971ae80 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2510,15 +2510,12 @@ static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn,
 				     bool no_dirty_log)
 {
 	struct kvm_memory_slot *slot;
-	unsigned long hva;
 
 	slot = gfn_to_memslot_dirty_bitmap(vcpu, gfn, no_dirty_log);
 	if (!slot)
 		return KVM_PFN_ERR_FAULT;
 
-	hva = gfn_to_hva_memslot(slot, gfn);
-
-	return hva_to_pfn_atomic(hva);
+	return gfn_to_pfn_memslot_atomic(slot, gfn);
 }
 
 static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu,
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index d4bd4d41e35..52c86e4f6d8 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -462,7 +462,6 @@ void kvm_release_page_dirty(struct page *page);
 void kvm_set_page_dirty(struct page *page);
 void kvm_set_page_accessed(struct page *page);
 
-pfn_t hva_to_pfn_atomic(unsigned long addr);
 pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn);
 pfn_t gfn_to_pfn_async(struct kvm *kvm, gfn_t gfn, bool *async,
 		       bool write_fault, bool *writable);
@@ -470,6 +469,8 @@ pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn);
 pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
 		      bool *writable);
 pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn);
+pfn_t gfn_to_pfn_memslot_atomic(struct kvm_memory_slot *slot, gfn_t gfn);
+
 void kvm_release_pfn_dirty(pfn_t pfn);
 void kvm_release_pfn_clean(pfn_t pfn);
 void kvm_set_pfn_dirty(pfn_t pfn);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 7b94d70a323..543f9b7e5aa 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1102,12 +1102,6 @@ static pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async,
 	return pfn;
 }
 
-pfn_t hva_to_pfn_atomic(unsigned long addr)
-{
-	return hva_to_pfn(addr, true, NULL, true, NULL);
-}
-EXPORT_SYMBOL_GPL(hva_to_pfn_atomic);
-
 static pfn_t __gfn_to_pfn(struct kvm *kvm, gfn_t gfn, bool atomic, bool *async,
 			  bool write_fault, bool *writable)
 {
@@ -1155,6 +1149,14 @@ pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn)
 	return hva_to_pfn(addr, false, NULL, true, NULL);
 }
 
+pfn_t gfn_to_pfn_memslot_atomic(struct kvm_memory_slot *slot, gfn_t gfn)
+{
+	unsigned long addr = gfn_to_hva_memslot(slot, gfn);
+
+	return hva_to_pfn(addr, true, NULL, true, NULL);
+}
+EXPORT_SYMBOL_GPL(gfn_to_pfn_memslot_atomic);
+
 int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages,
 								  int nr_pages)
 {
-- 
cgit v1.2.3-70-g09d2


From 4d8b81abc47b83a1939e59df2fdb0e98dfe0eedd Mon Sep 17 00:00:00 2001
From: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Date: Tue, 21 Aug 2012 11:02:51 +0800
Subject: KVM: introduce readonly memslot

In current code, if we map a readonly memory space from host to guest
and the page is not currently mapped in the host, we will get a fault
pfn and async is not allowed, then the vm will crash

We introduce readonly memory region to map ROM/ROMD to the guest, read access
is happy for readonly memslot, write access on readonly memslot will cause
KVM_EXIT_MMIO exit

Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 Documentation/virtual/kvm/api.txt | 10 ++--
 arch/x86/include/asm/kvm.h        |  1 +
 arch/x86/kvm/mmu.c                |  9 ++++
 arch/x86/kvm/x86.c                |  1 +
 include/linux/kvm.h               |  6 ++-
 include/linux/kvm_host.h          |  7 +--
 virt/kvm/kvm_main.c               | 96 +++++++++++++++++++++++++++++++--------
 7 files changed, 102 insertions(+), 28 deletions(-)

(limited to 'arch/x86')

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index bf33aaa4c59..b91bfd43f00 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -857,7 +857,8 @@ struct kvm_userspace_memory_region {
 };
 
 /* for kvm_memory_region::flags */
-#define KVM_MEM_LOG_DIRTY_PAGES  1UL
+#define KVM_MEM_LOG_DIRTY_PAGES	(1UL << 0)
+#define KVM_MEM_READONLY	(1UL << 1)
 
 This ioctl allows the user to create or modify a guest physical memory
 slot.  When changing an existing slot, it may be moved in the guest
@@ -873,9 +874,12 @@ It is recommended that the lower 21 bits of guest_phys_addr and userspace_addr
 be identical.  This allows large pages in the guest to be backed by large
 pages in the host.
 
-The flags field supports just one flag, KVM_MEM_LOG_DIRTY_PAGES, which
+The flags field supports two flag, KVM_MEM_LOG_DIRTY_PAGES, which
 instructs kvm to keep track of writes to memory within the slot.  See
-the KVM_GET_DIRTY_LOG ioctl.
+the KVM_GET_DIRTY_LOG ioctl. Another flag is KVM_MEM_READONLY when the
+KVM_CAP_READONLY_MEM capability, it indicates the guest memory is read-only,
+that means, guest is only allowed to read it. Writes will be posted to
+userspace as KVM_EXIT_MMIO exits.
 
 When the KVM_CAP_SYNC_MMU capability, changes in the backing of the memory
 region are automatically reflected into the guest.  For example, an mmap()
diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h
index 246617efd67..521bf252e34 100644
--- a/arch/x86/include/asm/kvm.h
+++ b/arch/x86/include/asm/kvm.h
@@ -25,6 +25,7 @@
 #define __KVM_HAVE_DEBUGREGS
 #define __KVM_HAVE_XSAVE
 #define __KVM_HAVE_XCRS
+#define __KVM_HAVE_READONLY_MEM
 
 /* Architectural interrupt line count. */
 #define KVM_NR_INTERRUPTS 256
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 5548971ae80..8e312a2e141 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2647,6 +2647,15 @@ static void kvm_send_hwpoison_signal(unsigned long address, struct task_struct *
 
 static int kvm_handle_bad_page(struct kvm_vcpu *vcpu, gfn_t gfn, pfn_t pfn)
 {
+	/*
+	 * Do not cache the mmio info caused by writing the readonly gfn
+	 * into the spte otherwise read access on readonly gfn also can
+	 * caused mmio page fault and treat it as mmio access.
+	 * Return 1 to tell kvm to emulate it.
+	 */
+	if (pfn == KVM_PFN_ERR_RO_FAULT)
+		return 1;
+
 	if (pfn == KVM_PFN_ERR_HWPOISON) {
 		kvm_send_hwpoison_signal(gfn_to_hva(vcpu->kvm, gfn), current);
 		return 0;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 704680d0fa3..42bbf4187d2 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2175,6 +2175,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_GET_TSC_KHZ:
 	case KVM_CAP_PCI_2_3:
 	case KVM_CAP_KVMCLOCK_CTRL:
+	case KVM_CAP_READONLY_MEM:
 		r = 1;
 		break;
 	case KVM_CAP_COALESCED_MMIO:
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 2de335d7f63..d808694673f 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -106,7 +106,8 @@ struct kvm_userspace_memory_region {
  * other bits are reserved for kvm internal use which are defined in
  * include/linux/kvm_host.h.
  */
-#define KVM_MEM_LOG_DIRTY_PAGES  1UL
+#define KVM_MEM_LOG_DIRTY_PAGES	(1UL << 0)
+#define KVM_MEM_READONLY	(1UL << 1)
 
 /* for KVM_IRQ_LINE */
 struct kvm_irq_level {
@@ -621,6 +622,9 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_PPC_GET_SMMU_INFO 78
 #define KVM_CAP_S390_COW 79
 #define KVM_CAP_PPC_ALLOC_HTAB 80
+#ifdef __KVM_HAVE_READONLY_MEM
+#define KVM_CAP_READONLY_MEM 81
+#endif
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index a913ac709a9..5972c9845dd 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -465,6 +465,7 @@ int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages,
 
 struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
 unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn);
+unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn);
 void kvm_release_page_clean(struct page *page);
 void kvm_release_page_dirty(struct page *page);
 void kvm_set_page_dirty(struct page *page);
@@ -792,12 +793,6 @@ hva_to_gfn_memslot(unsigned long hva, struct kvm_memory_slot *slot)
 	return slot->base_gfn + gfn_offset;
 }
 
-static inline unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot,
-					       gfn_t gfn)
-{
-	return slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE;
-}
-
 static inline gpa_t gfn_to_gpa(gfn_t gfn)
 {
 	return (gpa_t)gfn << PAGE_SHIFT;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index e3e1658c491..3416f8a31f6 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -680,7 +680,13 @@ void update_memslots(struct kvm_memslots *slots, struct kvm_memory_slot *new)
 
 static int check_memory_region_flags(struct kvm_userspace_memory_region *mem)
 {
-	if (mem->flags & ~KVM_MEM_LOG_DIRTY_PAGES)
+	u32 valid_flags = KVM_MEM_LOG_DIRTY_PAGES;
+
+#ifdef KVM_CAP_READONLY_MEM
+	valid_flags |= KVM_MEM_READONLY;
+#endif
+
+	if (mem->flags & ~valid_flags)
 		return -EINVAL;
 
 	return 0;
@@ -973,18 +979,45 @@ out:
 	return size;
 }
 
-static unsigned long gfn_to_hva_many(struct kvm_memory_slot *slot, gfn_t gfn,
-				     gfn_t *nr_pages)
+static bool memslot_is_readonly(struct kvm_memory_slot *slot)
+{
+	return slot->flags & KVM_MEM_READONLY;
+}
+
+static unsigned long __gfn_to_hva_memslot(struct kvm_memory_slot *slot,
+					  gfn_t gfn)
+{
+	return slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE;
+}
+
+static unsigned long __gfn_to_hva_many(struct kvm_memory_slot *slot, gfn_t gfn,
+				       gfn_t *nr_pages, bool write)
 {
 	if (!slot || slot->flags & KVM_MEMSLOT_INVALID)
 		return KVM_HVA_ERR_BAD;
 
+	if (memslot_is_readonly(slot) && write)
+		return KVM_HVA_ERR_RO_BAD;
+
 	if (nr_pages)
 		*nr_pages = slot->npages - (gfn - slot->base_gfn);
 
-	return gfn_to_hva_memslot(slot, gfn);
+	return __gfn_to_hva_memslot(slot, gfn);
 }
 
+static unsigned long gfn_to_hva_many(struct kvm_memory_slot *slot, gfn_t gfn,
+				     gfn_t *nr_pages)
+{
+	return __gfn_to_hva_many(slot, gfn, nr_pages, true);
+}
+
+unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot,
+				 gfn_t gfn)
+{
+	return gfn_to_hva_many(slot, gfn, NULL);
+}
+EXPORT_SYMBOL_GPL(gfn_to_hva_memslot);
+
 unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
 {
 	return gfn_to_hva_many(gfn_to_memslot(kvm, gfn), gfn, NULL);
@@ -997,7 +1030,7 @@ EXPORT_SYMBOL_GPL(gfn_to_hva);
  */
 static unsigned long gfn_to_hva_read(struct kvm *kvm, gfn_t gfn)
 {
-	return gfn_to_hva_many(gfn_to_memslot(kvm, gfn), gfn, NULL);
+	return __gfn_to_hva_many(gfn_to_memslot(kvm, gfn), gfn, NULL, false);
 }
 
 static int kvm_read_hva(void *data, void __user *hva, int len)
@@ -1106,6 +1139,17 @@ static int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault,
 	return npages;
 }
 
+static bool vma_is_valid(struct vm_area_struct *vma, bool write_fault)
+{
+	if (unlikely(!(vma->vm_flags & VM_READ)))
+		return false;
+
+	if (write_fault && (unlikely(!(vma->vm_flags & VM_WRITE))))
+		return false;
+
+	return true;
+}
+
 /*
  * Pin guest page in memory and return its pfn.
  * @addr: host virtual address which maps memory to the guest
@@ -1130,8 +1174,6 @@ static pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async,
 	/* we can do it either atomically or asynchronously, not both */
 	BUG_ON(atomic && async);
 
-	BUG_ON(!write_fault && !writable);
-
 	if (hva_to_pfn_fast(addr, atomic, async, write_fault, writable, &pfn))
 		return pfn;
 
@@ -1158,7 +1200,7 @@ static pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async,
 			vma->vm_pgoff;
 		BUG_ON(!kvm_is_mmio_pfn(pfn));
 	} else {
-		if (async && (vma->vm_flags & VM_WRITE))
+		if (async && vma_is_valid(vma, write_fault))
 			*async = true;
 		pfn = KVM_PFN_ERR_FAULT;
 	}
@@ -1167,19 +1209,40 @@ exit:
 	return pfn;
 }
 
+static pfn_t
+__gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn, bool atomic,
+		     bool *async, bool write_fault, bool *writable)
+{
+	unsigned long addr = __gfn_to_hva_many(slot, gfn, NULL, write_fault);
+
+	if (addr == KVM_HVA_ERR_RO_BAD)
+		return KVM_PFN_ERR_RO_FAULT;
+
+	if (kvm_is_error_hva(addr))
+		return KVM_PFN_ERR_BAD;
+
+	/* Do not map writable pfn in the readonly memslot. */
+	if (writable && memslot_is_readonly(slot)) {
+		*writable = false;
+		writable = NULL;
+	}
+
+	return hva_to_pfn(addr, atomic, async, write_fault,
+			  writable);
+}
+
 static pfn_t __gfn_to_pfn(struct kvm *kvm, gfn_t gfn, bool atomic, bool *async,
 			  bool write_fault, bool *writable)
 {
-	unsigned long addr;
+	struct kvm_memory_slot *slot;
 
 	if (async)
 		*async = false;
 
-	addr = gfn_to_hva(kvm, gfn);
-	if (kvm_is_error_hva(addr))
-		return KVM_PFN_ERR_BAD;
+	slot = gfn_to_memslot(kvm, gfn);
 
-	return hva_to_pfn(addr, atomic, async, write_fault, writable);
+	return __gfn_to_pfn_memslot(slot, gfn, atomic, async, write_fault,
+				    writable);
 }
 
 pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn)
@@ -1210,15 +1273,12 @@ EXPORT_SYMBOL_GPL(gfn_to_pfn_prot);
 
 pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn)
 {
-	unsigned long addr = gfn_to_hva_memslot(slot, gfn);
-	return hva_to_pfn(addr, false, NULL, true, NULL);
+	return __gfn_to_pfn_memslot(slot, gfn, false, NULL, true, NULL);
 }
 
 pfn_t gfn_to_pfn_memslot_atomic(struct kvm_memory_slot *slot, gfn_t gfn)
 {
-	unsigned long addr = gfn_to_hva_memslot(slot, gfn);
-
-	return hva_to_pfn(addr, true, NULL, true, NULL);
+	return __gfn_to_pfn_memslot(slot, gfn, true, NULL, true, NULL);
 }
 EXPORT_SYMBOL_GPL(gfn_to_pfn_memslot_atomic);
 
-- 
cgit v1.2.3-70-g09d2


From 4dbf32c30f7e5379588a692c7bf80b05d9ef8026 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <borislav.petkov@amd.com>
Date: Mon, 23 Jul 2012 19:05:53 +0200
Subject: x86, microcode: Save an indentation level in reload_for_cpu

Invert the uci->valid check so that the later block can be aligned on
the first indentation level of the function. No functional change.

Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
Link: http://lkml.kernel.org/r/1344361461-10076-3-git-send-email-bp@amd64.org
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/kernel/microcode_core.c | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index 4873e62db6a..63a95686502 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -276,19 +276,18 @@ static struct platform_device	*microcode_pdev;
 static int reload_for_cpu(int cpu)
 {
 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+	enum ucode_state ustate;
 	int err = 0;
 
-	if (uci->valid) {
-		enum ucode_state ustate;
-
-		ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev);
-		if (ustate == UCODE_OK)
-			apply_microcode_on_target(cpu);
-		else
-			if (ustate == UCODE_ERROR)
-				err = -EINVAL;
-	}
+	if (!uci->valid)
+		return err;
 
+	ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev);
+	if (ustate == UCODE_OK)
+		apply_microcode_on_target(cpu);
+	else
+		if (ustate == UCODE_ERROR)
+			err = -EINVAL;
 	return err;
 }
 
-- 
cgit v1.2.3-70-g09d2


From bb9d3e473d5b324907e15dff4e54410b28ea50e2 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <borislav.petkov@amd.com>
Date: Fri, 3 Aug 2012 15:26:50 +0200
Subject: x86, microcode: Drop uci->mc check on resume path

Remove the uci->mc check on the cpu resume path because the low-level
drivers do that anyway.

More importantly, though, this fixes a contrived and obscure but still
important case. Imagine the following:

* boot machine, no new microcode in /lib/firmware

* a subset of the CPUs is offlined

* in the meantime, user puts new fresh microcode container into
/lib/firmware and reloads it by doing
$ echo 1 > /sys/devices/system/cpu/microcode/reload

* offlined cores come back online and they don't get the newer microcode
applied due to this check.

Later patches take care of the issue on AMD.

While at it, cleanup code around it.

Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
Link: http://lkml.kernel.org/r/1344361461-10076-4-git-send-email-bp@amd64.org
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/kernel/microcode_core.c | 16 +++++-----------
 1 file changed, 5 insertions(+), 11 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index 63a95686502..706a5c9b8eb 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -369,13 +369,10 @@ static void microcode_fini_cpu(int cpu)
 
 static enum ucode_state microcode_resume_cpu(int cpu)
 {
-	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
-
-	if (!uci->mc)
-		return UCODE_NFOUND;
-
 	pr_debug("CPU%d updated upon resume\n", cpu);
-	apply_microcode_on_target(cpu);
+
+	if (apply_microcode_on_target(cpu))
+		return UCODE_ERROR;
 
 	return UCODE_OK;
 }
@@ -404,14 +401,11 @@ static enum ucode_state microcode_init_cpu(int cpu)
 static enum ucode_state microcode_update_cpu(int cpu)
 {
 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
-	enum ucode_state ustate;
 
 	if (uci->valid)
-		ustate = microcode_resume_cpu(cpu);
-	else
-		ustate = microcode_init_cpu(cpu);
+		return microcode_resume_cpu(cpu);
 
-	return ustate;
+	return microcode_init_cpu(cpu);
 }
 
 static int mc_device_add(struct device *dev, struct subsys_interface *sif)
-- 
cgit v1.2.3-70-g09d2


From 09c3f0d883300c8fc2bb62e9a70cf89a2ada9a80 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <borislav.petkov@amd.com>
Date: Mon, 23 Jul 2012 20:15:10 +0200
Subject: x86, microcode: Cleanup cpu hotplug notifier callback

Mask out CPU_TASKS_FROZEN bit so that all _FROZEN cases can be dropped.
Also, add some more comments as to why CPU_ONLINE falls through to
CPU_DOWN_FAILED (no break), and for the CPU_DEAD case. Realign debug
printks better.

Idea blatantly stolen from a tglx patch:
http://marc.info/?l=linux-kernel&m=134267779513862

Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
Link: http://lkml.kernel.org/r/1344361461-10076-5-git-send-email-bp@amd64.org
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/kernel/microcode_core.c | 25 ++++++++++++++++---------
 1 file changed, 16 insertions(+), 9 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index 706a5c9b8eb..dcde544e012 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -470,34 +470,41 @@ mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu)
 	struct device *dev;
 
 	dev = get_cpu_device(cpu);
-	switch (action) {
+
+	switch (action & ~CPU_TASKS_FROZEN) {
 	case CPU_ONLINE:
-	case CPU_ONLINE_FROZEN:
 		microcode_update_cpu(cpu);
-	case CPU_DOWN_FAILED:
-	case CPU_DOWN_FAILED_FROZEN:
 		pr_debug("CPU%d added\n", cpu);
+		/*
+		 * "break" is missing on purpose here because we want to fall
+		 * through in order to create the sysfs group.
+		 */
+
+	case CPU_DOWN_FAILED:
 		if (sysfs_create_group(&dev->kobj, &mc_attr_group))
 			pr_err("Failed to create group for CPU%d\n", cpu);
 		break;
+
 	case CPU_DOWN_PREPARE:
-	case CPU_DOWN_PREPARE_FROZEN:
 		/* Suspend is in progress, only remove the interface */
 		sysfs_remove_group(&dev->kobj, &mc_attr_group);
 		pr_debug("CPU%d removed\n", cpu);
 		break;
 
 	/*
+	 * case CPU_DEAD:
+	 *
 	 * When a CPU goes offline, don't free up or invalidate the copy of
 	 * the microcode in kernel memory, so that we can reuse it when the
 	 * CPU comes back online without unnecessarily requesting the userspace
 	 * for it again.
 	 */
-	case CPU_UP_CANCELED_FROZEN:
-		/* The CPU refused to come up during a system resume */
-		microcode_fini_cpu(cpu);
-		break;
 	}
+
+	/* The CPU refused to come up during a system resume */
+	if (action == CPU_UP_CANCELED_FROZEN)
+		microcode_fini_cpu(cpu);
+
 	return NOTIFY_OK;
 }
 
-- 
cgit v1.2.3-70-g09d2


From e43f6e67ec1c142550860bbe0b51166c5ee4cac8 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <borislav.petkov@amd.com>
Date: Wed, 1 Aug 2012 19:17:01 +0200
Subject: x86, microcode: Straighten out Kconfig text

Update and clarify Kconfig help text along with menu names.

Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
Link: http://lkml.kernel.org/r/1344361461-10076-6-git-send-email-bp@amd64.org
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/Kconfig | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 8ec3a1aa4ab..1ccccc6efb3 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -982,25 +982,25 @@ config X86_REBOOTFIXUPS
 	  Say N otherwise.
 
 config MICROCODE
-	tristate "/dev/cpu/microcode - microcode support"
+	tristate "CPU microcode loading support"
 	select FW_LOADER
 	---help---
+
 	  If you say Y here, you will be able to update the microcode on
 	  certain Intel and AMD processors. The Intel support is for the
-	  IA32 family, e.g. Pentium Pro, Pentium II, Pentium III,
-	  Pentium 4, Xeon etc. The AMD support is for family 0x10 and
-	  0x11 processors, e.g. Opteron, Phenom and Turion 64 Ultra.
-	  You will obviously need the actual microcode binary data itself
-	  which is not shipped with the Linux kernel.
+	  IA32 family, e.g. Pentium Pro, Pentium II, Pentium III, Pentium 4,
+	  Xeon etc. The AMD support is for families 0x10 and later. You will
+	  obviously need the actual microcode binary data itself which is not
+	  shipped with the Linux kernel.
 
 	  This option selects the general module only, you need to select
 	  at least one vendor specific module as well.
 
-	  To compile this driver as a module, choose M here: the
-	  module will be called microcode.
+	  To compile this driver as a module, choose M here: the module
+	  will be called microcode.
 
 config MICROCODE_INTEL
-	bool "Intel microcode patch loading support"
+	bool "Intel microcode loading support"
 	depends on MICROCODE
 	default MICROCODE
 	select FW_LOADER
@@ -1013,7 +1013,7 @@ config MICROCODE_INTEL
 	  <http://www.urbanmyth.org/microcode/>.
 
 config MICROCODE_AMD
-	bool "AMD microcode patch loading support"
+	bool "AMD microcode loading support"
 	depends on MICROCODE
 	select FW_LOADER
 	---help---
-- 
cgit v1.2.3-70-g09d2


From e7e632f5ba240fbc313c49ed6559681ea57534e9 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <borislav.petkov@amd.com>
Date: Fri, 20 Jul 2012 14:12:21 +0200
Subject: x86, microcode, AMD: Remove useless get_ucode_data wrapper

get_ucode_data was a trivial memcpy wrapper. Remove it so as not to
obfuscate code unnecessarily with no obvious gain.

No functional change.

Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
Link: http://lkml.kernel.org/r/1344361461-10076-7-git-send-email-bp@amd64.org
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/include/asm/microcode.h | 6 ------
 arch/x86/kernel/microcode_amd.c  | 4 ++--
 2 files changed, 2 insertions(+), 8 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
index 4ebe157bf73..8813be60099 100644
--- a/arch/x86/include/asm/microcode.h
+++ b/arch/x86/include/asm/microcode.h
@@ -49,12 +49,6 @@ static inline struct microcode_ops * __init init_intel_microcode(void)
 #ifdef CONFIG_MICROCODE_AMD
 extern struct microcode_ops * __init init_amd_microcode(void);
 extern void __exit exit_amd_microcode(void);
-
-static inline void get_ucode_data(void *to, const u8 *from, size_t n)
-{
-	memcpy(to, from, n);
-}
-
 #else
 static inline struct microcode_ops * __init init_amd_microcode(void)
 {
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index 82746f942cd..94213387a3d 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -183,7 +183,7 @@ static int get_matching_microcode(int cpu, const u8 *ucode_ptr,
 	memset(patch, 0, PAGE_SIZE);
 
 	/* all looks ok, get the binary patch */
-	get_ucode_data(patch, ucode_ptr + SECTION_HDR_SIZE, actual_size);
+	memcpy(patch, ucode_ptr + SECTION_HDR_SIZE, actual_size);
 
 	return actual_size;
 }
@@ -238,7 +238,7 @@ static int install_equiv_cpu_table(const u8 *buf)
 		return -ENOMEM;
 	}
 
-	get_ucode_data(equiv_cpu_table, buf + CONTAINER_HDR_SZ, size);
+	memcpy(equiv_cpu_table, buf + CONTAINER_HDR_SZ, size);
 
 	/* add header length */
 	return size + CONTAINER_HDR_SZ;
-- 
cgit v1.2.3-70-g09d2


From 685ca6d797af9d41164dd64dd60145d4946fc152 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <borislav.petkov@amd.com>
Date: Wed, 20 Jun 2012 16:17:51 +0200
Subject: x86, microcode, AMD: Check before applying a patch

Make sure we're actually applying a microcode patch to a core which
really needs it.

This brings only a very very very minor slowdown on F10:

0.032218828 sec vs 0.056010626 sec with this patch.

And small speedup on F15:

0.487089449 sec vs 0.180551162 sec (from perf output).

Also, fixup comments while at it.

Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
Link: http://lkml.kernel.org/r/1344361461-10076-8-git-send-email-bp@amd64.org
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/kernel/microcode_amd.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index 94213387a3d..8fdf7d99b80 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -202,11 +202,18 @@ static int apply_microcode_amd(int cpu)
 	if (mc_amd == NULL)
 		return 0;
 
-	wrmsrl(MSR_AMD64_PATCH_LOADER, (u64)(long)&mc_amd->hdr.data_code);
-	/* get patch id after patching */
 	rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
 
-	/* check current patch id and patch's id for match */
+	/* need to apply patch? */
+	if (rev >= mc_amd->hdr.patch_id) {
+		c->microcode = rev;
+		return 0;
+	}
+
+	wrmsrl(MSR_AMD64_PATCH_LOADER, (u64)(long)&mc_amd->hdr.data_code);
+
+	/* verify patch application was successful */
+	rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
 	if (rev != mc_amd->hdr.patch_id) {
 		pr_err("CPU%d: update failed for patch_level=0x%08x\n",
 		       cpu, mc_amd->hdr.patch_id);
-- 
cgit v1.2.3-70-g09d2


From 5f5b747282c6cc57b91baba37f76de27398b9e60 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <borislav.petkov@amd.com>
Date: Wed, 25 Jul 2012 20:06:54 +0200
Subject: x86, microcode, AMD: Read CPUID(1).EAX on the correct cpu

Read the CPUID(1).EAX leaf at the correct cpu and use it to search the
equivalence table for matching microcode patch. No functionality change.

Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
Link: http://lkml.kernel.org/r/1344361461-10076-9-git-send-email-bp@amd64.org
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/kernel/microcode_amd.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index 8fdf7d99b80..25d34b17748 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -82,6 +82,7 @@ static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig)
 {
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
 
+	csig->sig = cpuid_eax(0x00000001);
 	csig->rev = c->microcode;
 	pr_info("CPU%d: patch_level=0x%08x\n", cpu, csig->rev);
 
@@ -118,16 +119,15 @@ static unsigned int verify_ucode_size(int cpu, u32 patch_size,
 	return patch_size;
 }
 
-static u16 find_equiv_id(void)
+static u16 find_equiv_id(unsigned int cpu)
 {
-	unsigned int current_cpu_id, i = 0;
+	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+	int i = 0;
 
 	BUG_ON(equiv_cpu_table == NULL);
 
-	current_cpu_id = cpuid_eax(0x00000001);
-
 	while (equiv_cpu_table[i].installed_cpu != 0) {
-		if (current_cpu_id == equiv_cpu_table[i].installed_cpu)
+		if (uci->cpu_sig.sig == equiv_cpu_table[i].installed_cpu)
 			return equiv_cpu_table[i].equiv_cpu;
 
 		i++;
@@ -150,7 +150,7 @@ static int get_matching_microcode(int cpu, const u8 *ucode_ptr,
 	patch_size = *(u32 *)(ucode_ptr + 4);
 	*current_size = patch_size + SECTION_HDR_SIZE;
 
-	equiv_cpu_id = find_equiv_id();
+	equiv_cpu_id = find_equiv_id(cpu);
 	if (!equiv_cpu_id)
 		return 0;
 
-- 
cgit v1.2.3-70-g09d2


From 48e30685caa8bdc4b8d4417d8ac31db59689742c Mon Sep 17 00:00:00 2001
From: Borislav Petkov <borislav.petkov@amd.com>
Date: Thu, 26 Jul 2012 15:51:00 +0200
Subject: x86, microcode: Add a refresh firmware flag to ->request_microcode_fw

This is done in preparation for teaching the ucode driver to either load
a new ucode patches container from userspace or use an already cached
version. No functionality change in this patch.

Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
Link: http://lkml.kernel.org/r/1344361461-10076-10-git-send-email-bp@amd64.org
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/include/asm/microcode.h  |  4 ++--
 arch/x86/kernel/microcode_amd.c   |  3 ++-
 arch/x86/kernel/microcode_core.c  | 11 ++++++-----
 arch/x86/kernel/microcode_intel.c |  3 ++-
 4 files changed, 12 insertions(+), 9 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
index 8813be60099..43d921b4752 100644
--- a/arch/x86/include/asm/microcode.h
+++ b/arch/x86/include/asm/microcode.h
@@ -15,8 +15,8 @@ struct microcode_ops {
 	enum ucode_state (*request_microcode_user) (int cpu,
 				const void __user *buf, size_t size);
 
-	enum ucode_state (*request_microcode_fw) (int cpu,
-				struct device *device);
+	enum ucode_state (*request_microcode_fw) (int cpu, struct device *,
+						  bool refresh_fw);
 
 	void (*microcode_fini_cpu) (int cpu);
 
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index 25d34b17748..94ecdaa2405 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -330,7 +330,8 @@ out:
  *
  * These might be larger than 2K.
  */
-static enum ucode_state request_microcode_amd(int cpu, struct device *device)
+static enum ucode_state request_microcode_amd(int cpu, struct device *device,
+					      bool refresh_fw)
 {
 	char fw_name[36] = "amd-ucode/microcode_amd.bin";
 	const struct firmware *fw;
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index dcde544e012..9420972e98f 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -282,7 +282,7 @@ static int reload_for_cpu(int cpu)
 	if (!uci->valid)
 		return err;
 
-	ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev);
+	ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev, true);
 	if (ustate == UCODE_OK)
 		apply_microcode_on_target(cpu);
 	else
@@ -377,7 +377,7 @@ static enum ucode_state microcode_resume_cpu(int cpu)
 	return UCODE_OK;
 }
 
-static enum ucode_state microcode_init_cpu(int cpu)
+static enum ucode_state microcode_init_cpu(int cpu, bool refresh_fw)
 {
 	enum ucode_state ustate;
 
@@ -388,7 +388,8 @@ static enum ucode_state microcode_init_cpu(int cpu)
 	if (system_state != SYSTEM_RUNNING)
 		return UCODE_NFOUND;
 
-	ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev);
+	ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev,
+						     refresh_fw);
 
 	if (ustate == UCODE_OK) {
 		pr_debug("CPU%d updated upon init\n", cpu);
@@ -405,7 +406,7 @@ static enum ucode_state microcode_update_cpu(int cpu)
 	if (uci->valid)
 		return microcode_resume_cpu(cpu);
 
-	return microcode_init_cpu(cpu);
+	return microcode_init_cpu(cpu, false);
 }
 
 static int mc_device_add(struct device *dev, struct subsys_interface *sif)
@@ -421,7 +422,7 @@ static int mc_device_add(struct device *dev, struct subsys_interface *sif)
 	if (err)
 		return err;
 
-	if (microcode_init_cpu(cpu) == UCODE_ERROR)
+	if (microcode_init_cpu(cpu, true) == UCODE_ERROR)
 		return -EINVAL;
 
 	return err;
diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c
index 0327e2b3c40..3544aed3933 100644
--- a/arch/x86/kernel/microcode_intel.c
+++ b/arch/x86/kernel/microcode_intel.c
@@ -405,7 +405,8 @@ static int get_ucode_fw(void *to, const void *from, size_t n)
 	return 0;
 }
 
-static enum ucode_state request_microcode_fw(int cpu, struct device *device)
+static enum ucode_state request_microcode_fw(int cpu, struct device *device,
+					     bool refresh_fw)
 {
 	char name[30];
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
-- 
cgit v1.2.3-70-g09d2


From c96d2c0905cc48e34f2b37b775b59932c416b343 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <borislav.petkov@amd.com>
Date: Wed, 1 Aug 2012 14:55:01 +0200
Subject: x86, microcode, AMD: Add reverse equiv table search

We search the equivalence table using the CPUID(1) signature of the
CPU in order to get the equivalence ID of the patch which we need to
apply. Add a function which does the reverse - it will be needed in
later patches.

While at it, pull the other equiv table function up in the file so that
it can be used by other functionality without forward declarations.

Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
Link: http://lkml.kernel.org/r/1344361461-10076-11-git-send-email-bp@amd64.org
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/kernel/microcode_amd.c | 46 +++++++++++++++++++++++++++--------------
 1 file changed, 30 insertions(+), 16 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index 94ecdaa2405..03ed5af7053 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -78,6 +78,36 @@ static struct equiv_cpu_entry *equiv_cpu_table;
 /* page-sized ucode patch buffer */
 void *patch;
 
+static u16 find_equiv_id(unsigned int cpu)
+{
+	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+	int i = 0;
+
+	BUG_ON(equiv_cpu_table == NULL);
+
+	while (equiv_cpu_table[i].installed_cpu != 0) {
+		if (uci->cpu_sig.sig == equiv_cpu_table[i].installed_cpu)
+			return equiv_cpu_table[i].equiv_cpu;
+
+		i++;
+	}
+	return 0;
+}
+
+static u32 find_cpu_family_by_equiv_cpu(u16 equiv_cpu)
+{
+	int i = 0;
+
+	BUG_ON(!equiv_cpu_table);
+
+	while (equiv_cpu_table[i].equiv_cpu != 0) {
+		if (equiv_cpu == equiv_cpu_table[i].equiv_cpu)
+			return equiv_cpu_table[i].installed_cpu;
+		i++;
+	}
+	return 0;
+}
+
 static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig)
 {
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
@@ -119,22 +149,6 @@ static unsigned int verify_ucode_size(int cpu, u32 patch_size,
 	return patch_size;
 }
 
-static u16 find_equiv_id(unsigned int cpu)
-{
-	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
-	int i = 0;
-
-	BUG_ON(equiv_cpu_table == NULL);
-
-	while (equiv_cpu_table[i].installed_cpu != 0) {
-		if (uci->cpu_sig.sig == equiv_cpu_table[i].installed_cpu)
-			return equiv_cpu_table[i].equiv_cpu;
-
-		i++;
-	}
-	return 0;
-}
-
 /*
  * we signal a good patch is found by returning its size > 0
  */
-- 
cgit v1.2.3-70-g09d2


From a3eb3b4da106a23b5d41bf915726172e31654a65 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <borislav.petkov@amd.com>
Date: Wed, 1 Aug 2012 15:38:18 +0200
Subject: x86, microcode, AMD: Add a small, per-family patches cache

This is a trivial cache which collects all ucode patches for the current
family of CPUs on the system. If a newer patch appears due to the
container file being updated in userspace, we replace our cached version
with the new one.

Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
Link: http://lkml.kernel.org/r/1344361461-10076-12-git-send-email-bp@amd64.org
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/kernel/microcode_amd.c | 67 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 66 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index 03ed5af7053..cacdc9a5ee4 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -78,12 +78,22 @@ static struct equiv_cpu_entry *equiv_cpu_table;
 /* page-sized ucode patch buffer */
 void *patch;
 
+struct ucode_patch {
+	struct list_head plist;
+	void *data;
+	u32 patch_id;
+	u16 equiv_cpu;
+};
+
+static LIST_HEAD(pcache);
+
 static u16 find_equiv_id(unsigned int cpu)
 {
 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
 	int i = 0;
 
-	BUG_ON(equiv_cpu_table == NULL);
+	if (!equiv_cpu_table)
+		return 0;
 
 	while (equiv_cpu_table[i].installed_cpu != 0) {
 		if (uci->cpu_sig.sig == equiv_cpu_table[i].installed_cpu)
@@ -108,6 +118,61 @@ static u32 find_cpu_family_by_equiv_cpu(u16 equiv_cpu)
 	return 0;
 }
 
+/*
+ * a small, trivial cache of per-family ucode patches
+ */
+static struct ucode_patch *cache_find_patch(u16 equiv_cpu)
+{
+	struct ucode_patch *p;
+
+	list_for_each_entry(p, &pcache, plist)
+		if (p->equiv_cpu == equiv_cpu)
+			return p;
+	return NULL;
+}
+
+static void update_cache(struct ucode_patch *new_patch)
+{
+	struct ucode_patch *p;
+
+	list_for_each_entry(p, &pcache, plist) {
+		if (p->equiv_cpu == new_patch->equiv_cpu) {
+			if (p->patch_id >= new_patch->patch_id)
+				/* we already have the latest patch */
+				return;
+
+			list_replace(&p->plist, &new_patch->plist);
+			kfree(p->data);
+			kfree(p);
+			return;
+		}
+	}
+	/* no patch found, add it */
+	list_add_tail(&new_patch->plist, &pcache);
+}
+
+static void free_cache(void)
+{
+	struct ucode_patch *p;
+
+	list_for_each_entry_reverse(p, &pcache, plist) {
+		__list_del(p->plist.prev, p->plist.next);
+		kfree(p->data);
+		kfree(p);
+	}
+}
+
+static struct ucode_patch *find_patch(unsigned int cpu)
+{
+	u16 equiv_id;
+
+	equiv_id = find_equiv_id(cpu);
+	if (!equiv_id)
+		return NULL;
+
+	return cache_find_patch(equiv_id);
+}
+
 static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig)
 {
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
-- 
cgit v1.2.3-70-g09d2


From 2efb05e8e9fa3510044e007b90263c73b6a83f84 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <borislav.petkov@amd.com>
Date: Wed, 1 Aug 2012 16:16:13 +0200
Subject: x86, microcode, AMD: Rewrite patch application procedure

Limit the access to userspace only on the BSP where we load the
container, verify the patches in it and put them in the patch cache.
Then, at application time, we lookup the correct patch in the cache and
use it.

When we need to reload the userspace container, we do that over the
reload interface:

echo 1 > /sys/devices/system/cpu/microcode/reload

which reloads (a possibly newer) container from userspace and applies
then the newest patches from there.

Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
Link: http://lkml.kernel.org/r/1344361461-10076-13-git-send-email-bp@amd64.org
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/kernel/microcode_amd.c | 236 ++++++++++++++++++++--------------------
 1 file changed, 121 insertions(+), 115 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index cacdc9a5ee4..5511216b443 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -75,9 +75,6 @@ struct microcode_amd {
 
 static struct equiv_cpu_entry *equiv_cpu_table;
 
-/* page-sized ucode patch buffer */
-void *patch;
-
 struct ucode_patch {
 	struct list_head plist;
 	void *data;
@@ -184,7 +181,7 @@ static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig)
 	return 0;
 }
 
-static unsigned int verify_ucode_size(int cpu, u32 patch_size,
+static unsigned int verify_patch_size(int cpu, u32 patch_size,
 				      unsigned int size)
 {
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
@@ -214,73 +211,25 @@ static unsigned int verify_ucode_size(int cpu, u32 patch_size,
 	return patch_size;
 }
 
-/*
- * we signal a good patch is found by returning its size > 0
- */
-static int get_matching_microcode(int cpu, const u8 *ucode_ptr,
-				  unsigned int leftover_size, int rev,
-				  unsigned int *current_size)
-{
-	struct microcode_header_amd *mc_hdr;
-	unsigned int actual_size, patch_size;
-	u16 equiv_cpu_id;
-
-	/* size of the current patch we're staring at */
-	patch_size = *(u32 *)(ucode_ptr + 4);
-	*current_size = patch_size + SECTION_HDR_SIZE;
-
-	equiv_cpu_id = find_equiv_id(cpu);
-	if (!equiv_cpu_id)
-		return 0;
-
-	/*
-	 * let's look at the patch header itself now
-	 */
-	mc_hdr = (struct microcode_header_amd *)(ucode_ptr + SECTION_HDR_SIZE);
-
-	if (mc_hdr->processor_rev_id != equiv_cpu_id)
-		return 0;
-
-	/* ucode might be chipset specific -- currently we don't support this */
-	if (mc_hdr->nb_dev_id || mc_hdr->sb_dev_id) {
-		pr_err("CPU%d: chipset specific code not yet supported\n",
-		       cpu);
-		return 0;
-	}
-
-	if (mc_hdr->patch_id <= rev)
-		return 0;
-
-	/*
-	 * now that the header looks sane, verify its size
-	 */
-	actual_size = verify_ucode_size(cpu, patch_size, leftover_size);
-	if (!actual_size)
-		return 0;
-
-	/* clear the patch buffer */
-	memset(patch, 0, PAGE_SIZE);
-
-	/* all looks ok, get the binary patch */
-	memcpy(patch, ucode_ptr + SECTION_HDR_SIZE, actual_size);
-
-	return actual_size;
-}
-
 static int apply_microcode_amd(int cpu)
 {
-	u32 rev, dummy;
-	int cpu_num = raw_smp_processor_id();
-	struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
-	struct microcode_amd *mc_amd = uci->mc;
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
+	struct microcode_amd *mc_amd;
+	struct ucode_cpu_info *uci;
+	struct ucode_patch *p;
+	u32 rev, dummy;
+
+	BUG_ON(raw_smp_processor_id() != cpu);
 
-	/* We should bind the task to the CPU */
-	BUG_ON(cpu_num != cpu);
+	uci = ucode_cpu_info + cpu;
 
-	if (mc_amd == NULL)
+	p = find_patch(cpu);
+	if (!p)
 		return 0;
 
+	mc_amd  = p->data;
+	uci->mc = p->data;
+
 	rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
 
 	/* need to apply patch? */
@@ -336,61 +285,113 @@ static void free_equiv_cpu_table(void)
 	equiv_cpu_table = NULL;
 }
 
-static enum ucode_state
-generic_load_microcode(int cpu, const u8 *data, size_t size)
+static void cleanup(void)
 {
-	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
-	struct microcode_header_amd *mc_hdr = NULL;
-	unsigned int mc_size, leftover, current_size = 0;
+	free_equiv_cpu_table();
+	free_cache();
+}
+
+/*
+ * We return the current size even if some of the checks failed so that
+ * we can skip over the next patch. If we return a negative value, we
+ * signal a grave error like a memory allocation has failed and the
+ * driver cannot continue functioning normally. In such cases, we tear
+ * down everything we've used up so far and exit.
+ */
+static int verify_and_add_patch(unsigned int cpu, u8 *fw, unsigned int leftover)
+{
+	struct cpuinfo_x86 *c = &cpu_data(cpu);
+	struct microcode_header_amd *mc_hdr;
+	struct ucode_patch *patch;
+	unsigned int patch_size, crnt_size, ret;
+	u32 proc_fam;
+	u16 proc_id;
+
+	patch_size  = *(u32 *)(fw + 4);
+	crnt_size   = patch_size + SECTION_HDR_SIZE;
+	mc_hdr	    = (struct microcode_header_amd *)(fw + SECTION_HDR_SIZE);
+	proc_id	    = mc_hdr->processor_rev_id;
+
+	proc_fam = find_cpu_family_by_equiv_cpu(proc_id);
+	if (!proc_fam) {
+		pr_err("No patch family for equiv ID: 0x%04x\n", proc_id);
+		return crnt_size;
+	}
+
+	/* check if patch is for the current family */
+	proc_fam = ((proc_fam >> 8) & 0xf) + ((proc_fam >> 20) & 0xff);
+	if (proc_fam != c->x86)
+		return crnt_size;
+
+	if (mc_hdr->nb_dev_id || mc_hdr->sb_dev_id) {
+		pr_err("Patch-ID 0x%08x: chipset-specific code unsupported.\n",
+			mc_hdr->patch_id);
+		return crnt_size;
+	}
+
+	ret = verify_patch_size(cpu, patch_size, leftover);
+	if (!ret) {
+		pr_err("Patch-ID 0x%08x: size mismatch.\n", mc_hdr->patch_id);
+		return crnt_size;
+	}
+
+	patch = kzalloc(sizeof(*patch), GFP_KERNEL);
+	if (!patch) {
+		pr_err("Patch allocation failure.\n");
+		return -EINVAL;
+	}
+
+	patch->data = kzalloc(patch_size, GFP_KERNEL);
+	if (!patch->data) {
+		pr_err("Patch data allocation failure.\n");
+		kfree(patch);
+		return -EINVAL;
+	}
+
+	/* All looks ok, copy patch... */
+	memcpy(patch->data, fw + SECTION_HDR_SIZE, patch_size);
+	INIT_LIST_HEAD(&patch->plist);
+	patch->patch_id  = mc_hdr->patch_id;
+	patch->equiv_cpu = proc_id;
+
+	/* ... and add to cache. */
+	update_cache(patch);
+
+	return crnt_size;
+}
+
+static enum ucode_state load_microcode_amd(int cpu, const u8 *data, size_t size)
+{
+	enum ucode_state ret = UCODE_ERROR;
+	unsigned int leftover;
+	u8 *fw = (u8 *)data;
+	int crnt_size = 0;
 	int offset;
-	const u8 *ucode_ptr = data;
-	void *new_mc = NULL;
-	unsigned int new_rev = uci->cpu_sig.rev;
-	enum ucode_state state = UCODE_ERROR;
 
-	offset = install_equiv_cpu_table(ucode_ptr);
+	offset = install_equiv_cpu_table(data);
 	if (offset < 0) {
 		pr_err("failed to create equivalent cpu table\n");
-		goto out;
+		return ret;
 	}
-	ucode_ptr += offset;
+	fw += offset;
 	leftover = size - offset;
 
-	if (*(u32 *)ucode_ptr != UCODE_UCODE_TYPE) {
+	if (*(u32 *)fw != UCODE_UCODE_TYPE) {
 		pr_err("invalid type field in container file section header\n");
-		goto free_table;
+		free_equiv_cpu_table();
+		return ret;
 	}
 
 	while (leftover) {
-		mc_size = get_matching_microcode(cpu, ucode_ptr, leftover,
-						 new_rev, &current_size);
-		if (mc_size) {
-			mc_hdr  = patch;
-			new_mc  = patch;
-			new_rev = mc_hdr->patch_id;
-			goto out_ok;
-		}
+		crnt_size = verify_and_add_patch(cpu, fw, leftover);
+		if (crnt_size < 0)
+			return ret;
 
-		ucode_ptr += current_size;
-		leftover  -= current_size;
+		fw	 += crnt_size;
+		leftover -= crnt_size;
 	}
 
-	if (!new_mc) {
-		state = UCODE_NFOUND;
-		goto free_table;
-	}
-
-out_ok:
-	uci->mc = new_mc;
-	state = UCODE_OK;
-	pr_debug("CPU%d update ucode (0x%08x -> 0x%08x)\n",
-		 cpu, uci->cpu_sig.rev, new_rev);
-
-free_table:
-	free_equiv_cpu_table();
-
-out:
-	return state;
+	return UCODE_OK;
 }
 
 /*
@@ -401,7 +402,7 @@ out:
  *
  * This legacy file is always smaller than 2K in size.
  *
- * Starting at family 15h they are in family specific firmware files:
+ * Beginning with family 15h, they are in family-specific firmware files:
  *
  *    amd-ucode/microcode_amd_fam15h.bin
  *    amd-ucode/microcode_amd_fam16h.bin
@@ -413,9 +414,13 @@ static enum ucode_state request_microcode_amd(int cpu, struct device *device,
 					      bool refresh_fw)
 {
 	char fw_name[36] = "amd-ucode/microcode_amd.bin";
-	const struct firmware *fw;
-	enum ucode_state ret = UCODE_NFOUND;
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
+	enum ucode_state ret = UCODE_NFOUND;
+	const struct firmware *fw;
+
+	/* reload ucode container only on the boot cpu */
+	if (!refresh_fw || c->cpu_index != boot_cpu_data.cpu_index)
+		return UCODE_OK;
 
 	if (c->x86 >= 0x15)
 		snprintf(fw_name, sizeof(fw_name), "amd-ucode/microcode_amd_fam%.2xh.bin", c->x86);
@@ -431,12 +436,17 @@ static enum ucode_state request_microcode_amd(int cpu, struct device *device,
 		goto fw_release;
 	}
 
-	ret = generic_load_microcode(cpu, fw->data, fw->size);
+	/* free old equiv table */
+	free_equiv_cpu_table();
+
+	ret = load_microcode_amd(cpu, fw->data, fw->size);
+	if (ret != UCODE_OK)
+		cleanup();
 
-fw_release:
+ fw_release:
 	release_firmware(fw);
 
-out:
+ out:
 	return ret;
 }
 
@@ -470,14 +480,10 @@ struct microcode_ops * __init init_amd_microcode(void)
 		return NULL;
 	}
 
-	patch = (void *)get_zeroed_page(GFP_KERNEL);
-	if (!patch)
-		return NULL;
-
 	return &microcode_amd_ops;
 }
 
 void __exit exit_amd_microcode(void)
 {
-	free_page((unsigned long)patch);
+	cleanup();
 }
-- 
cgit v1.2.3-70-g09d2


From 90993cdd1800dc6ef9587431a0c625b978584e81 Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Thu, 16 Aug 2012 17:00:19 -0300
Subject: x86: KVM guest: merge CONFIG_KVM_CLOCK into CONFIG_KVM_GUEST

The distinction between CONFIG_KVM_CLOCK and CONFIG_KVM_GUEST is
not so clear anymore, as demonstrated by recent bugs caused by poor
handling of on/off combinations of these options.

Merge CONFIG_KVM_CLOCK into CONFIG_KVM_GUEST.

Reported-By: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/Kconfig                | 21 ++++++++-------------
 arch/x86/include/asm/kvm_para.h |  4 ++--
 arch/x86/kernel/Makefile        |  3 +--
 arch/x86/kernel/kvm.c           |  2 --
 arch/x86/kernel/setup.c         |  2 +-
 5 files changed, 12 insertions(+), 20 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 8ec3a1aa4ab..a42e2e99caa 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -573,23 +573,18 @@ config PARAVIRT_TIME_ACCOUNTING
 
 source "arch/x86/xen/Kconfig"
 
-config KVM_CLOCK
-	bool "KVM paravirtualized clock"
-	select PARAVIRT
-	select PARAVIRT_CLOCK
-	---help---
-	  Turning on this option will allow you to run a paravirtualized clock
-	  when running over the KVM hypervisor. Instead of relying on a PIT
-	  (or probably other) emulation by the underlying device model, the host
-	  provides the guest with timing infrastructure such as time of day, and
-	  system time
-
 config KVM_GUEST
-	bool "KVM Guest support"
+	bool "KVM Guest support (including kvmclock)"
+	select PARAVIRT
 	select PARAVIRT
+	select PARAVIRT_CLOCK
+	default y if PARAVIRT_GUEST
 	---help---
 	  This option enables various optimizations for running under the KVM
-	  hypervisor.
+	  hypervisor. It includes a paravirtualized clock, so that instead
+	  of relying on a PIT (or probably other) emulation by the
+	  underlying device model, the host provides the guest with
+	  timing infrastructure such as time of day, and system time
 
 source "arch/x86/lguest/Kconfig"
 
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index 20f5697888b..eb3e9d85e1f 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -102,14 +102,14 @@ struct kvm_vcpu_pv_apf_data {
 extern void kvmclock_init(void);
 extern int kvm_register_clock(char *txt);
 
-#ifdef CONFIG_KVM_CLOCK
+#ifdef CONFIG_KVM_GUEST
 bool kvm_check_and_clear_guest_paused(void);
 #else
 static inline bool kvm_check_and_clear_guest_paused(void)
 {
 	return false;
 }
-#endif /* CONFIG_KVMCLOCK */
+#endif /* CONFIG_KVM_GUEST */
 
 /* This instruction is vmcall.  On non-VT architectures, it will generate a
  * trap that we will then rewrite to the appropriate instruction.
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 8215e5652d9..7203298e0b8 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -81,8 +81,7 @@ obj-$(CONFIG_DEBUG_RODATA_TEST)	+= test_rodata.o
 obj-$(CONFIG_DEBUG_NX_TEST)	+= test_nx.o
 obj-$(CONFIG_DEBUG_NMI_SELFTEST) += nmi_selftest.o
 
-obj-$(CONFIG_KVM_GUEST)		+= kvm.o
-obj-$(CONFIG_KVM_CLOCK)		+= kvmclock.o
+obj-$(CONFIG_KVM_GUEST)		+= kvm.o kvmclock.o
 obj-$(CONFIG_PARAVIRT)		+= paravirt.o paravirt_patch_$(BITS).o
 obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= paravirt-spinlocks.o
 obj-$(CONFIG_PARAVIRT_CLOCK)	+= pvclock.o
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 1596cc8fd79..b3e5e51bc90 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -397,9 +397,7 @@ void kvm_disable_steal_time(void)
 #ifdef CONFIG_SMP
 static void __init kvm_smp_prepare_boot_cpu(void)
 {
-#ifdef CONFIG_KVM_CLOCK
 	WARN_ON(kvm_register_clock("primary cpu clock"));
-#endif
 	kvm_guest_cpu_init();
 	native_smp_prepare_boot_cpu();
 }
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index f4b9b80e1b9..b3386ae3438 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -957,7 +957,7 @@ void __init setup_arch(char **cmdline_p)
 	initmem_init();
 	memblock_find_dma_reserve();
 
-#ifdef CONFIG_KVM_CLOCK
+#ifdef CONFIG_KVM_GUEST
 	kvmclock_init();
 #endif
 
-- 
cgit v1.2.3-70-g09d2


From 816afe4ff98ee10b1d30fd66361be132a0a5cee6 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Mon, 6 Aug 2012 17:29:49 +0930
Subject: x86/smp: Don't ever patch back to UP if we unplug cpus

We still patch SMP instructions to UP variants if we boot with a
single CPU, but not at any other time.  In particular, not if we
unplug CPUs to return to a single cpu.

Paul McKenney points out:

 mean offline overhead is 6251/48=130.2 milliseconds.

 If I remove the alternatives_smp_switch() from the offline
 path [...] the mean offline overhead is 550/42=13.1 milliseconds

Basically, we're never going to get those 120ms back, and the
code is pretty messy.

We get rid of:

 1) The "smp-alt-once" boot option. It's actually "smp-alt-boot", the
    documentation is wrong. It's now the default.

 2) The skip_smp_alternatives flag used by suspend.

 3) arch_disable_nonboot_cpus_begin() and arch_disable_nonboot_cpus_end()
    which were only used to set this one flag.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: Paul McKenney <paul.mckenney@us.ibm.com>
Cc: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/87vcgwwive.fsf@rustcorp.com.au
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 Documentation/kernel-parameters.txt |   3 -
 arch/x86/include/asm/alternative.h  |   4 +-
 arch/x86/kernel/alternative.c       | 107 +++++++++---------------------------
 arch/x86/kernel/smpboot.c           |  20 +------
 arch/x86/xen/smp.c                  |   6 +-
 kernel/cpu.c                        |  11 ----
 6 files changed, 32 insertions(+), 119 deletions(-)

(limited to 'arch/x86')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index ad7e2e5088c..7aef3345f73 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2638,9 +2638,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 	smart2=		[HW]
 			Format: <io1>[,<io2>[,...,<io8>]]
 
-	smp-alt-once	[X86-32,SMP] On a hotplug CPU system, only
-			attempt to substitute SMP alternatives once at boot.
-
 	smsc-ircc2.nopnp	[HW] Don't use PNP to discover SMC devices
 	smsc-ircc2.ircc_cfg=	[HW] Device configuration I/O port
 	smsc-ircc2.ircc_sir=	[HW] SIR base I/O port
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 70780689599..444704c8e18 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -60,7 +60,7 @@ extern void alternatives_smp_module_add(struct module *mod, char *name,
 					void *locks, void *locks_end,
 					void *text, void *text_end);
 extern void alternatives_smp_module_del(struct module *mod);
-extern void alternatives_smp_switch(int smp);
+extern void alternatives_enable_smp(void);
 extern int alternatives_text_reserved(void *start, void *end);
 extern bool skip_smp_alternatives;
 #else
@@ -68,7 +68,7 @@ static inline void alternatives_smp_module_add(struct module *mod, char *name,
 					       void *locks, void *locks_end,
 					       void *text, void *text_end) {}
 static inline void alternatives_smp_module_del(struct module *mod) {}
-static inline void alternatives_smp_switch(int smp) {}
+static inline void alternatives_enable_smp(void) {}
 static inline int alternatives_text_reserved(void *start, void *end)
 {
 	return 0;
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index afb7ff79a29..af1f326a31c 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -23,19 +23,6 @@
 
 #define MAX_PATCH_LEN (255-1)
 
-#ifdef CONFIG_HOTPLUG_CPU
-static int smp_alt_once;
-
-static int __init bootonly(char *str)
-{
-	smp_alt_once = 1;
-	return 1;
-}
-__setup("smp-alt-boot", bootonly);
-#else
-#define smp_alt_once 1
-#endif
-
 static int __initdata_or_module debug_alternative;
 
 static int __init debug_alt(char *str)
@@ -326,9 +313,6 @@ static void alternatives_smp_unlock(const s32 *start, const s32 *end,
 {
 	const s32 *poff;
 
-	if (noreplace_smp)
-		return;
-
 	mutex_lock(&text_mutex);
 	for (poff = start; poff < end; poff++) {
 		u8 *ptr = (u8 *)poff + *poff;
@@ -359,7 +343,7 @@ struct smp_alt_module {
 };
 static LIST_HEAD(smp_alt_modules);
 static DEFINE_MUTEX(smp_alt);
-static int smp_mode = 1;	/* protected by smp_alt */
+static bool uniproc_patched = false;	/* protected by smp_alt */
 
 void __init_or_module alternatives_smp_module_add(struct module *mod,
 						  char *name,
@@ -368,19 +352,18 @@ void __init_or_module alternatives_smp_module_add(struct module *mod,
 {
 	struct smp_alt_module *smp;
 
-	if (noreplace_smp)
-		return;
+	mutex_lock(&smp_alt);
+	if (!uniproc_patched)
+		goto unlock;
 
-	if (smp_alt_once) {
-		if (boot_cpu_has(X86_FEATURE_UP))
-			alternatives_smp_unlock(locks, locks_end,
-						text, text_end);
-		return;
-	}
+	if (num_possible_cpus() == 1)
+		/* Don't bother remembering, we'll never have to undo it. */
+		goto smp_unlock;
 
 	smp = kzalloc(sizeof(*smp), GFP_KERNEL);
 	if (NULL == smp)
-		return; /* we'll run the (safe but slow) SMP code then ... */
+		/* we'll run the (safe but slow) SMP code then ... */
+		goto unlock;
 
 	smp->mod	= mod;
 	smp->name	= name;
@@ -392,11 +375,10 @@ void __init_or_module alternatives_smp_module_add(struct module *mod,
 		__func__, smp->locks, smp->locks_end,
 		smp->text, smp->text_end, smp->name);
 
-	mutex_lock(&smp_alt);
 	list_add_tail(&smp->next, &smp_alt_modules);
-	if (boot_cpu_has(X86_FEATURE_UP))
-		alternatives_smp_unlock(smp->locks, smp->locks_end,
-					smp->text, smp->text_end);
+smp_unlock:
+	alternatives_smp_unlock(locks, locks_end, text, text_end);
+unlock:
 	mutex_unlock(&smp_alt);
 }
 
@@ -404,24 +386,18 @@ void __init_or_module alternatives_smp_module_del(struct module *mod)
 {
 	struct smp_alt_module *item;
 
-	if (smp_alt_once || noreplace_smp)
-		return;
-
 	mutex_lock(&smp_alt);
 	list_for_each_entry(item, &smp_alt_modules, next) {
 		if (mod != item->mod)
 			continue;
 		list_del(&item->next);
-		mutex_unlock(&smp_alt);
-		DPRINTK("%s: %s\n", __func__, item->name);
 		kfree(item);
-		return;
+		break;
 	}
 	mutex_unlock(&smp_alt);
 }
 
-bool skip_smp_alternatives;
-void alternatives_smp_switch(int smp)
+void alternatives_enable_smp(void)
 {
 	struct smp_alt_module *mod;
 
@@ -436,34 +412,21 @@ void alternatives_smp_switch(int smp)
 	pr_info("lockdep: fixing up alternatives\n");
 #endif
 
-	if (noreplace_smp || smp_alt_once || skip_smp_alternatives)
-		return;
-	BUG_ON(!smp && (num_online_cpus() > 1));
+	/* Why bother if there are no other CPUs? */
+	BUG_ON(num_possible_cpus() == 1);
 
 	mutex_lock(&smp_alt);
 
-	/*
-	 * Avoid unnecessary switches because it forces JIT based VMs to
-	 * throw away all cached translations, which can be quite costly.
-	 */
-	if (smp == smp_mode) {
-		/* nothing */
-	} else if (smp) {
+	if (uniproc_patched) {
 		pr_info("switching to SMP code\n");
+		BUG_ON(num_online_cpus() != 1);
 		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
 		clear_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
 		list_for_each_entry(mod, &smp_alt_modules, next)
 			alternatives_smp_lock(mod->locks, mod->locks_end,
 					      mod->text, mod->text_end);
-	} else {
-		pr_info("switching to UP code\n");
-		set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
-		set_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
-		list_for_each_entry(mod, &smp_alt_modules, next)
-			alternatives_smp_unlock(mod->locks, mod->locks_end,
-						mod->text, mod->text_end);
+		uniproc_patched = false;
 	}
-	smp_mode = smp;
 	mutex_unlock(&smp_alt);
 }
 
@@ -540,40 +503,22 @@ void __init alternative_instructions(void)
 
 	apply_alternatives(__alt_instructions, __alt_instructions_end);
 
-	/* switch to patch-once-at-boottime-only mode and free the
-	 * tables in case we know the number of CPUs will never ever
-	 * change */
-#ifdef CONFIG_HOTPLUG_CPU
-	if (num_possible_cpus() < 2)
-		smp_alt_once = 1;
-#endif
-
 #ifdef CONFIG_SMP
-	if (smp_alt_once) {
-		if (1 == num_possible_cpus()) {
-			pr_info("switching to UP code\n");
-			set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
-			set_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
-
-			alternatives_smp_unlock(__smp_locks, __smp_locks_end,
-						_text, _etext);
-		}
-	} else {
+	/* Patch to UP if other cpus not imminent. */
+	if (!noreplace_smp && (num_present_cpus() == 1 || setup_max_cpus <= 1)) {
+		uniproc_patched = true;
 		alternatives_smp_module_add(NULL, "core kernel",
 					    __smp_locks, __smp_locks_end,
 					    _text, _etext);
-
-		/* Only switch to UP mode if we don't immediately boot others */
-		if (num_present_cpus() == 1 || setup_max_cpus <= 1)
-			alternatives_smp_switch(0);
 	}
-#endif
- 	apply_paravirt(__parainstructions, __parainstructions_end);
 
-	if (smp_alt_once)
+	if (!uniproc_patched || num_possible_cpus() == 1)
 		free_init_pages("SMP alternatives",
 				(unsigned long)__smp_locks,
 				(unsigned long)__smp_locks_end);
+#endif
+
+	apply_paravirt(__parainstructions, __parainstructions_end);
 
 	restart_nmi();
 }
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 7c5a8c314c0..c80a33bc528 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -665,7 +665,8 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
 	unsigned long boot_error = 0;
 	int timeout;
 
-	alternatives_smp_switch(1);
+	/* Just in case we booted with a single CPU. */
+	alternatives_enable_smp();
 
 	idle->thread.sp = (unsigned long) (((struct pt_regs *)
 			  (THREAD_SIZE +  task_stack_page(idle))) - 1);
@@ -1053,20 +1054,6 @@ out:
 	preempt_enable();
 }
 
-void arch_disable_nonboot_cpus_begin(void)
-{
-	/*
-	 * Avoid the smp alternatives switch during the disable_nonboot_cpus().
-	 * In the suspend path, we will be back in the SMP mode shortly anyways.
-	 */
-	skip_smp_alternatives = true;
-}
-
-void arch_disable_nonboot_cpus_end(void)
-{
-	skip_smp_alternatives = false;
-}
-
 void arch_enable_nonboot_cpus_begin(void)
 {
 	set_mtrr_aps_delayed_init();
@@ -1256,9 +1243,6 @@ void native_cpu_die(unsigned int cpu)
 		if (per_cpu(cpu_state, cpu) == CPU_DEAD) {
 			if (system_state == SYSTEM_RUNNING)
 				pr_info("CPU %u is now offline\n", cpu);
-
-			if (1 == num_online_cpus())
-				alternatives_smp_switch(0);
 			return;
 		}
 		msleep(100);
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index f58dca7a6e5..353c50f1870 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -377,7 +377,8 @@ static int __cpuinit xen_cpu_up(unsigned int cpu, struct task_struct *idle)
 		return rc;
 
 	if (num_online_cpus() == 1)
-		alternatives_smp_switch(1);
+		/* Just in case we booted with a single CPU. */
+		alternatives_enable_smp();
 
 	rc = xen_smp_intr_init(cpu);
 	if (rc)
@@ -424,9 +425,6 @@ static void xen_cpu_die(unsigned int cpu)
 	unbind_from_irqhandler(per_cpu(xen_irq_work, cpu), NULL);
 	xen_uninit_lock_cpu(cpu);
 	xen_teardown_timer(cpu);
-
-	if (num_online_cpus() == 1)
-		alternatives_smp_switch(0);
 }
 
 static void __cpuinit xen_play_dead(void) /* used only with HOTPLUG_CPU */
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 14d32588ccc..f6bfe3e03f6 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -439,14 +439,6 @@ EXPORT_SYMBOL_GPL(cpu_up);
 #ifdef CONFIG_PM_SLEEP_SMP
 static cpumask_var_t frozen_cpus;
 
-void __weak arch_disable_nonboot_cpus_begin(void)
-{
-}
-
-void __weak arch_disable_nonboot_cpus_end(void)
-{
-}
-
 int disable_nonboot_cpus(void)
 {
 	int cpu, first_cpu, error = 0;
@@ -458,7 +450,6 @@ int disable_nonboot_cpus(void)
 	 * with the userspace trying to use the CPU hotplug at the same time
 	 */
 	cpumask_clear(frozen_cpus);
-	arch_disable_nonboot_cpus_begin();
 
 	printk("Disabling non-boot CPUs ...\n");
 	for_each_online_cpu(cpu) {
@@ -474,8 +465,6 @@ int disable_nonboot_cpus(void)
 		}
 	}
 
-	arch_disable_nonboot_cpus_end();
-
 	if (!error) {
 		BUG_ON(num_online_cpus() > 1);
 		/* Make sure the CPUs won't be enabled by someone else */
-- 
cgit v1.2.3-70-g09d2


From d57c5d51a30152f3175d2344cb6395f08bf8ee0c Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Wed, 9 Feb 2011 13:32:18 -0500
Subject: ftrace/x86: Add support for -mfentry to x86_64

If the kernel is compiled with gcc 4.6.0 which supports -mfentry,
then use that instead of mcount.

With mcount, frame pointers are forced with the -pg option and we
get something like:

<can_vma_merge_before>:
       55                      push   %rbp
       48 89 e5                mov    %rsp,%rbp
       53                      push   %rbx
       41 51                   push   %r9
       e8 fe 6a 39 00          callq  ffffffff81483d00 <mcount>
       31 c0                   xor    %eax,%eax
       48 89 fb                mov    %rdi,%rbx
       48 89 d7                mov    %rdx,%rdi
       48 33 73 30             xor    0x30(%rbx),%rsi
       48 f7 c6 ff ff ff f7    test   $0xfffffffff7ffffff,%rsi

With -mfentry, frame pointers are no longer forced and the call looks
like this:

<can_vma_merge_before>:
       e8 33 af 37 00          callq  ffffffff81461b40 <__fentry__>
       53                      push   %rbx
       48 89 fb                mov    %rdi,%rbx
       31 c0                   xor    %eax,%eax
       48 89 d7                mov    %rdx,%rdi
       41 51                   push   %r9
       48 33 73 30             xor    0x30(%rbx),%rsi
       48 f7 c6 ff ff ff f7    test   $0xfffffffff7ffffff,%rsi

This adds the ftrace hook at the beginning of the function before a
frame is set up, and allows the function callbacks to be able to access
parameters. As kprobes now can use function tracing (at least on x86)
this speeds up the kprobe hooks that are at the beginning of the
function.

Link: http://lkml.kernel.org/r/20120807194100.130477900@goodmis.org

Acked-by: Ingo Molnar <mingo@kernel.org>
Reviewed-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Andi Kleen <andi@firstfloor.org>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 arch/x86/Kconfig                 |  1 +
 arch/x86/include/asm/ftrace.h    |  7 ++++++-
 arch/x86/kernel/entry_64.S       | 32 +++++++++++++++++++++++++++-----
 arch/x86/kernel/x8664_ksyms_64.c |  6 +++++-
 4 files changed, 39 insertions(+), 7 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index a2d19ee750c..28dd891a0a1 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -36,6 +36,7 @@ config X86
 	select HAVE_KRETPROBES
 	select HAVE_OPTPROBES
 	select HAVE_FTRACE_MCOUNT_RECORD
+	select HAVE_FENTRY if X86_64
 	select HAVE_C_RECORDMCOUNT
 	select HAVE_DYNAMIC_FTRACE
 	select HAVE_FUNCTION_TRACER
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index a6cae0c1720..9a25b522d37 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -35,7 +35,11 @@
 #endif
 
 #ifdef CONFIG_FUNCTION_TRACER
-#define MCOUNT_ADDR		((long)(mcount))
+#ifdef CC_USING_FENTRY
+# define MCOUNT_ADDR		((long)(__fentry__))
+#else
+# define MCOUNT_ADDR		((long)(mcount))
+#endif
 #define MCOUNT_INSN_SIZE	5 /* sizeof mcount call */
 
 #ifdef CONFIG_DYNAMIC_FTRACE
@@ -46,6 +50,7 @@
 #ifndef __ASSEMBLY__
 extern void mcount(void);
 extern atomic_t modifying_ftrace_code;
+extern void __fentry__(void);
 
 static inline unsigned long ftrace_call_adjust(unsigned long addr)
 {
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index b7a81dcb736..ed767b747fe 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -68,10 +68,18 @@
 	.section .entry.text, "ax"
 
 #ifdef CONFIG_FUNCTION_TRACER
+
+#ifdef CC_USING_FENTRY
+# define function_hook	__fentry__
+#else
+# define function_hook	mcount
+#endif
+
 #ifdef CONFIG_DYNAMIC_FTRACE
-ENTRY(mcount)
+
+ENTRY(function_hook)
 	retq
-END(mcount)
+END(function_hook)
 
 /* skip is set if stack has been adjusted */
 .macro ftrace_caller_setup skip=0
@@ -84,7 +92,11 @@ END(mcount)
 	movq RIP(%rsp), %rdi
 	subq $MCOUNT_INSN_SIZE, %rdi
 	/* Load the parent_ip into the second parameter */
+#ifdef CC_USING_FENTRY
+	movq SS+16(%rsp), %rsi
+#else
 	movq 8(%rbp), %rsi
+#endif
 .endm
 
 ENTRY(ftrace_caller)
@@ -177,7 +189,8 @@ END(ftrace_regs_caller)
 
 
 #else /* ! CONFIG_DYNAMIC_FTRACE */
-ENTRY(mcount)
+
+ENTRY(function_hook)
 	cmpl $0, function_trace_stop
 	jne  ftrace_stub
 
@@ -199,7 +212,11 @@ trace:
 	MCOUNT_SAVE_FRAME
 
 	movq RIP(%rsp), %rdi
+#ifdef CC_USING_FENTRY
+	movq SS+16(%rsp), %rsi
+#else
 	movq 8(%rbp), %rsi
+#endif
 	subq $MCOUNT_INSN_SIZE, %rdi
 
 	call   *ftrace_trace_function
@@ -207,7 +224,7 @@ trace:
 	MCOUNT_RESTORE_FRAME
 
 	jmp ftrace_stub
-END(mcount)
+END(function_hook)
 #endif /* CONFIG_DYNAMIC_FTRACE */
 #endif /* CONFIG_FUNCTION_TRACER */
 
@@ -215,9 +232,14 @@ END(mcount)
 ENTRY(ftrace_graph_caller)
 	MCOUNT_SAVE_FRAME
 
+#ifdef CC_USING_FENTRY
+	leaq SS+16(%rsp), %rdi
+	movq $0, %rdx	/* No framepointers needed */
+#else
 	leaq 8(%rbp), %rdi
-	movq RIP(%rsp), %rsi
 	movq (%rbp), %rdx
+#endif
+	movq RIP(%rsp), %rsi
 	subq $MCOUNT_INSN_SIZE, %rsi
 
 	call	prepare_ftrace_return
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index 6020f6f5927..1330dd10295 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -13,9 +13,13 @@
 #include <asm/ftrace.h>
 
 #ifdef CONFIG_FUNCTION_TRACER
-/* mcount is defined in assembly */
+/* mcount and __fentry__ are defined in assembly */
+#ifdef CC_USING_FENTRY
+EXPORT_SYMBOL(__fentry__);
+#else
 EXPORT_SYMBOL(mcount);
 #endif
+#endif
 
 EXPORT_SYMBOL(__get_user_1);
 EXPORT_SYMBOL(__get_user_2);
-- 
cgit v1.2.3-70-g09d2


From e3e45c01ae690e65f2650e5288b9af802e95a136 Mon Sep 17 00:00:00 2001
From: Stephane Eranian <eranian@google.com>
Date: Fri, 24 Aug 2012 15:34:34 +0200
Subject: perf/x86: Fix microcode revision check for SNB-PEBS

The following patch makes the microcode update code path
actually invoke the perf_check_microcode() function and
thus potentially renabling SNB PEBS.

By default, CONFIG_MICROCODE_OLD_INTERFACE is
forced to Y in arch/x86/Kconfig. There is no
way to disable this. That means that the code
path used in arch/x86/kernel/microcode_core.c
did not include the call to perf_check_microcode().

Thus, even though the microcode was updated to a
version that fixes the SNB PEBS problem, perf_event
would still return EOPNOTSUPP when enabling precise
sampling.

This patch simply adds a call to perf_check_microcode()
in the call path used when OLD_INTERFACE=y.

Signed-off-by: Stephane Eranian <eranian@google.com>
Acked-by: Borislav Petkov <borislav.petkov@amd.com>
Cc: peterz@infradead.org
Cc: andi@firstfloor.org
Link: http://lkml.kernel.org/r/20120824133434.GA8014@quad
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/microcode_core.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index 4873e62db6a..9e5bcf1e237 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -225,6 +225,9 @@ static ssize_t microcode_write(struct file *file, const char __user *buf,
 	if (do_microcode_update(buf, len) == 0)
 		ret = (ssize_t)len;
 
+	if (ret > 0)
+		perf_check_microcode();
+
 	mutex_unlock(&microcode_mutex);
 	put_online_cpus();
 
-- 
cgit v1.2.3-70-g09d2


From dd856efafe6097a5c9104725c2bca74430423db8 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Mon, 27 Aug 2012 23:46:17 +0300
Subject: KVM: x86 emulator: access GPRs on demand

Instead of populating the entire register file, read in registers
as they are accessed, and write back only the modified ones.  This
saves a VMREAD and VMWRITE on Intel (for rsp, since it is not usually
used during emulation), and a two 128-byte copies for the registers.

Signed-off-by: Avi Kivity <avi@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/include/asm/kvm_emulate.h |  20 ++-
 arch/x86/kvm/emulate.c             | 299 +++++++++++++++++++++++--------------
 arch/x86/kvm/x86.c                 |  45 +++---
 3 files changed, 220 insertions(+), 144 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index c764f43b71c..282aee5d6ac 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -85,6 +85,19 @@ struct x86_instruction_info {
 #define X86EMUL_INTERCEPTED     6 /* Intercepted by nested VMCB/VMCS */
 
 struct x86_emulate_ops {
+	/*
+	 * read_gpr: read a general purpose register (rax - r15)
+	 *
+	 * @reg: gpr number.
+	 */
+	ulong (*read_gpr)(struct x86_emulate_ctxt *ctxt, unsigned reg);
+	/*
+	 * write_gpr: write a general purpose register (rax - r15)
+	 *
+	 * @reg: gpr number.
+	 * @val: value to write.
+	 */
+	void (*write_gpr)(struct x86_emulate_ctxt *ctxt, unsigned reg, ulong val);
 	/*
 	 * read_std: Read bytes of standard (non-emulated/special) memory.
 	 *           Used for descriptor reading.
@@ -281,8 +294,10 @@ struct x86_emulate_ctxt {
 	bool rip_relative;
 	unsigned long _eip;
 	struct operand memop;
+	u32 regs_valid;  /* bitmaps of registers in _regs[] that can be read */
+	u32 regs_dirty;  /* bitmaps of registers in _regs[] that have been written */
 	/* Fields above regs are cleared together. */
-	unsigned long regs[NR_VCPU_REGS];
+	unsigned long _regs[NR_VCPU_REGS];
 	struct operand *memopp;
 	struct fetch_cache fetch;
 	struct read_cache io_read;
@@ -394,4 +409,7 @@ int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
 			 u16 tss_selector, int idt_index, int reason,
 			 bool has_error_code, u32 error_code);
 int emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq);
+void emulator_invalidate_register_cache(struct x86_emulate_ctxt *ctxt);
+void emulator_writeback_register_cache(struct x86_emulate_ctxt *ctxt);
+
 #endif /* _ASM_X86_KVM_X86_EMULATE_H */
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index e8fb6c5c6c0..5e27ba53261 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -202,6 +202,42 @@ struct gprefix {
 #define EFLG_RESERVED_ZEROS_MASK 0xffc0802a
 #define EFLG_RESERVED_ONE_MASK 2
 
+static ulong reg_read(struct x86_emulate_ctxt *ctxt, unsigned nr)
+{
+	if (!(ctxt->regs_valid & (1 << nr))) {
+		ctxt->regs_valid |= 1 << nr;
+		ctxt->_regs[nr] = ctxt->ops->read_gpr(ctxt, nr);
+	}
+	return ctxt->_regs[nr];
+}
+
+static ulong *reg_write(struct x86_emulate_ctxt *ctxt, unsigned nr)
+{
+	ctxt->regs_valid |= 1 << nr;
+	ctxt->regs_dirty |= 1 << nr;
+	return &ctxt->_regs[nr];
+}
+
+static ulong *reg_rmw(struct x86_emulate_ctxt *ctxt, unsigned nr)
+{
+	reg_read(ctxt, nr);
+	return reg_write(ctxt, nr);
+}
+
+static void writeback_registers(struct x86_emulate_ctxt *ctxt)
+{
+	unsigned reg;
+
+	for_each_set_bit(reg, (ulong *)&ctxt->regs_dirty, 16)
+		ctxt->ops->write_gpr(ctxt, reg, ctxt->_regs[reg]);
+}
+
+static void invalidate_registers(struct x86_emulate_ctxt *ctxt)
+{
+	ctxt->regs_dirty = 0;
+	ctxt->regs_valid = 0;
+}
+
 /*
  * Instruction emulation:
  * Most instructions are emulated directly via a fragment of inline assembly
@@ -374,8 +410,8 @@ struct gprefix {
 #define __emulate_1op_rax_rdx(ctxt, _op, _suffix, _ex)			\
 	do {								\
 		unsigned long _tmp;					\
-		ulong *rax = &(ctxt)->regs[VCPU_REGS_RAX];		\
-		ulong *rdx = &(ctxt)->regs[VCPU_REGS_RDX];		\
+		ulong *rax = reg_rmw((ctxt), VCPU_REGS_RAX);		\
+		ulong *rdx = reg_rmw((ctxt), VCPU_REGS_RDX);		\
 									\
 		__asm__ __volatile__ (					\
 			_PRE_EFLAGS("0", "5", "1")			\
@@ -494,7 +530,7 @@ register_address_increment(struct x86_emulate_ctxt *ctxt, unsigned long *reg, in
 
 static void rsp_increment(struct x86_emulate_ctxt *ctxt, int inc)
 {
-	masked_increment(&ctxt->regs[VCPU_REGS_RSP], stack_mask(ctxt), inc);
+	masked_increment(reg_rmw(ctxt, VCPU_REGS_RSP), stack_mask(ctxt), inc);
 }
 
 static inline void jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
@@ -786,14 +822,15 @@ static int do_insn_fetch(struct x86_emulate_ctxt *ctxt,
  * pointer into the block that addresses the relevant register.
  * @highbyte_regs specifies whether to decode AH,CH,DH,BH.
  */
-static void *decode_register(u8 modrm_reg, unsigned long *regs,
+static void *decode_register(struct x86_emulate_ctxt *ctxt, u8 modrm_reg,
 			     int highbyte_regs)
 {
 	void *p;
 
-	p = &regs[modrm_reg];
 	if (highbyte_regs && modrm_reg >= 4 && modrm_reg < 8)
-		p = (unsigned char *)&regs[modrm_reg & 3] + 1;
+		p = (unsigned char *)reg_rmw(ctxt, modrm_reg & 3) + 1;
+	else
+		p = reg_rmw(ctxt, modrm_reg);
 	return p;
 }
 
@@ -982,10 +1019,10 @@ static void decode_register_operand(struct x86_emulate_ctxt *ctxt,
 
 	op->type = OP_REG;
 	if (ctxt->d & ByteOp) {
-		op->addr.reg = decode_register(reg, ctxt->regs, highbyte_regs);
+		op->addr.reg = decode_register(ctxt, reg, highbyte_regs);
 		op->bytes = 1;
 	} else {
-		op->addr.reg = decode_register(reg, ctxt->regs, 0);
+		op->addr.reg = decode_register(ctxt, reg, 0);
 		op->bytes = ctxt->op_bytes;
 	}
 	fetch_register_operand(op);
@@ -1020,8 +1057,7 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
 	if (ctxt->modrm_mod == 3) {
 		op->type = OP_REG;
 		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
-		op->addr.reg = decode_register(ctxt->modrm_rm,
-					       ctxt->regs, ctxt->d & ByteOp);
+		op->addr.reg = decode_register(ctxt, ctxt->modrm_rm, ctxt->d & ByteOp);
 		if (ctxt->d & Sse) {
 			op->type = OP_XMM;
 			op->bytes = 16;
@@ -1042,10 +1078,10 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
 	op->type = OP_MEM;
 
 	if (ctxt->ad_bytes == 2) {
-		unsigned bx = ctxt->regs[VCPU_REGS_RBX];
-		unsigned bp = ctxt->regs[VCPU_REGS_RBP];
-		unsigned si = ctxt->regs[VCPU_REGS_RSI];
-		unsigned di = ctxt->regs[VCPU_REGS_RDI];
+		unsigned bx = reg_read(ctxt, VCPU_REGS_RBX);
+		unsigned bp = reg_read(ctxt, VCPU_REGS_RBP);
+		unsigned si = reg_read(ctxt, VCPU_REGS_RSI);
+		unsigned di = reg_read(ctxt, VCPU_REGS_RDI);
 
 		/* 16-bit ModR/M decode. */
 		switch (ctxt->modrm_mod) {
@@ -1102,17 +1138,17 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
 			if ((base_reg & 7) == 5 && ctxt->modrm_mod == 0)
 				modrm_ea += insn_fetch(s32, ctxt);
 			else {
-				modrm_ea += ctxt->regs[base_reg];
+				modrm_ea += reg_read(ctxt, base_reg);
 				adjust_modrm_seg(ctxt, base_reg);
 			}
 			if (index_reg != 4)
-				modrm_ea += ctxt->regs[index_reg] << scale;
+				modrm_ea += reg_read(ctxt, index_reg) << scale;
 		} else if ((ctxt->modrm_rm & 7) == 5 && ctxt->modrm_mod == 0) {
 			if (ctxt->mode == X86EMUL_MODE_PROT64)
 				ctxt->rip_relative = 1;
 		} else {
 			base_reg = ctxt->modrm_rm;
-			modrm_ea += ctxt->regs[base_reg];
+			modrm_ea += reg_read(ctxt, base_reg);
 			adjust_modrm_seg(ctxt, base_reg);
 		}
 		switch (ctxt->modrm_mod) {
@@ -1250,10 +1286,10 @@ static int pio_in_emulated(struct x86_emulate_ctxt *ctxt,
 	if (rc->pos == rc->end) { /* refill pio read ahead */
 		unsigned int in_page, n;
 		unsigned int count = ctxt->rep_prefix ?
-			address_mask(ctxt, ctxt->regs[VCPU_REGS_RCX]) : 1;
+			address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) : 1;
 		in_page = (ctxt->eflags & EFLG_DF) ?
-			offset_in_page(ctxt->regs[VCPU_REGS_RDI]) :
-			PAGE_SIZE - offset_in_page(ctxt->regs[VCPU_REGS_RDI]);
+			offset_in_page(reg_read(ctxt, VCPU_REGS_RDI)) :
+			PAGE_SIZE - offset_in_page(reg_read(ctxt, VCPU_REGS_RDI));
 		n = min(min(in_page, (unsigned int)sizeof(rc->data)) / size,
 			count);
 		if (n == 0)
@@ -1533,7 +1569,7 @@ static int push(struct x86_emulate_ctxt *ctxt, void *data, int bytes)
 	struct segmented_address addr;
 
 	rsp_increment(ctxt, -bytes);
-	addr.ea = ctxt->regs[VCPU_REGS_RSP] & stack_mask(ctxt);
+	addr.ea = reg_read(ctxt, VCPU_REGS_RSP) & stack_mask(ctxt);
 	addr.seg = VCPU_SREG_SS;
 
 	return segmented_write(ctxt, addr, data, bytes);
@@ -1552,7 +1588,7 @@ static int emulate_pop(struct x86_emulate_ctxt *ctxt,
 	int rc;
 	struct segmented_address addr;
 
-	addr.ea = ctxt->regs[VCPU_REGS_RSP] & stack_mask(ctxt);
+	addr.ea = reg_read(ctxt, VCPU_REGS_RSP) & stack_mask(ctxt);
 	addr.seg = VCPU_SREG_SS;
 	rc = segmented_read(ctxt, addr, dest, len);
 	if (rc != X86EMUL_CONTINUE)
@@ -1620,26 +1656,28 @@ static int em_enter(struct x86_emulate_ctxt *ctxt)
 	int rc;
 	unsigned frame_size = ctxt->src.val;
 	unsigned nesting_level = ctxt->src2.val & 31;
+	ulong rbp;
 
 	if (nesting_level)
 		return X86EMUL_UNHANDLEABLE;
 
-	rc = push(ctxt, &ctxt->regs[VCPU_REGS_RBP], stack_size(ctxt));
+	rbp = reg_read(ctxt, VCPU_REGS_RBP);
+	rc = push(ctxt, &rbp, stack_size(ctxt));
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
-	assign_masked(&ctxt->regs[VCPU_REGS_RBP], ctxt->regs[VCPU_REGS_RSP],
+	assign_masked(reg_rmw(ctxt, VCPU_REGS_RBP), reg_read(ctxt, VCPU_REGS_RSP),
 		      stack_mask(ctxt));
-	assign_masked(&ctxt->regs[VCPU_REGS_RSP],
-		      ctxt->regs[VCPU_REGS_RSP] - frame_size,
+	assign_masked(reg_rmw(ctxt, VCPU_REGS_RSP),
+		      reg_read(ctxt, VCPU_REGS_RSP) - frame_size,
 		      stack_mask(ctxt));
 	return X86EMUL_CONTINUE;
 }
 
 static int em_leave(struct x86_emulate_ctxt *ctxt)
 {
-	assign_masked(&ctxt->regs[VCPU_REGS_RSP], ctxt->regs[VCPU_REGS_RBP],
+	assign_masked(reg_rmw(ctxt, VCPU_REGS_RSP), reg_read(ctxt, VCPU_REGS_RBP),
 		      stack_mask(ctxt));
-	return emulate_pop(ctxt, &ctxt->regs[VCPU_REGS_RBP], ctxt->op_bytes);
+	return emulate_pop(ctxt, reg_rmw(ctxt, VCPU_REGS_RBP), ctxt->op_bytes);
 }
 
 static int em_push_sreg(struct x86_emulate_ctxt *ctxt)
@@ -1667,13 +1705,13 @@ static int em_pop_sreg(struct x86_emulate_ctxt *ctxt)
 
 static int em_pusha(struct x86_emulate_ctxt *ctxt)
 {
-	unsigned long old_esp = ctxt->regs[VCPU_REGS_RSP];
+	unsigned long old_esp = reg_read(ctxt, VCPU_REGS_RSP);
 	int rc = X86EMUL_CONTINUE;
 	int reg = VCPU_REGS_RAX;
 
 	while (reg <= VCPU_REGS_RDI) {
 		(reg == VCPU_REGS_RSP) ?
-		(ctxt->src.val = old_esp) : (ctxt->src.val = ctxt->regs[reg]);
+		(ctxt->src.val = old_esp) : (ctxt->src.val = reg_read(ctxt, reg));
 
 		rc = em_push(ctxt);
 		if (rc != X86EMUL_CONTINUE)
@@ -1702,7 +1740,7 @@ static int em_popa(struct x86_emulate_ctxt *ctxt)
 			--reg;
 		}
 
-		rc = emulate_pop(ctxt, &ctxt->regs[reg], ctxt->op_bytes);
+		rc = emulate_pop(ctxt, reg_rmw(ctxt, reg), ctxt->op_bytes);
 		if (rc != X86EMUL_CONTINUE)
 			break;
 		--reg;
@@ -1710,7 +1748,7 @@ static int em_popa(struct x86_emulate_ctxt *ctxt)
 	return rc;
 }
 
-int emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq)
+static int __emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq)
 {
 	struct x86_emulate_ops *ops = ctxt->ops;
 	int rc;
@@ -1759,11 +1797,22 @@ int emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq)
 	return rc;
 }
 
+int emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq)
+{
+	int rc;
+
+	invalidate_registers(ctxt);
+	rc = __emulate_int_real(ctxt, irq);
+	if (rc == X86EMUL_CONTINUE)
+		writeback_registers(ctxt);
+	return rc;
+}
+
 static int emulate_int(struct x86_emulate_ctxt *ctxt, int irq)
 {
 	switch(ctxt->mode) {
 	case X86EMUL_MODE_REAL:
-		return emulate_int_real(ctxt, irq);
+		return __emulate_int_real(ctxt, irq);
 	case X86EMUL_MODE_VM86:
 	case X86EMUL_MODE_PROT16:
 	case X86EMUL_MODE_PROT32:
@@ -1970,14 +2019,14 @@ static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt)
 {
 	u64 old = ctxt->dst.orig_val64;
 
-	if (((u32) (old >> 0) != (u32) ctxt->regs[VCPU_REGS_RAX]) ||
-	    ((u32) (old >> 32) != (u32) ctxt->regs[VCPU_REGS_RDX])) {
-		ctxt->regs[VCPU_REGS_RAX] = (u32) (old >> 0);
-		ctxt->regs[VCPU_REGS_RDX] = (u32) (old >> 32);
+	if (((u32) (old >> 0) != (u32) reg_read(ctxt, VCPU_REGS_RAX)) ||
+	    ((u32) (old >> 32) != (u32) reg_read(ctxt, VCPU_REGS_RDX))) {
+		*reg_write(ctxt, VCPU_REGS_RAX) = (u32) (old >> 0);
+		*reg_write(ctxt, VCPU_REGS_RDX) = (u32) (old >> 32);
 		ctxt->eflags &= ~EFLG_ZF;
 	} else {
-		ctxt->dst.val64 = ((u64)ctxt->regs[VCPU_REGS_RCX] << 32) |
-			(u32) ctxt->regs[VCPU_REGS_RBX];
+		ctxt->dst.val64 = ((u64)reg_read(ctxt, VCPU_REGS_RCX) << 32) |
+			(u32) reg_read(ctxt, VCPU_REGS_RBX);
 
 		ctxt->eflags |= EFLG_ZF;
 	}
@@ -2013,7 +2062,7 @@ static int em_cmpxchg(struct x86_emulate_ctxt *ctxt)
 {
 	/* Save real source value, then compare EAX against destination. */
 	ctxt->src.orig_val = ctxt->src.val;
-	ctxt->src.val = ctxt->regs[VCPU_REGS_RAX];
+	ctxt->src.val = reg_read(ctxt, VCPU_REGS_RAX);
 	emulate_2op_SrcV(ctxt, "cmp");
 
 	if (ctxt->eflags & EFLG_ZF) {
@@ -2022,7 +2071,7 @@ static int em_cmpxchg(struct x86_emulate_ctxt *ctxt)
 	} else {
 		/* Failure: write the value we saw to EAX. */
 		ctxt->dst.type = OP_REG;
-		ctxt->dst.addr.reg = (unsigned long *)&ctxt->regs[VCPU_REGS_RAX];
+		ctxt->dst.addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
 	}
 	return X86EMUL_CONTINUE;
 }
@@ -2159,10 +2208,10 @@ static int em_syscall(struct x86_emulate_ctxt *ctxt)
 	ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
 	ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
 
-	ctxt->regs[VCPU_REGS_RCX] = ctxt->_eip;
+	*reg_write(ctxt, VCPU_REGS_RCX) = ctxt->_eip;
 	if (efer & EFER_LMA) {
 #ifdef CONFIG_X86_64
-		ctxt->regs[VCPU_REGS_R11] = ctxt->eflags & ~EFLG_RF;
+		*reg_write(ctxt, VCPU_REGS_R11) = ctxt->eflags & ~EFLG_RF;
 
 		ops->get_msr(ctxt,
 			     ctxt->mode == X86EMUL_MODE_PROT64 ?
@@ -2241,7 +2290,7 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt)
 	ctxt->_eip = msr_data;
 
 	ops->get_msr(ctxt, MSR_IA32_SYSENTER_ESP, &msr_data);
-	ctxt->regs[VCPU_REGS_RSP] = msr_data;
+	*reg_write(ctxt, VCPU_REGS_RSP) = msr_data;
 
 	return X86EMUL_CONTINUE;
 }
@@ -2291,8 +2340,8 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
 	ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
 	ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
 
-	ctxt->_eip = ctxt->regs[VCPU_REGS_RDX];
-	ctxt->regs[VCPU_REGS_RSP] = ctxt->regs[VCPU_REGS_RCX];
+	ctxt->_eip = reg_read(ctxt, VCPU_REGS_RDX);
+	*reg_write(ctxt, VCPU_REGS_RSP) = reg_read(ctxt, VCPU_REGS_RCX);
 
 	return X86EMUL_CONTINUE;
 }
@@ -2361,14 +2410,14 @@ static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt,
 {
 	tss->ip = ctxt->_eip;
 	tss->flag = ctxt->eflags;
-	tss->ax = ctxt->regs[VCPU_REGS_RAX];
-	tss->cx = ctxt->regs[VCPU_REGS_RCX];
-	tss->dx = ctxt->regs[VCPU_REGS_RDX];
-	tss->bx = ctxt->regs[VCPU_REGS_RBX];
-	tss->sp = ctxt->regs[VCPU_REGS_RSP];
-	tss->bp = ctxt->regs[VCPU_REGS_RBP];
-	tss->si = ctxt->regs[VCPU_REGS_RSI];
-	tss->di = ctxt->regs[VCPU_REGS_RDI];
+	tss->ax = reg_read(ctxt, VCPU_REGS_RAX);
+	tss->cx = reg_read(ctxt, VCPU_REGS_RCX);
+	tss->dx = reg_read(ctxt, VCPU_REGS_RDX);
+	tss->bx = reg_read(ctxt, VCPU_REGS_RBX);
+	tss->sp = reg_read(ctxt, VCPU_REGS_RSP);
+	tss->bp = reg_read(ctxt, VCPU_REGS_RBP);
+	tss->si = reg_read(ctxt, VCPU_REGS_RSI);
+	tss->di = reg_read(ctxt, VCPU_REGS_RDI);
 
 	tss->es = get_segment_selector(ctxt, VCPU_SREG_ES);
 	tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS);
@@ -2384,14 +2433,14 @@ static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
 
 	ctxt->_eip = tss->ip;
 	ctxt->eflags = tss->flag | 2;
-	ctxt->regs[VCPU_REGS_RAX] = tss->ax;
-	ctxt->regs[VCPU_REGS_RCX] = tss->cx;
-	ctxt->regs[VCPU_REGS_RDX] = tss->dx;
-	ctxt->regs[VCPU_REGS_RBX] = tss->bx;
-	ctxt->regs[VCPU_REGS_RSP] = tss->sp;
-	ctxt->regs[VCPU_REGS_RBP] = tss->bp;
-	ctxt->regs[VCPU_REGS_RSI] = tss->si;
-	ctxt->regs[VCPU_REGS_RDI] = tss->di;
+	*reg_write(ctxt, VCPU_REGS_RAX) = tss->ax;
+	*reg_write(ctxt, VCPU_REGS_RCX) = tss->cx;
+	*reg_write(ctxt, VCPU_REGS_RDX) = tss->dx;
+	*reg_write(ctxt, VCPU_REGS_RBX) = tss->bx;
+	*reg_write(ctxt, VCPU_REGS_RSP) = tss->sp;
+	*reg_write(ctxt, VCPU_REGS_RBP) = tss->bp;
+	*reg_write(ctxt, VCPU_REGS_RSI) = tss->si;
+	*reg_write(ctxt, VCPU_REGS_RDI) = tss->di;
 
 	/*
 	 * SDM says that segment selectors are loaded before segment
@@ -2476,14 +2525,14 @@ static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt,
 	tss->cr3 = ctxt->ops->get_cr(ctxt, 3);
 	tss->eip = ctxt->_eip;
 	tss->eflags = ctxt->eflags;
-	tss->eax = ctxt->regs[VCPU_REGS_RAX];
-	tss->ecx = ctxt->regs[VCPU_REGS_RCX];
-	tss->edx = ctxt->regs[VCPU_REGS_RDX];
-	tss->ebx = ctxt->regs[VCPU_REGS_RBX];
-	tss->esp = ctxt->regs[VCPU_REGS_RSP];
-	tss->ebp = ctxt->regs[VCPU_REGS_RBP];
-	tss->esi = ctxt->regs[VCPU_REGS_RSI];
-	tss->edi = ctxt->regs[VCPU_REGS_RDI];
+	tss->eax = reg_read(ctxt, VCPU_REGS_RAX);
+	tss->ecx = reg_read(ctxt, VCPU_REGS_RCX);
+	tss->edx = reg_read(ctxt, VCPU_REGS_RDX);
+	tss->ebx = reg_read(ctxt, VCPU_REGS_RBX);
+	tss->esp = reg_read(ctxt, VCPU_REGS_RSP);
+	tss->ebp = reg_read(ctxt, VCPU_REGS_RBP);
+	tss->esi = reg_read(ctxt, VCPU_REGS_RSI);
+	tss->edi = reg_read(ctxt, VCPU_REGS_RDI);
 
 	tss->es = get_segment_selector(ctxt, VCPU_SREG_ES);
 	tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS);
@@ -2505,14 +2554,14 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
 	ctxt->eflags = tss->eflags | 2;
 
 	/* General purpose registers */
-	ctxt->regs[VCPU_REGS_RAX] = tss->eax;
-	ctxt->regs[VCPU_REGS_RCX] = tss->ecx;
-	ctxt->regs[VCPU_REGS_RDX] = tss->edx;
-	ctxt->regs[VCPU_REGS_RBX] = tss->ebx;
-	ctxt->regs[VCPU_REGS_RSP] = tss->esp;
-	ctxt->regs[VCPU_REGS_RBP] = tss->ebp;
-	ctxt->regs[VCPU_REGS_RSI] = tss->esi;
-	ctxt->regs[VCPU_REGS_RDI] = tss->edi;
+	*reg_write(ctxt, VCPU_REGS_RAX) = tss->eax;
+	*reg_write(ctxt, VCPU_REGS_RCX) = tss->ecx;
+	*reg_write(ctxt, VCPU_REGS_RDX) = tss->edx;
+	*reg_write(ctxt, VCPU_REGS_RBX) = tss->ebx;
+	*reg_write(ctxt, VCPU_REGS_RSP) = tss->esp;
+	*reg_write(ctxt, VCPU_REGS_RBP) = tss->ebp;
+	*reg_write(ctxt, VCPU_REGS_RSI) = tss->esi;
+	*reg_write(ctxt, VCPU_REGS_RDI) = tss->edi;
 
 	/*
 	 * SDM says that segment selectors are loaded before segment
@@ -2727,14 +2776,17 @@ int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
 {
 	int rc;
 
+	invalidate_registers(ctxt);
 	ctxt->_eip = ctxt->eip;
 	ctxt->dst.type = OP_NONE;
 
 	rc = emulator_do_task_switch(ctxt, tss_selector, idt_index, reason,
 				     has_error_code, error_code);
 
-	if (rc == X86EMUL_CONTINUE)
+	if (rc == X86EMUL_CONTINUE) {
 		ctxt->eip = ctxt->_eip;
+		writeback_registers(ctxt);
+	}
 
 	return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK;
 }
@@ -2744,8 +2796,8 @@ static void string_addr_inc(struct x86_emulate_ctxt *ctxt, unsigned seg,
 {
 	int df = (ctxt->eflags & EFLG_DF) ? -1 : 1;
 
-	register_address_increment(ctxt, &ctxt->regs[reg], df * op->bytes);
-	op->addr.mem.ea = register_address(ctxt, ctxt->regs[reg]);
+	register_address_increment(ctxt, reg_rmw(ctxt, reg), df * op->bytes);
+	op->addr.mem.ea = register_address(ctxt, reg_read(ctxt, reg));
 	op->addr.mem.seg = seg;
 }
 
@@ -2921,7 +2973,7 @@ static int em_cwd(struct x86_emulate_ctxt *ctxt)
 {
 	ctxt->dst.type = OP_REG;
 	ctxt->dst.bytes = ctxt->src.bytes;
-	ctxt->dst.addr.reg = &ctxt->regs[VCPU_REGS_RDX];
+	ctxt->dst.addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
 	ctxt->dst.val = ~((ctxt->src.val >> (ctxt->src.bytes * 8 - 1)) - 1);
 
 	return X86EMUL_CONTINUE;
@@ -2932,8 +2984,8 @@ static int em_rdtsc(struct x86_emulate_ctxt *ctxt)
 	u64 tsc = 0;
 
 	ctxt->ops->get_msr(ctxt, MSR_IA32_TSC, &tsc);
-	ctxt->regs[VCPU_REGS_RAX] = (u32)tsc;
-	ctxt->regs[VCPU_REGS_RDX] = tsc >> 32;
+	*reg_write(ctxt, VCPU_REGS_RAX) = (u32)tsc;
+	*reg_write(ctxt, VCPU_REGS_RDX) = tsc >> 32;
 	return X86EMUL_CONTINUE;
 }
 
@@ -2941,10 +2993,10 @@ static int em_rdpmc(struct x86_emulate_ctxt *ctxt)
 {
 	u64 pmc;
 
-	if (ctxt->ops->read_pmc(ctxt, ctxt->regs[VCPU_REGS_RCX], &pmc))
+	if (ctxt->ops->read_pmc(ctxt, reg_read(ctxt, VCPU_REGS_RCX), &pmc))
 		return emulate_gp(ctxt, 0);
-	ctxt->regs[VCPU_REGS_RAX] = (u32)pmc;
-	ctxt->regs[VCPU_REGS_RDX] = pmc >> 32;
+	*reg_write(ctxt, VCPU_REGS_RAX) = (u32)pmc;
+	*reg_write(ctxt, VCPU_REGS_RDX) = pmc >> 32;
 	return X86EMUL_CONTINUE;
 }
 
@@ -2986,9 +3038,9 @@ static int em_wrmsr(struct x86_emulate_ctxt *ctxt)
 {
 	u64 msr_data;
 
-	msr_data = (u32)ctxt->regs[VCPU_REGS_RAX]
-		| ((u64)ctxt->regs[VCPU_REGS_RDX] << 32);
-	if (ctxt->ops->set_msr(ctxt, ctxt->regs[VCPU_REGS_RCX], msr_data))
+	msr_data = (u32)reg_read(ctxt, VCPU_REGS_RAX)
+		| ((u64)reg_read(ctxt, VCPU_REGS_RDX) << 32);
+	if (ctxt->ops->set_msr(ctxt, reg_read(ctxt, VCPU_REGS_RCX), msr_data))
 		return emulate_gp(ctxt, 0);
 
 	return X86EMUL_CONTINUE;
@@ -2998,11 +3050,11 @@ static int em_rdmsr(struct x86_emulate_ctxt *ctxt)
 {
 	u64 msr_data;
 
-	if (ctxt->ops->get_msr(ctxt, ctxt->regs[VCPU_REGS_RCX], &msr_data))
+	if (ctxt->ops->get_msr(ctxt, reg_read(ctxt, VCPU_REGS_RCX), &msr_data))
 		return emulate_gp(ctxt, 0);
 
-	ctxt->regs[VCPU_REGS_RAX] = (u32)msr_data;
-	ctxt->regs[VCPU_REGS_RDX] = msr_data >> 32;
+	*reg_write(ctxt, VCPU_REGS_RAX) = (u32)msr_data;
+	*reg_write(ctxt, VCPU_REGS_RDX) = msr_data >> 32;
 	return X86EMUL_CONTINUE;
 }
 
@@ -3182,8 +3234,8 @@ static int em_lmsw(struct x86_emulate_ctxt *ctxt)
 
 static int em_loop(struct x86_emulate_ctxt *ctxt)
 {
-	register_address_increment(ctxt, &ctxt->regs[VCPU_REGS_RCX], -1);
-	if ((address_mask(ctxt, ctxt->regs[VCPU_REGS_RCX]) != 0) &&
+	register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RCX), -1);
+	if ((address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) != 0) &&
 	    (ctxt->b == 0xe2 || test_cc(ctxt->b ^ 0x5, ctxt->eflags)))
 		jmp_rel(ctxt, ctxt->src.val);
 
@@ -3192,7 +3244,7 @@ static int em_loop(struct x86_emulate_ctxt *ctxt)
 
 static int em_jcxz(struct x86_emulate_ctxt *ctxt)
 {
-	if (address_mask(ctxt, ctxt->regs[VCPU_REGS_RCX]) == 0)
+	if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0)
 		jmp_rel(ctxt, ctxt->src.val);
 
 	return X86EMUL_CONTINUE;
@@ -3280,20 +3332,20 @@ static int em_cpuid(struct x86_emulate_ctxt *ctxt)
 {
 	u32 eax, ebx, ecx, edx;
 
-	eax = ctxt->regs[VCPU_REGS_RAX];
-	ecx = ctxt->regs[VCPU_REGS_RCX];
+	eax = reg_read(ctxt, VCPU_REGS_RAX);
+	ecx = reg_read(ctxt, VCPU_REGS_RCX);
 	ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
-	ctxt->regs[VCPU_REGS_RAX] = eax;
-	ctxt->regs[VCPU_REGS_RBX] = ebx;
-	ctxt->regs[VCPU_REGS_RCX] = ecx;
-	ctxt->regs[VCPU_REGS_RDX] = edx;
+	*reg_write(ctxt, VCPU_REGS_RAX) = eax;
+	*reg_write(ctxt, VCPU_REGS_RBX) = ebx;
+	*reg_write(ctxt, VCPU_REGS_RCX) = ecx;
+	*reg_write(ctxt, VCPU_REGS_RDX) = edx;
 	return X86EMUL_CONTINUE;
 }
 
 static int em_lahf(struct x86_emulate_ctxt *ctxt)
 {
-	ctxt->regs[VCPU_REGS_RAX] &= ~0xff00UL;
-	ctxt->regs[VCPU_REGS_RAX] |= (ctxt->eflags & 0xff) << 8;
+	*reg_rmw(ctxt, VCPU_REGS_RAX) &= ~0xff00UL;
+	*reg_rmw(ctxt, VCPU_REGS_RAX) |= (ctxt->eflags & 0xff) << 8;
 	return X86EMUL_CONTINUE;
 }
 
@@ -3450,7 +3502,7 @@ static int check_svme(struct x86_emulate_ctxt *ctxt)
 
 static int check_svme_pa(struct x86_emulate_ctxt *ctxt)
 {
-	u64 rax = ctxt->regs[VCPU_REGS_RAX];
+	u64 rax = reg_read(ctxt, VCPU_REGS_RAX);
 
 	/* Valid physical address? */
 	if (rax & 0xffff000000000000ULL)
@@ -3472,7 +3524,7 @@ static int check_rdtsc(struct x86_emulate_ctxt *ctxt)
 static int check_rdpmc(struct x86_emulate_ctxt *ctxt)
 {
 	u64 cr4 = ctxt->ops->get_cr(ctxt, 4);
-	u64 rcx = ctxt->regs[VCPU_REGS_RCX];
+	u64 rcx = reg_read(ctxt, VCPU_REGS_RCX);
 
 	if ((!(cr4 & X86_CR4_PCE) && ctxt->ops->cpl(ctxt)) ||
 	    (rcx > 3))
@@ -3930,7 +3982,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
 	case OpAcc:
 		op->type = OP_REG;
 		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
-		op->addr.reg = &ctxt->regs[VCPU_REGS_RAX];
+		op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
 		fetch_register_operand(op);
 		op->orig_val = op->val;
 		break;
@@ -3938,19 +3990,19 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
 		op->type = OP_MEM;
 		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
 		op->addr.mem.ea =
-			register_address(ctxt, ctxt->regs[VCPU_REGS_RDI]);
+			register_address(ctxt, reg_read(ctxt, VCPU_REGS_RDI));
 		op->addr.mem.seg = VCPU_SREG_ES;
 		op->val = 0;
 		break;
 	case OpDX:
 		op->type = OP_REG;
 		op->bytes = 2;
-		op->addr.reg = &ctxt->regs[VCPU_REGS_RDX];
+		op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
 		fetch_register_operand(op);
 		break;
 	case OpCL:
 		op->bytes = 1;
-		op->val = ctxt->regs[VCPU_REGS_RCX] & 0xff;
+		op->val = reg_read(ctxt, VCPU_REGS_RCX) & 0xff;
 		break;
 	case OpImmByte:
 		rc = decode_imm(ctxt, op, 1, true);
@@ -3981,7 +4033,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
 		op->type = OP_MEM;
 		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
 		op->addr.mem.ea =
-			register_address(ctxt, ctxt->regs[VCPU_REGS_RSI]);
+			register_address(ctxt, reg_read(ctxt, VCPU_REGS_RSI));
 		op->addr.mem.seg = seg_override(ctxt);
 		op->val = 0;
 		break;
@@ -4287,6 +4339,7 @@ static void fetch_possible_mmx_operand(struct x86_emulate_ctxt *ctxt,
 		read_mmx_reg(ctxt, &op->mm_val, op->addr.mm);
 }
 
+
 int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
 {
 	struct x86_emulate_ops *ops = ctxt->ops;
@@ -4371,7 +4424,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
 
 	if (ctxt->rep_prefix && (ctxt->d & String)) {
 		/* All REP prefixes have the same first termination condition */
-		if (address_mask(ctxt, ctxt->regs[VCPU_REGS_RCX]) == 0) {
+		if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0) {
 			ctxt->eip = ctxt->_eip;
 			goto done;
 		}
@@ -4444,7 +4497,7 @@ special_insn:
 		ctxt->dst.val = ctxt->src.addr.mem.ea;
 		break;
 	case 0x90 ... 0x97: /* nop / xchg reg, rax */
-		if (ctxt->dst.addr.reg == &ctxt->regs[VCPU_REGS_RAX])
+		if (ctxt->dst.addr.reg == reg_rmw(ctxt, VCPU_REGS_RAX))
 			break;
 		rc = em_xchg(ctxt);
 		break;
@@ -4472,7 +4525,7 @@ special_insn:
 		rc = em_grp2(ctxt);
 		break;
 	case 0xd2 ... 0xd3:	/* Grp2 */
-		ctxt->src.val = ctxt->regs[VCPU_REGS_RCX];
+		ctxt->src.val = reg_read(ctxt, VCPU_REGS_RCX);
 		rc = em_grp2(ctxt);
 		break;
 	case 0xe9: /* jmp rel */
@@ -4527,14 +4580,14 @@ writeback:
 
 	if (ctxt->rep_prefix && (ctxt->d & String)) {
 		struct read_cache *r = &ctxt->io_read;
-		register_address_increment(ctxt, &ctxt->regs[VCPU_REGS_RCX], -1);
+		register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RCX), -1);
 
 		if (!string_insn_completed(ctxt)) {
 			/*
 			 * Re-enter guest when pio read ahead buffer is empty
 			 * or, if it is not used, after each 1024 iteration.
 			 */
-			if ((r->end != 0 || ctxt->regs[VCPU_REGS_RCX] & 0x3ff) &&
+			if ((r->end != 0 || reg_read(ctxt, VCPU_REGS_RCX) & 0x3ff) &&
 			    (r->end == 0 || r->end != r->pos)) {
 				/*
 				 * Reset read cache. Usually happens before
@@ -4542,6 +4595,7 @@ writeback:
 				 * we have to do it here.
 				 */
 				ctxt->mem_read.end = 0;
+				writeback_registers(ctxt);
 				return EMULATION_RESTART;
 			}
 			goto done; /* skip rip writeback */
@@ -4556,6 +4610,9 @@ done:
 	if (rc == X86EMUL_INTERCEPTED)
 		return EMULATION_INTERCEPTED;
 
+	if (rc == X86EMUL_CONTINUE)
+		writeback_registers(ctxt);
+
 	return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK;
 
 twobyte_insn:
@@ -4628,3 +4685,13 @@ twobyte_insn:
 cannot_emulate:
 	return EMULATION_FAILED;
 }
+
+void emulator_invalidate_register_cache(struct x86_emulate_ctxt *ctxt)
+{
+	invalidate_registers(ctxt);
+}
+
+void emulator_writeback_register_cache(struct x86_emulate_ctxt *ctxt)
+{
+	writeback_registers(ctxt);
+}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 42bbf4187d2..e00050ce7a6 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4313,7 +4313,19 @@ static void emulator_get_cpuid(struct x86_emulate_ctxt *ctxt,
 	kvm_cpuid(emul_to_vcpu(ctxt), eax, ebx, ecx, edx);
 }
 
+static ulong emulator_read_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg)
+{
+	return kvm_register_read(emul_to_vcpu(ctxt), reg);
+}
+
+static void emulator_write_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg, ulong val)
+{
+	kvm_register_write(emul_to_vcpu(ctxt), reg, val);
+}
+
 static struct x86_emulate_ops emulate_ops = {
+	.read_gpr            = emulator_read_gpr,
+	.write_gpr           = emulator_write_gpr,
 	.read_std            = kvm_read_guest_virt_system,
 	.write_std           = kvm_write_guest_virt_system,
 	.fetch               = kvm_fetch_guest_virt,
@@ -4348,14 +4360,6 @@ static struct x86_emulate_ops emulate_ops = {
 	.get_cpuid           = emulator_get_cpuid,
 };
 
-static void cache_all_regs(struct kvm_vcpu *vcpu)
-{
-	kvm_register_read(vcpu, VCPU_REGS_RAX);
-	kvm_register_read(vcpu, VCPU_REGS_RSP);
-	kvm_register_read(vcpu, VCPU_REGS_RIP);
-	vcpu->arch.regs_dirty = ~0;
-}
-
 static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
 {
 	u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(vcpu, mask);
@@ -4382,12 +4386,10 @@ static void inject_emulated_exception(struct kvm_vcpu *vcpu)
 		kvm_queue_exception(vcpu, ctxt->exception.vector);
 }
 
-static void init_decode_cache(struct x86_emulate_ctxt *ctxt,
-			      const unsigned long *regs)
+static void init_decode_cache(struct x86_emulate_ctxt *ctxt)
 {
 	memset(&ctxt->twobyte, 0,
-	       (void *)&ctxt->regs - (void *)&ctxt->twobyte);
-	memcpy(ctxt->regs, regs, sizeof(ctxt->regs));
+	       (void *)&ctxt->_regs - (void *)&ctxt->twobyte);
 
 	ctxt->fetch.start = 0;
 	ctxt->fetch.end = 0;
@@ -4402,14 +4404,6 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
 	struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
 	int cs_db, cs_l;
 
-	/*
-	 * TODO: fix emulate.c to use guest_read/write_register
-	 * instead of direct ->regs accesses, can save hundred cycles
-	 * on Intel for instructions that don't read/change RSP, for
-	 * for example.
-	 */
-	cache_all_regs(vcpu);
-
 	kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
 
 	ctxt->eflags = kvm_get_rflags(vcpu);
@@ -4421,7 +4415,7 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
 							  X86EMUL_MODE_PROT16;
 	ctxt->guest_mode = is_guest_mode(vcpu);
 
-	init_decode_cache(ctxt, vcpu->arch.regs);
+	init_decode_cache(ctxt);
 	vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
 }
 
@@ -4441,7 +4435,6 @@ int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip)
 		return EMULATE_FAIL;
 
 	ctxt->eip = ctxt->_eip;
-	memcpy(vcpu->arch.regs, ctxt->regs, sizeof ctxt->regs);
 	kvm_rip_write(vcpu, ctxt->eip);
 	kvm_set_rflags(vcpu, ctxt->eflags);
 
@@ -4599,7 +4592,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
 	   changes registers values  during IO operation */
 	if (vcpu->arch.emulate_regs_need_sync_from_vcpu) {
 		vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
-		memcpy(ctxt->regs, vcpu->arch.regs, sizeof ctxt->regs);
+		emulator_invalidate_register_cache(ctxt);
 	}
 
 restart:
@@ -4637,7 +4630,6 @@ restart:
 		toggle_interruptibility(vcpu, ctxt->interruptibility);
 		kvm_set_rflags(vcpu, ctxt->eflags);
 		kvm_make_request(KVM_REQ_EVENT, vcpu);
-		memcpy(vcpu->arch.regs, ctxt->regs, sizeof ctxt->regs);
 		vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
 		kvm_rip_write(vcpu, ctxt->eip);
 	} else
@@ -5591,8 +5583,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 		 * that usually, but some bad designed PV devices (vmware
 		 * backdoor interface) need this to work
 		 */
-		struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
-		memcpy(vcpu->arch.regs, ctxt->regs, sizeof ctxt->regs);
+		emulator_writeback_register_cache(&vcpu->arch.emulate_ctxt);
 		vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
 	}
 	regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX);
@@ -5723,6 +5714,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
 {
 	struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
 	int ret;
+	unsigned reg;
 
 	init_emulate_ctxt(vcpu);
 
@@ -5732,7 +5724,6 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
 	if (ret)
 		return EMULATE_FAIL;
 
-	memcpy(vcpu->arch.regs, ctxt->regs, sizeof ctxt->regs);
 	kvm_rip_write(vcpu, ctxt->eip);
 	kvm_set_rflags(vcpu, ctxt->eflags);
 	kvm_make_request(KVM_REQ_EVENT, vcpu);
-- 
cgit v1.2.3-70-g09d2


From baa7e81e325bbb6ddebd7680ac1068859244b61d Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Tue, 21 Aug 2012 17:06:58 +0300
Subject: KVM: VMX: Separate saving pre-realmode state from setting segments

Commit b246dd5df139 ("KVM: VMX: Fix KVM_SET_SREGS with big real mode
segments") moved fix_rmode_seg() to vmx_set_segment(), so that it is
applied not just on transitions to real mode, but also on KVM_SET_SREGS
(migration).  However fix_rmode_seg() not only munges the vmcs segments,
it also sets up the save area for us to restore when returning to
protected mode or to return in vmx_get_segment().

Move saving the segment into a new function, save_rmode_seg(), and
call it just during the transition.

Signed-off-by: Avi Kivity <avi@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/vmx.c | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 13e0296cea4..4e49caf9224 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2768,7 +2768,7 @@ static gva_t rmode_tss_base(struct kvm *kvm)
 	return kvm->arch.tss_addr;
 }
 
-static void fix_rmode_seg(int seg, struct kvm_save_segment *save)
+static void save_rmode_seg(int seg, struct kvm_save_segment *save)
 {
 	struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
 
@@ -2776,6 +2776,12 @@ static void fix_rmode_seg(int seg, struct kvm_save_segment *save)
 	save->base = vmcs_readl(sf->base);
 	save->limit = vmcs_read32(sf->limit);
 	save->ar = vmcs_read32(sf->ar_bytes);
+}
+
+static void fix_rmode_seg(int seg, struct kvm_save_segment *save)
+{
+	struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
+
 	vmcs_write16(sf->selector, save->base >> 4);
 	vmcs_write32(sf->base, save->base & 0xffff0);
 	vmcs_write32(sf->limit, 0xffff);
@@ -2798,6 +2804,12 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
 	vmx->emulation_required = 1;
 	vmx->rmode.vm86_active = 1;
 
+	save_rmode_seg(VCPU_SREG_TR, &vmx->rmode.tr);
+	save_rmode_seg(VCPU_SREG_ES, &vmx->rmode.es);
+	save_rmode_seg(VCPU_SREG_DS, &vmx->rmode.ds);
+	save_rmode_seg(VCPU_SREG_FS, &vmx->rmode.fs);
+	save_rmode_seg(VCPU_SREG_GS, &vmx->rmode.gs);
+
 	/*
 	 * Very old userspace does not call KVM_SET_TSS_ADDR before entering
 	 * vcpu. Call it here with phys address pointing 16M below 4G.
@@ -2812,14 +2824,8 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
 
 	vmx_segment_cache_clear(vmx);
 
-	vmx->rmode.tr.selector = vmcs_read16(GUEST_TR_SELECTOR);
-	vmx->rmode.tr.base = vmcs_readl(GUEST_TR_BASE);
 	vmcs_writel(GUEST_TR_BASE, rmode_tss_base(vcpu->kvm));
-
-	vmx->rmode.tr.limit = vmcs_read32(GUEST_TR_LIMIT);
 	vmcs_write32(GUEST_TR_LIMIT, RMODE_TSS_SIZE - 1);
-
-	vmx->rmode.tr.ar = vmcs_read32(GUEST_TR_AR_BYTES);
 	vmcs_write32(GUEST_TR_AR_BYTES, 0x008b);
 
 	flags = vmcs_readl(GUEST_RFLAGS);
-- 
cgit v1.2.3-70-g09d2


From 72fbefec26841699fee9ad0b050624aeb43d5bae Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Tue, 21 Aug 2012 17:06:59 +0300
Subject: KVM: VMX: Fix incorrect lookup of segment S flag in
 fix_pmode_dataseg()

fix_pmode_dataseg() looks up S in ->base instead of ->ar_bytes.

Signed-off-by: Avi Kivity <avi@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/vmx.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 4e49caf9224..1d93079432b 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2693,11 +2693,11 @@ static __exit void hardware_unsetup(void)
 	free_kvm_area();
 }
 
-static void fix_pmode_dataseg(int seg, struct kvm_save_segment *save)
+static void fix_pmode_dataseg(int seg, struct kvm_segment *save)
 {
 	struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
 
-	if (vmcs_readl(sf->base) == save->base && (save->base & AR_S_MASK)) {
+	if (vmcs_readl(sf->base) == save->base && (save->ar_bytes & AR_S_MASK)) {
 		vmcs_write16(sf->selector, save->selector);
 		vmcs_writel(sf->base, save->base);
 		vmcs_write32(sf->limit, save->limit);
-- 
cgit v1.2.3-70-g09d2


From f5f7b2fe3bf849b58c8144729aba78b8e29e1e4c Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Tue, 21 Aug 2012 17:07:00 +0300
Subject: KVM: VMX: Use kvm_segment to save protected-mode segments when
 entering realmode

Instead of using struct kvm_save_segment, use struct kvm_segment, which is what
the other APIs use.  This leads to some simplification.

We replace save_rmode_seg() with a call to vmx_save_segment().  Since this depends
on rmode.vm86_active, we move the call to before setting the flag.

Signed-off-by: Avi Kivity <avi@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/vmx.c | 85 +++++++++++++++---------------------------------------
 1 file changed, 24 insertions(+), 61 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 1d93079432b..7e95ff68b9d 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -405,16 +405,16 @@ struct vcpu_vmx {
 	struct {
 		int vm86_active;
 		ulong save_rflags;
+		struct kvm_segment segs[8];
+	} rmode;
+	struct {
+		u32 bitmask; /* 4 bits per segment (1 bit per field) */
 		struct kvm_save_segment {
 			u16 selector;
 			unsigned long base;
 			u32 limit;
 			u32 ar;
-		} tr, es, ds, fs, gs;
-	} rmode;
-	struct {
-		u32 bitmask; /* 4 bits per segment (1 bit per field) */
-		struct kvm_save_segment seg[8];
+		} seg[8];
 	} segment_cache;
 	int vpid;
 	bool emulation_required;
@@ -2693,15 +2693,12 @@ static __exit void hardware_unsetup(void)
 	free_kvm_area();
 }
 
-static void fix_pmode_dataseg(int seg, struct kvm_segment *save)
+static void fix_pmode_dataseg(struct kvm_vcpu *vcpu, int seg, struct kvm_segment *save)
 {
 	struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
 
-	if (vmcs_readl(sf->base) == save->base && (save->ar_bytes & AR_S_MASK)) {
-		vmcs_write16(sf->selector, save->selector);
-		vmcs_writel(sf->base, save->base);
-		vmcs_write32(sf->limit, save->limit);
-		vmcs_write32(sf->ar_bytes, save->ar);
+	if (vmcs_readl(sf->base) == save->base && save->s) {
+		vmx_set_segment(vcpu, save, seg);
 	} else {
 		u32 dpl = (vmcs_read16(sf->selector) & SELECTOR_RPL_MASK)
 			<< AR_DPL_SHIFT;
@@ -2719,10 +2716,7 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
 
 	vmx_segment_cache_clear(vmx);
 
-	vmcs_write16(GUEST_TR_SELECTOR, vmx->rmode.tr.selector);
-	vmcs_writel(GUEST_TR_BASE, vmx->rmode.tr.base);
-	vmcs_write32(GUEST_TR_LIMIT, vmx->rmode.tr.limit);
-	vmcs_write32(GUEST_TR_AR_BYTES, vmx->rmode.tr.ar);
+	vmx_set_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR);
 
 	flags = vmcs_readl(GUEST_RFLAGS);
 	flags &= RMODE_GUEST_OWNED_EFLAGS_BITS;
@@ -2737,10 +2731,10 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
 	if (emulate_invalid_guest_state)
 		return;
 
-	fix_pmode_dataseg(VCPU_SREG_ES, &vmx->rmode.es);
-	fix_pmode_dataseg(VCPU_SREG_DS, &vmx->rmode.ds);
-	fix_pmode_dataseg(VCPU_SREG_GS, &vmx->rmode.gs);
-	fix_pmode_dataseg(VCPU_SREG_FS, &vmx->rmode.fs);
+	fix_pmode_dataseg(vcpu, VCPU_SREG_ES, &vmx->rmode.segs[VCPU_SREG_ES]);
+	fix_pmode_dataseg(vcpu, VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]);
+	fix_pmode_dataseg(vcpu, VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]);
+	fix_pmode_dataseg(vcpu, VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]);
 
 	vmx_segment_cache_clear(vmx);
 
@@ -2768,17 +2762,7 @@ static gva_t rmode_tss_base(struct kvm *kvm)
 	return kvm->arch.tss_addr;
 }
 
-static void save_rmode_seg(int seg, struct kvm_save_segment *save)
-{
-	struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
-
-	save->selector = vmcs_read16(sf->selector);
-	save->base = vmcs_readl(sf->base);
-	save->limit = vmcs_read32(sf->limit);
-	save->ar = vmcs_read32(sf->ar_bytes);
-}
-
-static void fix_rmode_seg(int seg, struct kvm_save_segment *save)
+static void fix_rmode_seg(int seg, struct kvm_segment *save)
 {
 	struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
 
@@ -2801,14 +2785,15 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
 	if (enable_unrestricted_guest)
 		return;
 
+	vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR);
+	vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES);
+	vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS);
+	vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_FS], VCPU_SREG_FS);
+	vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_GS], VCPU_SREG_GS);
+
 	vmx->emulation_required = 1;
 	vmx->rmode.vm86_active = 1;
 
-	save_rmode_seg(VCPU_SREG_TR, &vmx->rmode.tr);
-	save_rmode_seg(VCPU_SREG_ES, &vmx->rmode.es);
-	save_rmode_seg(VCPU_SREG_DS, &vmx->rmode.ds);
-	save_rmode_seg(VCPU_SREG_FS, &vmx->rmode.fs);
-	save_rmode_seg(VCPU_SREG_GS, &vmx->rmode.gs);
 
 	/*
 	 * Very old userspace does not call KVM_SET_TSS_ADDR before entering
@@ -3118,7 +3103,6 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu,
 			    struct kvm_segment *var, int seg)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	struct kvm_save_segment *save;
 	u32 ar;
 
 	if (vmx->rmode.vm86_active
@@ -3126,27 +3110,15 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu,
 		|| seg == VCPU_SREG_DS || seg == VCPU_SREG_FS
 		|| seg == VCPU_SREG_GS)
 	    && !emulate_invalid_guest_state) {
-		switch (seg) {
-		case VCPU_SREG_TR: save = &vmx->rmode.tr; break;
-		case VCPU_SREG_ES: save = &vmx->rmode.es; break;
-		case VCPU_SREG_DS: save = &vmx->rmode.ds; break;
-		case VCPU_SREG_FS: save = &vmx->rmode.fs; break;
-		case VCPU_SREG_GS: save = &vmx->rmode.gs; break;
-		default: BUG();
-		}
-		var->selector = save->selector;
-		var->base = save->base;
-		var->limit = save->limit;
-		ar = save->ar;
+		*var = vmx->rmode.segs[seg];
 		if (seg == VCPU_SREG_TR
 		    || var->selector == vmx_read_guest_seg_selector(vmx, seg))
-			goto use_saved_rmode_seg;
+			return;
 	}
 	var->base = vmx_read_guest_seg_base(vmx, seg);
 	var->limit = vmx_read_guest_seg_limit(vmx, seg);
 	var->selector = vmx_read_guest_seg_selector(vmx, seg);
 	ar = vmx_read_guest_seg_ar(vmx, seg);
-use_saved_rmode_seg:
 	if ((ar & AR_UNUSABLE_MASK) && !emulate_invalid_guest_state)
 		ar = 0;
 	var->type = ar & 15;
@@ -3235,10 +3207,7 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
 
 	if (vmx->rmode.vm86_active && seg == VCPU_SREG_TR) {
 		vmcs_write16(sf->selector, var->selector);
-		vmx->rmode.tr.selector = var->selector;
-		vmx->rmode.tr.base = var->base;
-		vmx->rmode.tr.limit = var->limit;
-		vmx->rmode.tr.ar = vmx_segment_access_rights(var);
+		vmx->rmode.segs[VCPU_SREG_TR] = *var;
 		return;
 	}
 	vmcs_writel(sf->base, var->base);
@@ -3289,16 +3258,10 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
 				     vmcs_readl(GUEST_CS_BASE) >> 4);
 			break;
 		case VCPU_SREG_ES:
-			fix_rmode_seg(VCPU_SREG_ES, &vmx->rmode.es);
-			break;
 		case VCPU_SREG_DS:
-			fix_rmode_seg(VCPU_SREG_DS, &vmx->rmode.ds);
-			break;
 		case VCPU_SREG_GS:
-			fix_rmode_seg(VCPU_SREG_GS, &vmx->rmode.gs);
-			break;
 		case VCPU_SREG_FS:
-			fix_rmode_seg(VCPU_SREG_FS, &vmx->rmode.fs);
+			fix_rmode_seg(seg, &vmx->rmode.segs[seg]);
 			break;
 		case VCPU_SREG_SS:
 			vmcs_write16(GUEST_SS_SELECTOR,
-- 
cgit v1.2.3-70-g09d2


From c865c43de66dc973865bda337022f03b6e16c8df Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Tue, 21 Aug 2012 17:07:01 +0300
Subject: KVM: VMX: Retain limit and attributes when entering protected mode

Real processors don't change segment limits and attributes while in
real mode.  Mimic that behaviour.

Signed-off-by: Avi Kivity <avi@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/vmx.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 7e95ff68b9d..88eeb405560 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2696,14 +2696,14 @@ static __exit void hardware_unsetup(void)
 static void fix_pmode_dataseg(struct kvm_vcpu *vcpu, int seg, struct kvm_segment *save)
 {
 	struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
+	struct kvm_segment tmp = *save;
 
-	if (vmcs_readl(sf->base) == save->base && save->s) {
-		vmx_set_segment(vcpu, save, seg);
-	} else {
-		u32 dpl = (vmcs_read16(sf->selector) & SELECTOR_RPL_MASK)
-			<< AR_DPL_SHIFT;
-		vmcs_write32(sf->ar_bytes, 0x93 | dpl);
+	if (!(vmcs_readl(sf->base) == tmp.base && tmp.s)) {
+		tmp.base = vmcs_readl(sf->base);
+		tmp.selector = vmcs_read16(sf->selector);
+		tmp.s = 1;
 	}
+	vmx_set_segment(vcpu, &tmp, seg);
 }
 
 static void enter_pmode(struct kvm_vcpu *vcpu)
-- 
cgit v1.2.3-70-g09d2


From 495e116684cebc5ae625916aba37fc07f345707b Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Tue, 21 Aug 2012 17:07:02 +0300
Subject: KVM: VMX: Allow real mode emulation using vm86 with dpl=0

Real mode is always entered from protected mode with dpl=0.  Since
the dpl doesn't affect execution, and we already override it to 3
in the vmcs (as vmx requires), we can allow execution in that state.

Signed-off-by: Avi Kivity <avi@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/vmx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 88eeb405560..4811d91759a 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3317,7 +3317,7 @@ static bool rmode_segment_valid(struct kvm_vcpu *vcpu, int seg)
 		return false;
 	if (var.limit != 0xffff)
 		return false;
-	if (ar != 0xf3)
+	if ((ar | (3 << AR_DPL_SHIFT)) != 0xf3)
 		return false;
 
 	return true;
-- 
cgit v1.2.3-70-g09d2


From e2a610d7fc3e285af8061ff071761752255d95f6 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Tue, 21 Aug 2012 17:07:03 +0300
Subject: KVM: VMX: Allow vm86 virtualization of big real mode

Usually, big real mode uses large (4GB) segments.  Currently we don't
virtualize this; if any segment has a limit other than 0xffff, we emulate.
But if we set the vmx-visible limit to 0xffff, we can use vm86 to virtualize
real mode; if an access overruns the segment limit, the guest will #GP, which
we will trap and forward to the emulator.  This results in significantly
faster execution, and less risk of hitting an unemulated instruction.

If the limit is less than 0xffff, we retain the existing behaviour.

Signed-off-by: Avi Kivity <avi@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/vmx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 4811d91759a..fd21eb45466 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3315,7 +3315,7 @@ static bool rmode_segment_valid(struct kvm_vcpu *vcpu, int seg)
 
 	if (var.base != (var.selector << 4))
 		return false;
-	if (var.limit != 0xffff)
+	if (var.limit < 0xffff)
 		return false;
 	if ((ar | (3 << AR_DPL_SHIFT)) != 0xf3)
 		return false;
-- 
cgit v1.2.3-70-g09d2


From 03ebebeb1ff5d1d6209fd8df4ffc9204df82bd55 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Tue, 21 Aug 2012 17:07:04 +0300
Subject: KVM: x86 emulator: Leave segment limit and attributs alone in real
 mode

When loading a segment in real mode, only the base and selector must
be modified.  The limit needs to be left alone, otherwise big real mode
users will hit a #GP due to limit checking (currently this is suppressed
because we don't check limits in real mode).

Signed-off-by: Avi Kivity <avi@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/emulate.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 5e27ba53261..f8b27cd2a6c 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -1388,19 +1388,15 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 	bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */
 	ulong desc_addr;
 	int ret;
+	u16 dummy;
 
 	memset(&seg_desc, 0, sizeof seg_desc);
 
 	if ((seg <= VCPU_SREG_GS && ctxt->mode == X86EMUL_MODE_VM86)
 	    || ctxt->mode == X86EMUL_MODE_REAL) {
 		/* set real mode segment descriptor */
+		ctxt->ops->get_segment(ctxt, &dummy, &seg_desc, NULL, seg);
 		set_desc_base(&seg_desc, selector << 4);
-		set_desc_limit(&seg_desc, 0xffff);
-		seg_desc.type = 3;
-		seg_desc.p = 1;
-		seg_desc.s = 1;
-		if (ctxt->mode == X86EMUL_MODE_VM86)
-			seg_desc.dpl = 3;
 		goto load;
 	}
 
-- 
cgit v1.2.3-70-g09d2


From a5625189f6810ef79ced53989c794acfa10d3370 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Tue, 21 Aug 2012 17:07:05 +0300
Subject: KVM: x86 emulator: Check segment limits in real mode too

Segment limits are verified in real mode, not just protected mode.

Signed-off-by: Avi Kivity <avi@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/emulate.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index f8b27cd2a6c..5b1c701cd6d 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -668,8 +668,6 @@ static int __linearize(struct x86_emulate_ctxt *ctxt,
 
 	la = seg_base(ctxt, addr.seg) + addr.ea;
 	switch (ctxt->mode) {
-	case X86EMUL_MODE_REAL:
-		break;
 	case X86EMUL_MODE_PROT64:
 		if (((signed long)la << 16) >> 16 != la)
 			return emulate_gp(ctxt, 0);
@@ -699,7 +697,10 @@ static int __linearize(struct x86_emulate_ctxt *ctxt,
 				goto bad;
 		}
 		cpl = ctxt->ops->cpl(ctxt);
-		rpl = sel & 3;
+		if (ctxt->mode == X86EMUL_MODE_REAL)
+			rpl = 0;
+		else
+			rpl = sel & 3;
 		cpl = max(cpl, rpl);
 		if (!(desc.type & 8)) {
 			/* data segment */
-- 
cgit v1.2.3-70-g09d2


From 0afbe2f8781a812c7e501ec129eff45b21f792af Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Tue, 21 Aug 2012 17:07:06 +0300
Subject: KVM: x86 emulator: Fix #GP error code during linearization

We want the segment selector, nor segment number.

Signed-off-by: Avi Kivity <avi@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/emulate.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 5b1c701cd6d..1451cffd97e 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -725,9 +725,9 @@ static int __linearize(struct x86_emulate_ctxt *ctxt,
 	return X86EMUL_CONTINUE;
 bad:
 	if (addr.seg == VCPU_SREG_SS)
-		return emulate_ss(ctxt, addr.seg);
+		return emulate_ss(ctxt, sel);
 	else
-		return emulate_gp(ctxt, addr.seg);
+		return emulate_gp(ctxt, sel);
 }
 
 static int linearize(struct x86_emulate_ctxt *ctxt,
-- 
cgit v1.2.3-70-g09d2


From 726364202853f843a97a8ba4a7c3cd91e3aa84b7 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Tue, 21 Aug 2012 17:07:07 +0300
Subject: KVM: VMX: Return real real-mode segment data even if
 emulate_invalid_guest_state=1

emulate_invalid_guest_state=1 doesn't mean we don't munge the segments in the
vmcs; we do.  So we need to return the real ones (maintained by vmx_set_segment).

Signed-off-by: Avi Kivity <avi@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/vmx.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index fd21eb45466..0d6872621ab 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3108,8 +3108,7 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu,
 	if (vmx->rmode.vm86_active
 	    && (seg == VCPU_SREG_TR || seg == VCPU_SREG_ES
 		|| seg == VCPU_SREG_DS || seg == VCPU_SREG_FS
-		|| seg == VCPU_SREG_GS)
-	    && !emulate_invalid_guest_state) {
+		|| seg == VCPU_SREG_GS)) {
 		*var = vmx->rmode.segs[seg];
 		if (seg == VCPU_SREG_TR
 		    || var->selector == vmx_read_guest_seg_selector(vmx, seg))
-- 
cgit v1.2.3-70-g09d2


From 1390a28b274e2e45f89bac67c435cbcbc5cc0790 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Tue, 21 Aug 2012 17:07:08 +0300
Subject: KVM: VMX: Preserve segment limit and access rights in real mode

While this is undocumented, real processors do not reload the segment
limit and access rights when loading a segment register in real mode.
Real programs rely on it so we need to comply with this behaviour.

Signed-off-by: Avi Kivity <avi@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/vmx.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 0d6872621ab..6e6421aeca0 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3113,6 +3113,9 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu,
 		if (seg == VCPU_SREG_TR
 		    || var->selector == vmx_read_guest_seg_selector(vmx, seg))
 			return;
+		var->base = vmx_read_guest_seg_base(vmx, seg);
+		var->selector = vmx_read_guest_seg_selector(vmx, seg);
+		return;
 	}
 	var->base = vmx_read_guest_seg_base(vmx, seg);
 	var->limit = vmx_read_guest_seg_limit(vmx, seg);
-- 
cgit v1.2.3-70-g09d2


From ce5668034752e52e995c8dc625337380099a088a Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Tue, 21 Aug 2012 17:07:09 +0300
Subject: KVM: VMX: Save all segment data in real mode

Signed-off-by: Avi Kivity <avi@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/vmx.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 6e6421aeca0..62b2d0cf2a7 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3216,6 +3216,7 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
 	vmcs_write32(sf->limit, var->limit);
 	vmcs_write16(sf->selector, var->selector);
 	if (vmx->rmode.vm86_active && var->s) {
+		vmx->rmode.segs[seg] = *var;
 		/*
 		 * Hack real-mode segments into vm86 compatibility.
 		 */
-- 
cgit v1.2.3-70-g09d2


From a81aba14dc0ea499f4c218b5db0303b2ea8151d3 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Tue, 21 Aug 2012 17:07:10 +0300
Subject: KVM: VMX: Ignore segment G and D bits when considering whether we can
 virtualize

We will enter the guest with G and D cleared; as real hardware ignores D in
real mode, and G is taken care of by the limit test, we allow more code to
run in vm86 mode.

Signed-off-by: Avi Kivity <avi@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/vmx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 62b2d0cf2a7..248c2b490e9 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3320,7 +3320,7 @@ static bool rmode_segment_valid(struct kvm_vcpu *vcpu, int seg)
 		return false;
 	if (var.limit < 0xffff)
 		return false;
-	if ((ar | (3 << AR_DPL_SHIFT)) != 0xf3)
+	if (((ar | (3 << AR_DPL_SHIFT)) & ~(AR_G_MASK | AR_DB_MASK)) != 0xf3)
 		return false;
 
 	return true;
-- 
cgit v1.2.3-70-g09d2


From 9a7819774e4236e8736a074b7e85276967911924 Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Thu, 30 Aug 2012 17:45:54 -0300
Subject: KVM: x86: remove unused variable from kvm_task_switch()

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/x86.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e00050ce7a6..20f2266dfb6 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5714,7 +5714,6 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
 {
 	struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
 	int ret;
-	unsigned reg;
 
 	init_emulate_ctxt(vcpu);
 
-- 
cgit v1.2.3-70-g09d2


From e09a8417823992700c063f9b8f85119f2d9016ed Mon Sep 17 00:00:00 2001
From: Jon Mason <jdmason@kudzu.us>
Date: Tue, 10 Jul 2012 15:31:30 -0700
Subject: hpet: Remove unused PCI Vendor ID #define

HPET_ID_VENDOR_8086 is defined but never used.  It would be a redefine
of PCI_VENDOR_ID_INTEL if it was ever used.

Signed-off-by: Jon Mason <jdmason@kudzu.us>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 arch/x86/include/asm/hpet.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h
index 2c392d663dc..434e2106cc8 100644
--- a/arch/x86/include/asm/hpet.h
+++ b/arch/x86/include/asm/hpet.h
@@ -35,8 +35,6 @@
 #define	HPET_ID_NUMBER_SHIFT	8
 #define HPET_ID_VENDOR_SHIFT	16
 
-#define HPET_ID_VENDOR_8086	0x8086
-
 #define HPET_CFG_ENABLE		0x001
 #define HPET_CFG_LEGACY		0x002
 #define	HPET_LEGACY_8254	2
-- 
cgit v1.2.3-70-g09d2


From ec798660cf72c981ad8eed272487a0fe2b3222f2 Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Mon, 3 Sep 2012 14:47:25 +0300
Subject: KVM: cleanup pic reset

kvm_pic_reset() is not used anywhere. Move reset logic from
pic_ioport_write() there.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/i8259.c | 52 +++++++++++-----------------------------------------
 1 file changed, 11 insertions(+), 41 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index 90c84f947d4..848206df096 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -275,23 +275,20 @@ void kvm_pic_reset(struct kvm_kpic_state *s)
 {
 	int irq, i;
 	struct kvm_vcpu *vcpu;
-	u8 irr = s->irr, isr = s->imr;
+	u8 edge_irr = s->irr & ~s->elcr;
 	bool found = false;
 
 	s->last_irr = 0;
-	s->irr = 0;
+	s->irr &= s->elcr;
 	s->imr = 0;
-	s->isr = 0;
 	s->priority_add = 0;
-	s->irq_base = 0;
-	s->read_reg_select = 0;
-	s->poll = 0;
 	s->special_mask = 0;
-	s->init_state = 0;
-	s->auto_eoi = 0;
-	s->rotate_on_auto_eoi = 0;
-	s->special_fully_nested_mode = 0;
-	s->init4 = 0;
+	s->read_reg_select = 0;
+	if (!s->init4) {
+		s->special_fully_nested_mode = 0;
+		s->auto_eoi = 0;
+	}
+	s->init_state = 1;
 
 	kvm_for_each_vcpu(i, vcpu, s->pics_state->kvm)
 		if (kvm_apic_accept_pic_intr(vcpu)) {
@@ -304,7 +301,7 @@ void kvm_pic_reset(struct kvm_kpic_state *s)
 		return;
 
 	for (irq = 0; irq < PIC_NUM_PINS/2; irq++)
-		if (irr & (1 << irq) || isr & (1 << irq))
+		if (edge_irr & (1 << irq))
 			pic_clear_isr(s, irq);
 }
 
@@ -316,40 +313,13 @@ static void pic_ioport_write(void *opaque, u32 addr, u32 val)
 	addr &= 1;
 	if (addr == 0) {
 		if (val & 0x10) {
-			u8 edge_irr = s->irr & ~s->elcr;
-			int i;
-			bool found;
-			struct kvm_vcpu *vcpu;
-
 			s->init4 = val & 1;
-			s->last_irr = 0;
-			s->irr &= s->elcr;
-			s->imr = 0;
-			s->priority_add = 0;
-			s->special_mask = 0;
-			s->read_reg_select = 0;
-			if (!s->init4) {
-				s->special_fully_nested_mode = 0;
-				s->auto_eoi = 0;
-			}
-			s->init_state = 1;
 			if (val & 0x02)
 				pr_pic_unimpl("single mode not supported");
 			if (val & 0x08)
 				pr_pic_unimpl(
-					"level sensitive irq not supported");
-
-			kvm_for_each_vcpu(i, vcpu, s->pics_state->kvm)
-				if (kvm_apic_accept_pic_intr(vcpu)) {
-					found = true;
-					break;
-				}
-
-
-			if (found)
-				for (irq = 0; irq < PIC_NUM_PINS/2; irq++)
-					if (edge_irr & (1 << irq))
-						pic_clear_isr(s, irq);
+						"level sensitive irq not supported");
+			kvm_pic_reset(s);
 		} else if (val & 0x08) {
 			if (val & 0x04)
 				s->poll = 1;
-- 
cgit v1.2.3-70-g09d2


From 749c59fd15b2c18dd6c15c353a899fb6ac49b865 Mon Sep 17 00:00:00 2001
From: Jamie Iles <jamie@jamieiles.com>
Date: Thu, 30 Aug 2012 11:32:13 +0100
Subject: KVM: PIC: fix use of uninitialised variable.

Commit aea218f3cbbc (KVM: PIC: call ack notifiers for irqs that are
dropped form irr) used an uninitialised variable to track whether an
appropriate apic had been found.  This could result in calling the ack
notifier incorrectly.

Cc: Gleb Natapov <gleb@redhat.com>
Cc: Avi Kivity <avi@redhat.com>
Signed-off-by: Jamie Iles <jamie@jamieiles.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/i8259.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index e498b18f010..9fc9aa7ac70 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -318,7 +318,7 @@ static void pic_ioport_write(void *opaque, u32 addr, u32 val)
 		if (val & 0x10) {
 			u8 edge_irr = s->irr & ~s->elcr;
 			int i;
-			bool found;
+			bool found = false;
 			struct kvm_vcpu *vcpu;
 
 			s->init4 = val & 1;
-- 
cgit v1.2.3-70-g09d2


From 3ec18cd8b8f8395d0df604c62ab3bc2cf3a966b4 Mon Sep 17 00:00:00 2001
From: Stephane Eranian <eranian@google.com>
Date: Mon, 20 Aug 2012 11:24:21 +0200
Subject: perf/x86: Enable Intel Cedarview Atom suppport

This patch enables perf_events support for Intel Cedarview
Atom (model 54) processors. Support includes PEBS and LBR.
Tested on my Atom N2600 netbook.

Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20120820092421.GA11284@quad
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event_intel.c     | 1 +
 arch/x86/kernel/cpu/perf_event_intel_lbr.c | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 7f2739e03e7..0d3d63afa76 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -2008,6 +2008,7 @@ __init int intel_pmu_init(void)
 		break;
 
 	case 28: /* Atom */
+	case 54: /* Cedariew */
 		memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
 		       sizeof(hw_cache_event_ids));
 
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index 520b4265fcd..da02e9cc375 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -686,7 +686,8 @@ void intel_pmu_lbr_init_atom(void)
 	 * to have an operational LBR which can freeze
 	 * on PMU interrupt
 	 */
-	if (boot_cpu_data.x86_mask < 10) {
+	if (boot_cpu_data.x86_model == 28
+	    && boot_cpu_data.x86_mask < 10) {
 		pr_cont("LBR disabled due to erratum");
 		return;
 	}
-- 
cgit v1.2.3-70-g09d2


From 406eae5d70c1a849f4a050f708a459c2349e9dda Mon Sep 17 00:00:00 2001
From: Josh Triplett <josh@joshtriplett.org>
Date: Sun, 2 Sep 2012 22:20:56 -0700
Subject: x86/Kconfig: Update defconfigs to current results of "make
 savedefconfig"

The x86 defconfigs have become somewhat out of date compared to
the current result of "make savedefconfig".  Update them to the
current output, as a prelude to further defconfig changes, to
avoid unrelated noise in those further changes.

Signed-off-by: Josh Triplett <josh@joshtriplett.org>
Cc: Randy Dunlap <rdunlap@xenotime.net>
Cc: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/80c8a5fbeaf6cdb72fb78a016013427efee52668.1346649518.git.josh@joshtriplett.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/configs/i386_defconfig   | 12 ++++--------
 arch/x86/configs/x86_64_defconfig | 12 ++++--------
 2 files changed, 8 insertions(+), 16 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 119db67dcb0..19034087c1b 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -8,6 +8,8 @@ CONFIG_TASK_DELAY_ACCT=y
 CONFIG_TASK_XACCT=y
 CONFIG_TASK_IO_ACCOUNTING=y
 CONFIG_AUDIT=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
 CONFIG_LOG_BUF_SHIFT=18
 CONFIG_CGROUPS=y
 CONFIG_CGROUP_FREEZER=y
@@ -34,8 +36,6 @@ CONFIG_SGI_PARTITION=y
 CONFIG_SUN_PARTITION=y
 CONFIG_KARMA_PARTITION=y
 CONFIG_EFI_PARTITION=y
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
 CONFIG_SMP=y
 CONFIG_X86_GENERIC=y
 CONFIG_HPET_TIMER=y
@@ -231,8 +231,6 @@ CONFIG_SND_HRTIMER=y
 CONFIG_SND_HDA_INTEL=y
 CONFIG_SND_HDA_HWDEP=y
 CONFIG_HIDRAW=y
-CONFIG_HID_PID=y
-CONFIG_USB_HIDDEV=y
 CONFIG_HID_GYRATION=y
 CONFIG_LOGITECH_FF=y
 CONFIG_HID_NTRIG=y
@@ -243,11 +241,11 @@ CONFIG_HID_SAMSUNG=y
 CONFIG_HID_SONY=y
 CONFIG_HID_SUNPLUS=y
 CONFIG_HID_TOPSEED=y
+CONFIG_HID_PID=y
+CONFIG_USB_HIDDEV=y
 CONFIG_USB=y
 CONFIG_USB_DEBUG=y
 CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
-CONFIG_USB_DEVICEFS=y
-# CONFIG_USB_DEVICE_CLASS is not set
 CONFIG_USB_MON=y
 CONFIG_USB_EHCI_HCD=y
 # CONFIG_USB_EHCI_TT_NEWSCHED is not set
@@ -280,7 +278,6 @@ CONFIG_PROC_KCORE=y
 CONFIG_TMPFS_POSIX_ACL=y
 CONFIG_HUGETLBFS=y
 CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
 CONFIG_NFS_V3_ACL=y
 CONFIG_NFS_V4=y
 CONFIG_ROOT_NFS=y
@@ -299,7 +296,6 @@ CONFIG_DEBUG_KERNEL=y
 CONFIG_SCHEDSTATS=y
 CONFIG_TIMER_STATS=y
 CONFIG_DEBUG_STACK_USAGE=y
-CONFIG_SYSCTL_SYSCALL_CHECK=y
 CONFIG_BLK_DEV_IO_TRACE=y
 CONFIG_PROVIDE_OHCI1394_DMA_INIT=y
 CONFIG_EARLY_PRINTK_DBGP=y
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index 76eb2903809..c2c044846c7 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -8,6 +8,8 @@ CONFIG_TASK_DELAY_ACCT=y
 CONFIG_TASK_XACCT=y
 CONFIG_TASK_IO_ACCOUNTING=y
 CONFIG_AUDIT=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
 CONFIG_LOG_BUF_SHIFT=18
 CONFIG_CGROUPS=y
 CONFIG_CGROUP_FREEZER=y
@@ -34,8 +36,6 @@ CONFIG_SGI_PARTITION=y
 CONFIG_SUN_PARTITION=y
 CONFIG_KARMA_PARTITION=y
 CONFIG_EFI_PARTITION=y
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
 CONFIG_SMP=y
 CONFIG_CALGARY_IOMMU=y
 CONFIG_NR_CPUS=64
@@ -227,8 +227,6 @@ CONFIG_SND_HRTIMER=y
 CONFIG_SND_HDA_INTEL=y
 CONFIG_SND_HDA_HWDEP=y
 CONFIG_HIDRAW=y
-CONFIG_HID_PID=y
-CONFIG_USB_HIDDEV=y
 CONFIG_HID_GYRATION=y
 CONFIG_LOGITECH_FF=y
 CONFIG_HID_NTRIG=y
@@ -239,11 +237,11 @@ CONFIG_HID_SAMSUNG=y
 CONFIG_HID_SONY=y
 CONFIG_HID_SUNPLUS=y
 CONFIG_HID_TOPSEED=y
+CONFIG_HID_PID=y
+CONFIG_USB_HIDDEV=y
 CONFIG_USB=y
 CONFIG_USB_DEBUG=y
 CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
-CONFIG_USB_DEVICEFS=y
-# CONFIG_USB_DEVICE_CLASS is not set
 CONFIG_USB_MON=y
 CONFIG_USB_EHCI_HCD=y
 # CONFIG_USB_EHCI_TT_NEWSCHED is not set
@@ -280,7 +278,6 @@ CONFIG_PROC_KCORE=y
 CONFIG_TMPFS_POSIX_ACL=y
 CONFIG_HUGETLBFS=y
 CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
 CONFIG_NFS_V3_ACL=y
 CONFIG_NFS_V4=y
 CONFIG_ROOT_NFS=y
@@ -298,7 +295,6 @@ CONFIG_DEBUG_KERNEL=y
 CONFIG_SCHEDSTATS=y
 CONFIG_TIMER_STATS=y
 CONFIG_DEBUG_STACK_USAGE=y
-CONFIG_SYSCTL_SYSCALL_CHECK=y
 CONFIG_BLK_DEV_IO_TRACE=y
 CONFIG_PROVIDE_OHCI1394_DMA_INIT=y
 CONFIG_EARLY_PRINTK_DBGP=y
-- 
cgit v1.2.3-70-g09d2


From 3fe2cb8f9e9486980ff03d7e020781cfdb028ffa Mon Sep 17 00:00:00 2001
From: Josh Triplett <josh@joshtriplett.org>
Date: Sun, 2 Sep 2012 22:21:05 -0700
Subject: x86/Kconfig: Switch to ext4 in defconfigs

The current x86 and x86-64 defconfigs do not enable ext4, which
most current distributions default to.  Switch the defconfigs to
ext4, so they will boot on current systems without additional
configuration.

Signed-off-by: Josh Triplett <josh@joshtriplett.org>
Cc: Randy Dunlap <rdunlap@xenotime.net>
Cc: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/bd8a359506b7e1287c680823de16d67608ec52fe.1346649518.git.josh@joshtriplett.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/configs/i386_defconfig   | 7 +++----
 arch/x86/configs/x86_64_defconfig | 7 +++----
 2 files changed, 6 insertions(+), 8 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 19034087c1b..2701b8a8775 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -260,10 +260,9 @@ CONFIG_RTC_CLASS=y
 CONFIG_DMADEVICES=y
 CONFIG_EEEPC_LAPTOP=y
 CONFIG_EFI_VARS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_EXT3_FS_POSIX_ACL=y
-CONFIG_EXT3_FS_SECURITY=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
 CONFIG_QUOTA=y
 CONFIG_QUOTA_NETLINK_INTERFACE=y
 # CONFIG_PRINT_QUOTA_WARNING is not set
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index c2c044846c7..c17614efb98 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -260,10 +260,9 @@ CONFIG_AMD_IOMMU_STATS=y
 CONFIG_INTEL_IOMMU=y
 # CONFIG_INTEL_IOMMU_DEFAULT_ON is not set
 CONFIG_EFI_VARS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_EXT3_FS_POSIX_ACL=y
-CONFIG_EXT3_FS_SECURITY=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
 CONFIG_QUOTA=y
 CONFIG_QUOTA_NETLINK_INTERFACE=y
 # CONFIG_PRINT_QUOTA_WARNING is not set
-- 
cgit v1.2.3-70-g09d2


From b92f885f31db782608c8b73942111238020b1f4a Mon Sep 17 00:00:00 2001
From: Josh Triplett <josh@joshtriplett.org>
Date: Sun, 2 Sep 2012 22:21:13 -0700
Subject: x86/Kconfig: Disable CONFIG_CRC_T10DIF in defconfigs

CONFIG_CRC_T10DIF explicitly states that it exists only for use
by out-of-tree modules; anything in-kernel that needs it selects
it.  Thus, compile it out by default.

Signed-off-by: Josh Triplett <josh@joshtriplett.org>
Cc: Randy Dunlap <rdunlap@xenotime.net>
Cc: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/3aaff7a0af1320427952d411a21b8ded29747a1f.1346649518.git.josh@joshtriplett.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/configs/i386_defconfig   | 1 -
 arch/x86/configs/x86_64_defconfig | 1 -
 2 files changed, 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 2701b8a8775..d833bb6d016 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -311,4 +311,3 @@ CONFIG_SECURITY_SELINUX_BOOTPARAM=y
 CONFIG_SECURITY_SELINUX_DISABLE=y
 CONFIG_CRYPTO_AES_586=y
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
-CONFIG_CRC_T10DIF=y
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index c17614efb98..7ddcd99b240 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -309,4 +309,3 @@ CONFIG_SECURITY_SELINUX=y
 CONFIG_SECURITY_SELINUX_BOOTPARAM=y
 CONFIG_SECURITY_SELINUX_DISABLE=y
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
-CONFIG_CRC_T10DIF=y
-- 
cgit v1.2.3-70-g09d2


From be2f328d8616c81b86e4013974608776c317bbeb Mon Sep 17 00:00:00 2001
From: Josh Triplett <josh@joshtriplett.org>
Date: Sun, 2 Sep 2012 22:21:21 -0700
Subject: x86/Kconfig: Turn off CONFIG_BLK_DEV_RAM

The vast majority of systems either use initramfs or mount a
root filesystem directly from the kernel.  Distros have
defaulted to initramfs for years.  Only highly specialized
systems would use an actual filesystem-image initrd at this
point, and such systems don't rely on defconfig anyway.  Drop
initrd support (and specifically RAM block device support) from
the defconfigs.

Signed-off-by: Josh Triplett <josh@joshtriplett.org>
Cc: Randy Dunlap <rdunlap@xenotime.net>
Cc: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/2521e983a63595cd7a331236d929577660f89c72.1346649518.git.josh@joshtriplett.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/configs/i386_defconfig   | 2 --
 arch/x86/configs/x86_64_defconfig | 2 --
 2 files changed, 4 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index d833bb6d016..a6533a29222 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -144,8 +144,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 CONFIG_DEBUG_DEVRES=y
 CONFIG_CONNECTOR=y
 CONFIG_BLK_DEV_LOOP=y
-CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_RAM_SIZE=16384
 CONFIG_BLK_DEV_SD=y
 CONFIG_BLK_DEV_SR=y
 CONFIG_BLK_DEV_SR_VENDOR=y
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index 7ddcd99b240..18f3cc47ab8 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -144,8 +144,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 CONFIG_DEBUG_DEVRES=y
 CONFIG_CONNECTOR=y
 CONFIG_BLK_DEV_LOOP=y
-CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_RAM_SIZE=16384
 CONFIG_BLK_DEV_SD=y
 CONFIG_BLK_DEV_SR=y
 CONFIG_BLK_DEV_SR_VENDOR=y
-- 
cgit v1.2.3-70-g09d2


From c206b9dcb1fc41b104db43e98f9b83a8c0c559ae Mon Sep 17 00:00:00 2001
From: Josh Triplett <josh@joshtriplett.org>
Date: Sun, 2 Sep 2012 22:21:29 -0700
Subject: x86/Kconfig: Turn off DEBUG_NX_TEST module in defconfigs

The x86 defconfigs include exactly one module: test_nx.ko, a
special-purpose module which just exists to do evil things like
executing code off the stack to see if the kernel has enabled NX
support.  Anyone who actually uses that module can easily enable
it themselves, but the vast majority of kernel builds don't need
it; disable it by default.

Signed-off-by: Josh Triplett <josh@joshtriplett.org>
Cc: Randy Dunlap <rdunlap@xenotime.net>
Cc: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Link: http://lkml.kernel.org/r/e72faf875e1172fb1cbec5e6d3cd4122df508a97.1346649518.git.josh@joshtriplett.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/configs/i386_defconfig   | 1 -
 arch/x86/configs/x86_64_defconfig | 1 -
 2 files changed, 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index a6533a29222..5598547281a 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -298,7 +298,6 @@ CONFIG_PROVIDE_OHCI1394_DMA_INIT=y
 CONFIG_EARLY_PRINTK_DBGP=y
 CONFIG_DEBUG_STACKOVERFLOW=y
 # CONFIG_DEBUG_RODATA_TEST is not set
-CONFIG_DEBUG_NX_TEST=m
 CONFIG_DEBUG_BOOT_PARAMS=y
 CONFIG_OPTIMIZE_INLINING=y
 CONFIG_KEYS_DEBUG_PROC_KEYS=y
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index 18f3cc47ab8..671524d0f6c 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -297,7 +297,6 @@ CONFIG_PROVIDE_OHCI1394_DMA_INIT=y
 CONFIG_EARLY_PRINTK_DBGP=y
 CONFIG_DEBUG_STACKOVERFLOW=y
 # CONFIG_DEBUG_RODATA_TEST is not set
-CONFIG_DEBUG_NX_TEST=m
 CONFIG_DEBUG_BOOT_PARAMS=y
 CONFIG_OPTIMIZE_INLINING=y
 CONFIG_KEYS_DEBUG_PROC_KEYS=y
-- 
cgit v1.2.3-70-g09d2


From f00026276ace77dcad1cdf17f696ae4e56e12ee6 Mon Sep 17 00:00:00 2001
From: Mathias Krause <minipli@googlemail.com>
Date: Sun, 2 Sep 2012 23:31:40 +0200
Subject: x86: Fix __user annotations in asm/sys_ia32.h

Fix the following sparse warnings:

  sys_ia32.c:293:38: warning: incorrect type in argument 2 (different address spaces)
  sys_ia32.c:293:38:    expected unsigned int [noderef] [usertype] <asn:1>*stat_addr
  sys_ia32.c:293:38:    got unsigned int *stat_addr

Ironically, sys_ia32.h was introduced to fix sparse warnings but
missed that one.

Signed-off-by: Mathias Krause <minipli@googlemail.com>
Link: http://lkml.kernel.org/r/1346621506-30857-2-git-send-email-minipli@googlemail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/ia32/sys_ia32.c        | 2 +-
 arch/x86/include/asm/sys_ia32.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c
index 4540bece094..c5b938d92ea 100644
--- a/arch/x86/ia32/sys_ia32.c
+++ b/arch/x86/ia32/sys_ia32.c
@@ -287,7 +287,7 @@ asmlinkage long sys32_sigaction(int sig, struct old_sigaction32 __user *act,
 	return ret;
 }
 
-asmlinkage long sys32_waitpid(compat_pid_t pid, unsigned int *stat_addr,
+asmlinkage long sys32_waitpid(compat_pid_t pid, unsigned int __user *stat_addr,
 			      int options)
 {
 	return compat_sys_wait4(pid, stat_addr, options, NULL);
diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h
index 3fda9db4881..4ca1c611b55 100644
--- a/arch/x86/include/asm/sys_ia32.h
+++ b/arch/x86/include/asm/sys_ia32.h
@@ -40,7 +40,7 @@ asmlinkage long sys32_sigaction(int, struct old_sigaction32 __user *,
 				struct old_sigaction32 __user *);
 asmlinkage long sys32_alarm(unsigned int);
 
-asmlinkage long sys32_waitpid(compat_pid_t, unsigned int *, int);
+asmlinkage long sys32_waitpid(compat_pid_t, unsigned int __user *, int);
 asmlinkage long sys32_sysfs(int, u32, u32);
 
 asmlinkage long sys32_sched_rr_get_interval(compat_pid_t,
-- 
cgit v1.2.3-70-g09d2


From 3d1334064fb365ea8f299874c2b4c46de2bee74d Mon Sep 17 00:00:00 2001
From: Mathias Krause <minipli@googlemail.com>
Date: Sun, 2 Sep 2012 23:31:41 +0200
Subject: x86/vdso: Add __user annotation to VDSO32_SYMBOL

The address calculated by VDSO32_SYMBOL() is a pointer into
userland. Add the __user annotation to fix related sparse
warnings in its users.

Signed-off-by: Mathias Krause <minipli@googlemail.com>
Cc: Andy Lutomirski <luto@MIT.EDU>
Link: http://lkml.kernel.org/r/1346621506-30857-3-git-send-email-minipli@googlemail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/vdso.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h
index bb0522850b7..fddb53d6391 100644
--- a/arch/x86/include/asm/vdso.h
+++ b/arch/x86/include/asm/vdso.h
@@ -11,7 +11,8 @@ extern const char VDSO32_PRELINK[];
 #define VDSO32_SYMBOL(base, name)					\
 ({									\
 	extern const char VDSO32_##name[];				\
-	(void *)(VDSO32_##name - VDSO32_PRELINK + (unsigned long)(base)); \
+	(void __user *)(VDSO32_##name - VDSO32_PRELINK +		\
+			(unsigned long)(base));				\
 })
 #endif
 
-- 
cgit v1.2.3-70-g09d2


From 0ff8fef4eaf252ee13a2d0b175a8c876415bd62a Mon Sep 17 00:00:00 2001
From: Mathias Krause <minipli@googlemail.com>
Date: Sun, 2 Sep 2012 23:31:42 +0200
Subject: x86/signals: ia32_signal.c: add __user casts to fix sparse warnings

Fix the following sparse warnings by adding appropriate __user
casts and annotations:

  ia32_signal.c:165:38: warning: incorrect type in argument 1 (different address spaces)
   ia32_signal.c:165:38:    expected struct sigaltstack const [noderef] [usertype] <asn:1>*<noident>
  ia32_signal.c:165:38:    got struct sigaltstack *
  [...]

Signed-off-by: Mathias Krause <minipli@googlemail.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Link: http://lkml.kernel.org/r/1346621506-30857-4-git-send-email-minipli@googlemail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/ia32/ia32_signal.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index 673ac9b63d6..452d4dd0a95 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -162,7 +162,8 @@ asmlinkage long sys32_sigaltstack(const stack_ia32_t __user *uss_ptr,
 	}
 	seg = get_fs();
 	set_fs(KERNEL_DS);
-	ret = do_sigaltstack(uss_ptr ? &uss : NULL, &uoss, regs->sp);
+	ret = do_sigaltstack((stack_t __force __user *) (uss_ptr ? &uss : NULL),
+			     (stack_t __force __user *) &uoss, regs->sp);
 	set_fs(seg);
 	if (ret >= 0 && uoss_ptr)  {
 		if (!access_ok(VERIFY_WRITE, uoss_ptr, sizeof(stack_ia32_t)))
@@ -361,7 +362,7 @@ static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc,
  */
 static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs,
 				 size_t frame_size,
-				 void **fpstate)
+				 void __user **fpstate)
 {
 	unsigned long sp;
 
@@ -382,7 +383,7 @@ static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs,
 
 	if (used_math()) {
 		sp = sp - sig_xstate_ia32_size;
-		*fpstate = (struct _fpstate_ia32 *) sp;
+		*fpstate = (struct _fpstate_ia32 __user *) sp;
 		if (save_i387_xstate_ia32(*fpstate) < 0)
 			return (void __user *) -1L;
 	}
@@ -448,7 +449,7 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,
 		 * These are actually not used anymore, but left because some
 		 * gdb versions depend on them as a marker.
 		 */
-		put_user_ex(*((u64 *)&code), (u64 *)frame->retcode);
+		put_user_ex(*((u64 *)&code), (u64 __user *)frame->retcode);
 	} put_user_catch(err);
 
 	if (err)
@@ -529,7 +530,7 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 		 * Not actually used anymore, but left because some gdb
 		 * versions need it.
 		 */
-		put_user_ex(*((u64 *)&code), (u64 *)frame->retcode);
+		put_user_ex(*((u64 *)&code), (u64 __user *)frame->retcode);
 	} put_user_catch(err);
 
 	if (err)
-- 
cgit v1.2.3-70-g09d2


From 04d695a6828bca54d53305246545cd1f8a841ac6 Mon Sep 17 00:00:00 2001
From: Mathias Krause <minipli@googlemail.com>
Date: Sun, 2 Sep 2012 23:31:43 +0200
Subject: x86/pci/probe_roms: Add missing __iomem annotation to
 pci_map_biosrom()

Stay in sync with the declaration and fix the corresponding
sparse warnings.

Signed-off-by: Mathias Krause <minipli@googlemail.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Link: http://lkml.kernel.org/r/1346621506-30857-5-git-send-email-minipli@googlemail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/probe_roms.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/probe_roms.c b/arch/x86/kernel/probe_roms.c
index 0bc72e2069e..d5f15c3f7b2 100644
--- a/arch/x86/kernel/probe_roms.c
+++ b/arch/x86/kernel/probe_roms.c
@@ -150,7 +150,7 @@ static struct resource *find_oprom(struct pci_dev *pdev)
 	return oprom;
 }
 
-void *pci_map_biosrom(struct pci_dev *pdev)
+void __iomem *pci_map_biosrom(struct pci_dev *pdev)
 {
 	struct resource *oprom = find_oprom(pdev);
 
-- 
cgit v1.2.3-70-g09d2


From 5c7d03e99cb1ed449328ed9fba0c632944d39e7e Mon Sep 17 00:00:00 2001
From: Mathias Krause <minipli@googlemail.com>
Date: Sun, 2 Sep 2012 23:31:44 +0200
Subject: x86/fpu/xsave: Keep __user annotation in casts

Don't remove the __user annotation of the fpstate pointer, but
drop the superfluous void * cast instead.

This fixes the following sparse warnings:

  xsave.c:135:15: warning: cast removes address space of expression
  xsave.c:135:15: warning: incorrect type in argument 1 (different address spaces)
  xsave.c:135:15:    expected void const volatile [noderef] <asn:1>*<noident>
  [...]

Signed-off-by: Mathias Krause <minipli@googlemail.com>
Cc: Suresh Siddha <suresh.b.siddha@intel.com>
Link: http://lkml.kernel.org/r/1346621506-30857-6-git-send-email-minipli@googlemail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/xsave.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index 3d3e2070911..9e1a8a7ba6e 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -132,9 +132,9 @@ int check_for_xstate(struct i387_fxsave_struct __user *buf,
 	    fx_sw_user->xstate_size > fx_sw_user->extended_size)
 		return -EINVAL;
 
-	err = __get_user(magic2, (__u32 *) (((void *)fpstate) +
-					    fx_sw_user->extended_size -
-					    FP_XSTATE_MAGIC2_SIZE));
+	err = __get_user(magic2, (__u32 __user *) (fpstate +
+						   fx_sw_user->extended_size -
+						   FP_XSTATE_MAGIC2_SIZE));
 	if (err)
 		return err;
 	/*
-- 
cgit v1.2.3-70-g09d2


From 2b11afd1ab502d959ae8d6d5812923151b5bc505 Mon Sep 17 00:00:00 2001
From: Mathias Krause <minipli@googlemail.com>
Date: Sun, 2 Sep 2012 23:31:45 +0200
Subject: x86/iommu: Drop duplicate const in __IOMMU_INIT

It's redundant and makes sparse complain about it.

Signed-off-by: Mathias Krause <minipli@googlemail.com>
Cc: Joerg Roedel <joerg.roedel@amd.com>
Link: http://lkml.kernel.org/r/1346621506-30857-7-git-send-email-minipli@googlemail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/iommu_table.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/iommu_table.h b/arch/x86/include/asm/iommu_table.h
index f229b13a5f3..bbf8fb2e392 100644
--- a/arch/x86/include/asm/iommu_table.h
+++ b/arch/x86/include/asm/iommu_table.h
@@ -48,7 +48,7 @@ struct iommu_table_entry {
 
 
 #define __IOMMU_INIT(_detect, _depend, _early_init, _late_init, _finish)\
-	static const struct iommu_table_entry const			\
+	static const struct iommu_table_entry				\
 		__iommu_entry_##_detect __used				\
 	__attribute__ ((unused, __section__(".iommu_table"),		\
 			aligned((sizeof(void *)))))	\
-- 
cgit v1.2.3-70-g09d2


From ae13b7b4e041eccf34fa4dd58581fe1441375578 Mon Sep 17 00:00:00 2001
From: Mathias Krause <minipli@googlemail.com>
Date: Sun, 2 Sep 2012 23:31:46 +0200
Subject: x86/iommu: Use NULL instead of plain 0 for __IOMMU_INIT

IOMMU_INIT_POST and IOMMU_INIT_POST_FINISH pass the plain value
0 instead of NULL to __IOMMU_INIT. Fix this and make sparse
happy by doing so.

Signed-off-by: Mathias Krause <minipli@googlemail.com>
Cc: Joerg Roedel <joerg.roedel@amd.com>
Link: http://lkml.kernel.org/r/1346621506-30857-8-git-send-email-minipli@googlemail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/iommu_table.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/iommu_table.h b/arch/x86/include/asm/iommu_table.h
index bbf8fb2e392..f42a04735a0 100644
--- a/arch/x86/include/asm/iommu_table.h
+++ b/arch/x86/include/asm/iommu_table.h
@@ -63,10 +63,10 @@ struct iommu_table_entry {
  * to stop detecting the other IOMMUs after yours has been detected.
  */
 #define IOMMU_INIT_POST(_detect)					\
-	__IOMMU_INIT(_detect, pci_swiotlb_detect_4gb,  0, 0, 0)
+	__IOMMU_INIT(_detect, pci_swiotlb_detect_4gb,  NULL, NULL, 0)
 
 #define IOMMU_INIT_POST_FINISH(detect)					\
-	__IOMMU_INIT(_detect, pci_swiotlb_detect_4gb,  0, 0, 1)
+	__IOMMU_INIT(_detect, pci_swiotlb_detect_4gb,  NULL, NULL, 1)
 
 /*
  * A more sophisticated version of IOMMU_INIT. This variant requires:
-- 
cgit v1.2.3-70-g09d2


From 4454d32749465ffa77d82bc1fdd196d6dedc544b Mon Sep 17 00:00:00 2001
From: Joe Millenbach <jmillenbach@gmail.com>
Date: Sun, 2 Sep 2012 17:38:20 -0700
Subject: x86/kconfig: Remove outdated reference to Intel CPUs in
 CONFIG_SWIOTLB

Deleted the no longer valid example of which x86 CPUs lack a
hardware IOMMU, and moved the "If unsure..." statement to a new
line to follow the style of surrounding options.

Signed-off-by: Joe Millenbach <jmillenbach@gmail.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
Cc: team-fjord@googlegroups.com
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Link: http://lkml.kernel.org/r/1346632700-29113-1-git-send-email-jmillenbach@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/Kconfig | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 8ec3a1aa4ab..50a1d1f9b6d 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -746,10 +746,10 @@ config SWIOTLB
 	def_bool y if X86_64
 	---help---
 	  Support for software bounce buffers used on x86-64 systems
-	  which don't have a hardware IOMMU (e.g. the current generation
-	  of Intel's x86-64 CPUs). Using this PCI devices which can only
-	  access 32-bits of memory can be used on systems with more than
-	  3 GB of memory. If unsure, say Y.
+	  which don't have a hardware IOMMU. Using this PCI devices
+	  which can only access 32-bits of memory can be used on systems
+	  with more than 3 GB of memory.
+	  If unsure, say Y.
 
 config IOMMU_HELPER
 	def_bool (CALGARY_IOMMU || GART_IOMMU || SWIOTLB || AMD_IOMMU)
-- 
cgit v1.2.3-70-g09d2


From 326d07cb30fed2387efccd4bf3bd8e4f28719e9e Mon Sep 17 00:00:00 2001
From: Mathias Krause <minipli@googlemail.com>
Date: Thu, 30 Aug 2012 01:30:13 +0200
Subject: KVM: x86: minor size optimization

Some fields can be constified and/or made static to reduce code and data
size.

Numbers for a 32 bit build:

        text    data     bss     dec     hex filename
before: 3351      80       0    3431     d67 cpuid.o
 after: 3391       0       0    3391     d3f cpuid.o

Signed-off-by: Mathias Krause <minipli@googlemail.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/cpuid.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index b496da684bd..ec79e773342 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -397,8 +397,8 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 		break;
 	}
 	case KVM_CPUID_SIGNATURE: {
-		char signature[12] = "KVMKVMKVM\0\0";
-		u32 *sigptr = (u32 *)signature;
+		static const char signature[12] = "KVMKVMKVM\0\0";
+		const u32 *sigptr = (const u32 *)signature;
 		entry->eax = KVM_CPUID_FEATURES;
 		entry->ebx = sigptr[0];
 		entry->ecx = sigptr[1];
@@ -484,10 +484,10 @@ struct kvm_cpuid_param {
 	u32 func;
 	u32 idx;
 	bool has_leaf_count;
-	bool (*qualifier)(struct kvm_cpuid_param *param);
+	bool (*qualifier)(const struct kvm_cpuid_param *param);
 };
 
-static bool is_centaur_cpu(struct kvm_cpuid_param *param)
+static bool is_centaur_cpu(const struct kvm_cpuid_param *param)
 {
 	return boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR;
 }
@@ -498,7 +498,7 @@ int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
 	struct kvm_cpuid_entry2 *cpuid_entries;
 	int limit, nent = 0, r = -E2BIG, i;
 	u32 func;
-	static struct kvm_cpuid_param param[] = {
+	static const struct kvm_cpuid_param param[] = {
 		{ .func = 0, .has_leaf_count = true },
 		{ .func = 0x80000000, .has_leaf_count = true },
 		{ .func = 0xC0000000, .qualifier = is_centaur_cpu, .has_leaf_count = true },
@@ -517,7 +517,7 @@ int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
 
 	r = 0;
 	for (i = 0; i < ARRAY_SIZE(param); i++) {
-		struct kvm_cpuid_param *ent = &param[i];
+		const struct kvm_cpuid_param *ent = &param[i];
 
 		if (ent->qualifier && !ent->qualifier(ent))
 			continue;
-- 
cgit v1.2.3-70-g09d2


From 89a87c67791b840d815a2028b88cefe6906ed42c Mon Sep 17 00:00:00 2001
From: Mathias Krause <minipli@googlemail.com>
Date: Thu, 30 Aug 2012 01:30:14 +0200
Subject: KVM: x86 emulator: use aligned variants of SSE register ops

As the the compiler ensures that the memory operand is always aligned
to a 16 byte memory location, use the aligned variant of MOVDQ for
read_sse_reg() and write_sse_reg().

Signed-off-by: Mathias Krause <minipli@googlemail.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/emulate.c | 64 +++++++++++++++++++++++++-------------------------
 1 file changed, 32 insertions(+), 32 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 1451cffd97e..5a0fee1a19c 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -909,23 +909,23 @@ static void read_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, int reg)
 {
 	ctxt->ops->get_fpu(ctxt);
 	switch (reg) {
-	case 0: asm("movdqu %%xmm0, %0" : "=m"(*data)); break;
-	case 1: asm("movdqu %%xmm1, %0" : "=m"(*data)); break;
-	case 2: asm("movdqu %%xmm2, %0" : "=m"(*data)); break;
-	case 3: asm("movdqu %%xmm3, %0" : "=m"(*data)); break;
-	case 4: asm("movdqu %%xmm4, %0" : "=m"(*data)); break;
-	case 5: asm("movdqu %%xmm5, %0" : "=m"(*data)); break;
-	case 6: asm("movdqu %%xmm6, %0" : "=m"(*data)); break;
-	case 7: asm("movdqu %%xmm7, %0" : "=m"(*data)); break;
+	case 0: asm("movdqa %%xmm0, %0" : "=m"(*data)); break;
+	case 1: asm("movdqa %%xmm1, %0" : "=m"(*data)); break;
+	case 2: asm("movdqa %%xmm2, %0" : "=m"(*data)); break;
+	case 3: asm("movdqa %%xmm3, %0" : "=m"(*data)); break;
+	case 4: asm("movdqa %%xmm4, %0" : "=m"(*data)); break;
+	case 5: asm("movdqa %%xmm5, %0" : "=m"(*data)); break;
+	case 6: asm("movdqa %%xmm6, %0" : "=m"(*data)); break;
+	case 7: asm("movdqa %%xmm7, %0" : "=m"(*data)); break;
 #ifdef CONFIG_X86_64
-	case 8: asm("movdqu %%xmm8, %0" : "=m"(*data)); break;
-	case 9: asm("movdqu %%xmm9, %0" : "=m"(*data)); break;
-	case 10: asm("movdqu %%xmm10, %0" : "=m"(*data)); break;
-	case 11: asm("movdqu %%xmm11, %0" : "=m"(*data)); break;
-	case 12: asm("movdqu %%xmm12, %0" : "=m"(*data)); break;
-	case 13: asm("movdqu %%xmm13, %0" : "=m"(*data)); break;
-	case 14: asm("movdqu %%xmm14, %0" : "=m"(*data)); break;
-	case 15: asm("movdqu %%xmm15, %0" : "=m"(*data)); break;
+	case 8: asm("movdqa %%xmm8, %0" : "=m"(*data)); break;
+	case 9: asm("movdqa %%xmm9, %0" : "=m"(*data)); break;
+	case 10: asm("movdqa %%xmm10, %0" : "=m"(*data)); break;
+	case 11: asm("movdqa %%xmm11, %0" : "=m"(*data)); break;
+	case 12: asm("movdqa %%xmm12, %0" : "=m"(*data)); break;
+	case 13: asm("movdqa %%xmm13, %0" : "=m"(*data)); break;
+	case 14: asm("movdqa %%xmm14, %0" : "=m"(*data)); break;
+	case 15: asm("movdqa %%xmm15, %0" : "=m"(*data)); break;
 #endif
 	default: BUG();
 	}
@@ -937,23 +937,23 @@ static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data,
 {
 	ctxt->ops->get_fpu(ctxt);
 	switch (reg) {
-	case 0: asm("movdqu %0, %%xmm0" : : "m"(*data)); break;
-	case 1: asm("movdqu %0, %%xmm1" : : "m"(*data)); break;
-	case 2: asm("movdqu %0, %%xmm2" : : "m"(*data)); break;
-	case 3: asm("movdqu %0, %%xmm3" : : "m"(*data)); break;
-	case 4: asm("movdqu %0, %%xmm4" : : "m"(*data)); break;
-	case 5: asm("movdqu %0, %%xmm5" : : "m"(*data)); break;
-	case 6: asm("movdqu %0, %%xmm6" : : "m"(*data)); break;
-	case 7: asm("movdqu %0, %%xmm7" : : "m"(*data)); break;
+	case 0: asm("movdqa %0, %%xmm0" : : "m"(*data)); break;
+	case 1: asm("movdqa %0, %%xmm1" : : "m"(*data)); break;
+	case 2: asm("movdqa %0, %%xmm2" : : "m"(*data)); break;
+	case 3: asm("movdqa %0, %%xmm3" : : "m"(*data)); break;
+	case 4: asm("movdqa %0, %%xmm4" : : "m"(*data)); break;
+	case 5: asm("movdqa %0, %%xmm5" : : "m"(*data)); break;
+	case 6: asm("movdqa %0, %%xmm6" : : "m"(*data)); break;
+	case 7: asm("movdqa %0, %%xmm7" : : "m"(*data)); break;
 #ifdef CONFIG_X86_64
-	case 8: asm("movdqu %0, %%xmm8" : : "m"(*data)); break;
-	case 9: asm("movdqu %0, %%xmm9" : : "m"(*data)); break;
-	case 10: asm("movdqu %0, %%xmm10" : : "m"(*data)); break;
-	case 11: asm("movdqu %0, %%xmm11" : : "m"(*data)); break;
-	case 12: asm("movdqu %0, %%xmm12" : : "m"(*data)); break;
-	case 13: asm("movdqu %0, %%xmm13" : : "m"(*data)); break;
-	case 14: asm("movdqu %0, %%xmm14" : : "m"(*data)); break;
-	case 15: asm("movdqu %0, %%xmm15" : : "m"(*data)); break;
+	case 8: asm("movdqa %0, %%xmm8" : : "m"(*data)); break;
+	case 9: asm("movdqa %0, %%xmm9" : : "m"(*data)); break;
+	case 10: asm("movdqa %0, %%xmm10" : : "m"(*data)); break;
+	case 11: asm("movdqa %0, %%xmm11" : : "m"(*data)); break;
+	case 12: asm("movdqa %0, %%xmm12" : : "m"(*data)); break;
+	case 13: asm("movdqa %0, %%xmm13" : : "m"(*data)); break;
+	case 14: asm("movdqa %0, %%xmm14" : : "m"(*data)); break;
+	case 15: asm("movdqa %0, %%xmm15" : : "m"(*data)); break;
 #endif
 	default: BUG();
 	}
-- 
cgit v1.2.3-70-g09d2


From fd0a0d82083747301f6c8084b4141bb490625016 Mon Sep 17 00:00:00 2001
From: Mathias Krause <minipli@googlemail.com>
Date: Thu, 30 Aug 2012 01:30:15 +0200
Subject: KVM: x86 emulator: mark opcode tables const

The opcode tables never change at runtime, therefor mark them const.

Signed-off-by: Mathias Krause <minipli@googlemail.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/emulate.c | 40 ++++++++++++++++++++--------------------
 1 file changed, 20 insertions(+), 20 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 5a0fee1a19c..fd06f9d6584 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -161,9 +161,9 @@ struct opcode {
 	u64 intercept : 8;
 	union {
 		int (*execute)(struct x86_emulate_ctxt *ctxt);
-		struct opcode *group;
-		struct group_dual *gdual;
-		struct gprefix *gprefix;
+		const struct opcode *group;
+		const struct group_dual *gdual;
+		const struct gprefix *gprefix;
 	} u;
 	int (*check_perm)(struct x86_emulate_ctxt *ctxt);
 };
@@ -3574,13 +3574,13 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt)
 		I2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e),	\
 		I2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e)
 
-static struct opcode group7_rm1[] = {
+static const struct opcode group7_rm1[] = {
 	DI(SrcNone | Priv, monitor),
 	DI(SrcNone | Priv, mwait),
 	N, N, N, N, N, N,
 };
 
-static struct opcode group7_rm3[] = {
+static const struct opcode group7_rm3[] = {
 	DIP(SrcNone | Prot | Priv,		vmrun,		check_svme_pa),
 	II(SrcNone  | Prot | VendorSpecific,	em_vmmcall,	vmmcall),
 	DIP(SrcNone | Prot | Priv,		vmload,		check_svme_pa),
@@ -3591,13 +3591,13 @@ static struct opcode group7_rm3[] = {
 	DIP(SrcNone | Prot | Priv,		invlpga,	check_svme),
 };
 
-static struct opcode group7_rm7[] = {
+static const struct opcode group7_rm7[] = {
 	N,
 	DIP(SrcNone, rdtscp, check_rdtsc),
 	N, N, N, N, N, N,
 };
 
-static struct opcode group1[] = {
+static const struct opcode group1[] = {
 	I(Lock, em_add),
 	I(Lock | PageTable, em_or),
 	I(Lock, em_adc),
@@ -3608,11 +3608,11 @@ static struct opcode group1[] = {
 	I(0, em_cmp),
 };
 
-static struct opcode group1A[] = {
+static const struct opcode group1A[] = {
 	I(DstMem | SrcNone | Mov | Stack, em_pop), N, N, N, N, N, N, N,
 };
 
-static struct opcode group3[] = {
+static const struct opcode group3[] = {
 	I(DstMem | SrcImm, em_test),
 	I(DstMem | SrcImm, em_test),
 	I(DstMem | SrcNone | Lock, em_not),
@@ -3623,13 +3623,13 @@ static struct opcode group3[] = {
 	I(SrcMem, em_idiv_ex),
 };
 
-static struct opcode group4[] = {
+static const struct opcode group4[] = {
 	I(ByteOp | DstMem | SrcNone | Lock, em_grp45),
 	I(ByteOp | DstMem | SrcNone | Lock, em_grp45),
 	N, N, N, N, N, N,
 };
 
-static struct opcode group5[] = {
+static const struct opcode group5[] = {
 	I(DstMem | SrcNone | Lock,		em_grp45),
 	I(DstMem | SrcNone | Lock,		em_grp45),
 	I(SrcMem | Stack,			em_grp45),
@@ -3639,7 +3639,7 @@ static struct opcode group5[] = {
 	I(SrcMem | Stack,			em_grp45), N,
 };
 
-static struct opcode group6[] = {
+static const struct opcode group6[] = {
 	DI(Prot,	sldt),
 	DI(Prot,	str),
 	II(Prot | Priv | SrcMem16, em_lldt, lldt),
@@ -3647,7 +3647,7 @@ static struct opcode group6[] = {
 	N, N, N, N,
 };
 
-static struct group_dual group7 = { {
+static const struct group_dual group7 = { {
 	II(Mov | DstMem | Priv,			em_sgdt, sgdt),
 	II(Mov | DstMem | Priv,			em_sidt, sidt),
 	II(SrcMem | Priv,			em_lgdt, lgdt),
@@ -3664,7 +3664,7 @@ static struct group_dual group7 = { {
 	EXT(0, group7_rm7),
 } };
 
-static struct opcode group8[] = {
+static const struct opcode group8[] = {
 	N, N, N, N,
 	I(DstMem | SrcImmByte,				em_bt),
 	I(DstMem | SrcImmByte | Lock | PageTable,	em_bts),
@@ -3672,26 +3672,26 @@ static struct opcode group8[] = {
 	I(DstMem | SrcImmByte | Lock | PageTable,	em_btc),
 };
 
-static struct group_dual group9 = { {
+static const struct group_dual group9 = { {
 	N, I(DstMem64 | Lock | PageTable, em_cmpxchg8b), N, N, N, N, N, N,
 }, {
 	N, N, N, N, N, N, N, N,
 } };
 
-static struct opcode group11[] = {
+static const struct opcode group11[] = {
 	I(DstMem | SrcImm | Mov | PageTable, em_mov),
 	X7(D(Undefined)),
 };
 
-static struct gprefix pfx_0f_6f_0f_7f = {
+static const struct gprefix pfx_0f_6f_0f_7f = {
 	I(Mmx, em_mov), I(Sse | Aligned, em_mov), N, I(Sse | Unaligned, em_mov),
 };
 
-static struct gprefix pfx_vmovntpx = {
+static const struct gprefix pfx_vmovntpx = {
 	I(0, em_mov), N, N, N,
 };
 
-static struct opcode opcode_table[256] = {
+static const struct opcode opcode_table[256] = {
 	/* 0x00 - 0x07 */
 	I6ALU(Lock, em_add),
 	I(ImplicitOps | Stack | No64 | Src2ES, em_push_sreg),
@@ -3808,7 +3808,7 @@ static struct opcode opcode_table[256] = {
 	D(ImplicitOps), D(ImplicitOps), G(0, group4), G(0, group5),
 };
 
-static struct opcode twobyte_table[256] = {
+static const struct opcode twobyte_table[256] = {
 	/* 0x00 - 0x0F */
 	G(0, group6), GD(0, &group7), N, N,
 	N, I(ImplicitOps | VendorSpecific, em_syscall),
-- 
cgit v1.2.3-70-g09d2


From 0225fb509d51fcf777eb0aa31c304c582e3248fd Mon Sep 17 00:00:00 2001
From: Mathias Krause <minipli@googlemail.com>
Date: Thu, 30 Aug 2012 01:30:16 +0200
Subject: KVM: x86 emulator: constify emulate_ops

We never change emulate_ops[] at runtime so it should be r/o.

Signed-off-by: Mathias Krause <minipli@googlemail.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/include/asm/kvm_emulate.h |  2 +-
 arch/x86/kvm/emulate.c             | 22 +++++++++++-----------
 arch/x86/kvm/x86.c                 |  2 +-
 3 files changed, 13 insertions(+), 13 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 282aee5d6ac..b5bb73aecc0 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -250,7 +250,7 @@ struct read_cache {
 };
 
 struct x86_emulate_ctxt {
-	struct x86_emulate_ops *ops;
+	const struct x86_emulate_ops *ops;
 
 	/* Register state before/after emulation. */
 	unsigned long eflags;
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index fd06f9d6584..663e95881bd 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -1325,7 +1325,7 @@ static int read_interrupt_descriptor(struct x86_emulate_ctxt *ctxt,
 static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt,
 				     u16 selector, struct desc_ptr *dt)
 {
-	struct x86_emulate_ops *ops = ctxt->ops;
+	const struct x86_emulate_ops *ops = ctxt->ops;
 
 	if (selector & 1 << 2) {
 		struct desc_struct desc;
@@ -1747,7 +1747,7 @@ static int em_popa(struct x86_emulate_ctxt *ctxt)
 
 static int __emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq)
 {
-	struct x86_emulate_ops *ops = ctxt->ops;
+	const struct x86_emulate_ops *ops = ctxt->ops;
 	int rc;
 	struct desc_ptr dt;
 	gva_t cs_addr;
@@ -2129,7 +2129,7 @@ static bool vendor_intel(struct x86_emulate_ctxt *ctxt)
 
 static bool em_syscall_is_enabled(struct x86_emulate_ctxt *ctxt)
 {
-	struct x86_emulate_ops *ops = ctxt->ops;
+	const struct x86_emulate_ops *ops = ctxt->ops;
 	u32 eax, ebx, ecx, edx;
 
 	/*
@@ -2173,7 +2173,7 @@ static bool em_syscall_is_enabled(struct x86_emulate_ctxt *ctxt)
 
 static int em_syscall(struct x86_emulate_ctxt *ctxt)
 {
-	struct x86_emulate_ops *ops = ctxt->ops;
+	const struct x86_emulate_ops *ops = ctxt->ops;
 	struct desc_struct cs, ss;
 	u64 msr_data;
 	u16 cs_sel, ss_sel;
@@ -2231,7 +2231,7 @@ static int em_syscall(struct x86_emulate_ctxt *ctxt)
 
 static int em_sysenter(struct x86_emulate_ctxt *ctxt)
 {
-	struct x86_emulate_ops *ops = ctxt->ops;
+	const struct x86_emulate_ops *ops = ctxt->ops;
 	struct desc_struct cs, ss;
 	u64 msr_data;
 	u16 cs_sel, ss_sel;
@@ -2294,7 +2294,7 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt)
 
 static int em_sysexit(struct x86_emulate_ctxt *ctxt)
 {
-	struct x86_emulate_ops *ops = ctxt->ops;
+	const struct x86_emulate_ops *ops = ctxt->ops;
 	struct desc_struct cs, ss;
 	u64 msr_data;
 	int usermode;
@@ -2357,7 +2357,7 @@ static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt)
 static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt,
 					    u16 port, u16 len)
 {
-	struct x86_emulate_ops *ops = ctxt->ops;
+	const struct x86_emulate_ops *ops = ctxt->ops;
 	struct desc_struct tr_seg;
 	u32 base3;
 	int r;
@@ -2476,7 +2476,7 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt,
 			  u16 tss_selector, u16 old_tss_sel,
 			  ulong old_tss_base, struct desc_struct *new_desc)
 {
-	struct x86_emulate_ops *ops = ctxt->ops;
+	const struct x86_emulate_ops *ops = ctxt->ops;
 	struct tss_segment_16 tss_seg;
 	int ret;
 	u32 new_tss_base = get_desc_base(new_desc);
@@ -2623,7 +2623,7 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt,
 			  u16 tss_selector, u16 old_tss_sel,
 			  ulong old_tss_base, struct desc_struct *new_desc)
 {
-	struct x86_emulate_ops *ops = ctxt->ops;
+	const struct x86_emulate_ops *ops = ctxt->ops;
 	struct tss_segment_32 tss_seg;
 	int ret;
 	u32 new_tss_base = get_desc_base(new_desc);
@@ -2667,7 +2667,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
 				   u16 tss_selector, int idt_index, int reason,
 				   bool has_error_code, u32 error_code)
 {
-	struct x86_emulate_ops *ops = ctxt->ops;
+	const struct x86_emulate_ops *ops = ctxt->ops;
 	struct desc_struct curr_tss_desc, next_tss_desc;
 	int ret;
 	u16 old_tss_sel = get_segment_selector(ctxt, VCPU_SREG_TR);
@@ -4339,7 +4339,7 @@ static void fetch_possible_mmx_operand(struct x86_emulate_ctxt *ctxt,
 
 int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
 {
-	struct x86_emulate_ops *ops = ctxt->ops;
+	const struct x86_emulate_ops *ops = ctxt->ops;
 	int rc = X86EMUL_CONTINUE;
 	int saved_dst_type = ctxt->dst.type;
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 20f2266dfb6..0dc066f0428 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4323,7 +4323,7 @@ static void emulator_write_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg, ulon
 	kvm_register_write(emul_to_vcpu(ctxt), reg, val);
 }
 
-static struct x86_emulate_ops emulate_ops = {
+static const struct x86_emulate_ops emulate_ops = {
 	.read_gpr            = emulator_read_gpr,
 	.write_gpr           = emulator_write_gpr,
 	.read_std            = kvm_read_guest_virt_system,
-- 
cgit v1.2.3-70-g09d2


From 0fbe9b0b19fb92eee2cf23c23d63d6b3312681e5 Mon Sep 17 00:00:00 2001
From: Mathias Krause <minipli@googlemail.com>
Date: Thu, 30 Aug 2012 01:30:17 +0200
Subject: KVM: x86: constify read_write_emulator_ops

We never change those, make them r/o.

Signed-off-by: Mathias Krause <minipli@googlemail.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/x86.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0dc066f0428..317241619e2 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3773,14 +3773,14 @@ static int write_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
 	return X86EMUL_CONTINUE;
 }
 
-static struct read_write_emulator_ops read_emultor = {
+static const struct read_write_emulator_ops read_emultor = {
 	.read_write_prepare = read_prepare,
 	.read_write_emulate = read_emulate,
 	.read_write_mmio = vcpu_mmio_read,
 	.read_write_exit_mmio = read_exit_mmio,
 };
 
-static struct read_write_emulator_ops write_emultor = {
+static const struct read_write_emulator_ops write_emultor = {
 	.read_write_emulate = write_emulate,
 	.read_write_mmio = write_mmio,
 	.read_write_exit_mmio = write_exit_mmio,
@@ -3791,7 +3791,7 @@ static int emulator_read_write_onepage(unsigned long addr, void *val,
 				       unsigned int bytes,
 				       struct x86_exception *exception,
 				       struct kvm_vcpu *vcpu,
-				       struct read_write_emulator_ops *ops)
+				       const struct read_write_emulator_ops *ops)
 {
 	gpa_t gpa;
 	int handled, ret;
@@ -3840,7 +3840,7 @@ mmio:
 int emulator_read_write(struct x86_emulate_ctxt *ctxt, unsigned long addr,
 			void *val, unsigned int bytes,
 			struct x86_exception *exception,
-			struct read_write_emulator_ops *ops)
+			const struct read_write_emulator_ops *ops)
 {
 	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
 	gpa_t gpa;
-- 
cgit v1.2.3-70-g09d2


From f1d248315afc55771c3991b934014daa154d05f1 Mon Sep 17 00:00:00 2001
From: Mathias Krause <minipli@googlemail.com>
Date: Thu, 30 Aug 2012 01:30:18 +0200
Subject: KVM: x86: more constification

Signed-off-by: Mathias Krause <minipli@googlemail.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/lapic.c | 2 +-
 arch/x86/kvm/x86.c   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 18d149d8020..07ad628dadb 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -198,7 +198,7 @@ static inline int apic_x2apic_mode(struct kvm_lapic *apic)
 	return apic->vcpu->arch.apic_base & X2APIC_ENABLE;
 }
 
-static unsigned int apic_lvt_mask[APIC_LVT_NUM] = {
+static const unsigned int apic_lvt_mask[APIC_LVT_NUM] = {
 	LVT_MASK ,      /* part LVTT mask, timer mode mask added at runtime */
 	LVT_MASK | APIC_MODE_MASK,	/* LVTTHMR */
 	LVT_MASK | APIC_MODE_MASK,	/* LVTPC */
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 317241619e2..666da13c34f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -817,7 +817,7 @@ static u32 msrs_to_save[] = {
 
 static unsigned num_msrs_to_save;
 
-static u32 emulated_msrs[] = {
+static const u32 emulated_msrs[] = {
 	MSR_IA32_TSCDEADLINE,
 	MSR_IA32_MISC_ENABLE,
 	MSR_IA32_MCG_STATUS,
-- 
cgit v1.2.3-70-g09d2


From 772e031899fdf3c7636d66aae9b0b57d1aaebb93 Mon Sep 17 00:00:00 2001
From: Mathias Krause <minipli@googlemail.com>
Date: Thu, 30 Aug 2012 01:30:19 +0200
Subject: KVM: VMX: constify lookup tables

We use vmcs_field_to_offset_table[], kvm_vmx_segment_fields[] and
kvm_vmx_exit_handlers[] as lookup tables only -- make them r/o.

Signed-off-by: Mathias Krause <minipli@googlemail.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/vmx.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 248c2b490e9..d62b4139a29 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -450,7 +450,7 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
 #define FIELD64(number, name)	[number] = VMCS12_OFFSET(name), \
 				[number##_HIGH] = VMCS12_OFFSET(name)+4
 
-static unsigned short vmcs_field_to_offset_table[] = {
+static const unsigned short vmcs_field_to_offset_table[] = {
 	FIELD(VIRTUAL_PROCESSOR_ID, virtual_processor_id),
 	FIELD(GUEST_ES_SELECTOR, guest_es_selector),
 	FIELD(GUEST_CS_SELECTOR, guest_cs_selector),
@@ -666,7 +666,7 @@ static struct vmx_capability {
 		.ar_bytes = GUEST_##seg##_AR_BYTES,	   	\
 	}
 
-static struct kvm_vmx_segment_field {
+static const struct kvm_vmx_segment_field {
 	unsigned selector;
 	unsigned base;
 	unsigned limit;
@@ -2695,7 +2695,7 @@ static __exit void hardware_unsetup(void)
 
 static void fix_pmode_dataseg(struct kvm_vcpu *vcpu, int seg, struct kvm_segment *save)
 {
-	struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
+	const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
 	struct kvm_segment tmp = *save;
 
 	if (!(vmcs_readl(sf->base) == tmp.base && tmp.s)) {
@@ -2764,7 +2764,7 @@ static gva_t rmode_tss_base(struct kvm *kvm)
 
 static void fix_rmode_seg(int seg, struct kvm_segment *save)
 {
-	struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
+	const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
 
 	vmcs_write16(sf->selector, save->base >> 4);
 	vmcs_write32(sf->base, save->base & 0xffff0);
@@ -3202,7 +3202,7 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
 			    struct kvm_segment *var, int seg)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
+	const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
 	u32 ar;
 
 	vmx_segment_cache_clear(vmx);
@@ -3572,7 +3572,7 @@ out:
 
 static void seg_setup(int seg)
 {
-	struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
+	const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
 	unsigned int ar;
 
 	vmcs_write16(sf->selector, 0);
@@ -5655,7 +5655,7 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu)
  * may resume.  Otherwise they set the kvm_run parameter to indicate what needs
  * to be done to userspace and return 0.
  */
-static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
+static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
 	[EXIT_REASON_EXCEPTION_NMI]           = handle_exception,
 	[EXIT_REASON_EXTERNAL_INTERRUPT]      = handle_external_interrupt,
 	[EXIT_REASON_TRIPLE_FAULT]            = handle_triple_fault,
-- 
cgit v1.2.3-70-g09d2


From 09941fbb712655cde9b350852be7a99a6f61a03f Mon Sep 17 00:00:00 2001
From: Mathias Krause <minipli@googlemail.com>
Date: Thu, 30 Aug 2012 01:30:20 +0200
Subject: KVM: SVM: constify lookup tables

We never modify direct_access_msrs[], msrpm_ranges[],
svm_exit_handlers[] or x86_intercept_map[] at runtime.
Mark them r/o.

Signed-off-by: Mathias Krause <minipli@googlemail.com>
Cc: Joerg Roedel <joerg.roedel@amd.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/svm.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 31be4a55744..611c72875fb 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -163,7 +163,7 @@ static DEFINE_PER_CPU(u64, current_tsc_ratio);
 
 #define MSR_INVALID			0xffffffffU
 
-static struct svm_direct_access_msrs {
+static const struct svm_direct_access_msrs {
 	u32 index;   /* Index of the MSR */
 	bool always; /* True if intercept is always on */
 } direct_access_msrs[] = {
@@ -400,7 +400,7 @@ struct svm_init_data {
 	int r;
 };
 
-static u32 msrpm_ranges[] = {0, 0xc0000000, 0xc0010000};
+static const u32 msrpm_ranges[] = {0, 0xc0000000, 0xc0010000};
 
 #define NUM_MSR_MAPS ARRAY_SIZE(msrpm_ranges)
 #define MSRS_RANGE_SIZE 2048
@@ -3267,7 +3267,7 @@ static int pause_interception(struct vcpu_svm *svm)
 	return 1;
 }
 
-static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = {
+static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
 	[SVM_EXIT_READ_CR0]			= cr_interception,
 	[SVM_EXIT_READ_CR3]			= cr_interception,
 	[SVM_EXIT_READ_CR4]			= cr_interception,
@@ -4068,7 +4068,7 @@ static void svm_fpu_deactivate(struct kvm_vcpu *vcpu)
 #define POST_MEM(exit) { .exit_code = (exit), \
 			.stage = X86_ICPT_POST_MEMACCESS, }
 
-static struct __x86_intercept {
+static const struct __x86_intercept {
 	u32 exit_code;
 	enum x86_intercept_stage stage;
 } x86_intercept_map[] = {
-- 
cgit v1.2.3-70-g09d2


From 2df72e9bc4c505d8357012f2924589f3d16f9d44 Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Fri, 24 Aug 2012 15:54:57 -0300
Subject: KVM: split kvm_arch_flush_shadow

Introducing kvm_arch_flush_shadow_memslot, to invalidate the
translations of a single memory slot.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/ia64/kvm/kvm-ia64.c   | 8 +++++++-
 arch/powerpc/kvm/powerpc.c | 6 +++++-
 arch/s390/kvm/kvm-s390.c   | 7 ++++++-
 arch/x86/kvm/x86.c         | 8 +++++++-
 include/linux/kvm_host.h   | 6 +++++-
 virt/kvm/kvm_main.c        | 8 ++++----
 6 files changed, 34 insertions(+), 9 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index eac65380bd2..8b3a9c0e771 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -1613,11 +1613,17 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
 	return;
 }
 
-void kvm_arch_flush_shadow(struct kvm *kvm)
+void kvm_arch_flush_shadow_all(struct kvm *kvm)
 {
 	kvm_flush_remote_tlbs(kvm);
 }
 
+void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
+				   struct kvm_memory_slot *slot)
+{
+	kvm_arch_flush_shadow_all();
+}
+
 long kvm_arch_dev_ioctl(struct file *filp,
 			unsigned int ioctl, unsigned long arg)
 {
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 879b14a6140..4d213b8b0fb 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -334,8 +334,12 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
 	kvmppc_core_commit_memory_region(kvm, mem);
 }
 
+void kvm_arch_flush_shadow_all(struct kvm *kvm)
+{
+}
 
-void kvm_arch_flush_shadow(struct kvm *kvm)
+void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
+				   struct kvm_memory_slot *slot)
 {
 }
 
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index e83df7f0fed..ecced9d1898 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -969,7 +969,12 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
 	return;
 }
 
-void kvm_arch_flush_shadow(struct kvm *kvm)
+void kvm_arch_flush_shadow_all(struct kvm *kvm)
+{
+}
+
+void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
+				   struct kvm_memory_slot *slot)
 {
 }
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 666da13c34f..37797a090a8 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6447,12 +6447,18 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
 	spin_unlock(&kvm->mmu_lock);
 }
 
-void kvm_arch_flush_shadow(struct kvm *kvm)
+void kvm_arch_flush_shadow_all(struct kvm *kvm)
 {
 	kvm_mmu_zap_all(kvm);
 	kvm_reload_remote_mmus(kvm);
 }
 
+void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
+				   struct kvm_memory_slot *slot)
+{
+	kvm_arch_flush_shadow_all(kvm);
+}
+
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
 {
 	return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 9c0b3c3ae0a..40791930bc1 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -458,7 +458,11 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
 				int user_alloc);
 bool kvm_largepages_enabled(void);
 void kvm_disable_largepages(void);
-void kvm_arch_flush_shadow(struct kvm *kvm);
+/* flush all memory translations */
+void kvm_arch_flush_shadow_all(struct kvm *kvm);
+/* flush memory translations pointing to 'slot' */
+void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
+				   struct kvm_memory_slot *slot);
 
 int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages,
 			    int nr_pages);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 6425906d7ce..a4bf05be5fe 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -408,7 +408,7 @@ static void kvm_mmu_notifier_release(struct mmu_notifier *mn,
 	int idx;
 
 	idx = srcu_read_lock(&kvm->srcu);
-	kvm_arch_flush_shadow(kvm);
+	kvm_arch_flush_shadow_all(kvm);
 	srcu_read_unlock(&kvm->srcu, idx);
 }
 
@@ -582,7 +582,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
 #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
 	mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm);
 #else
-	kvm_arch_flush_shadow(kvm);
+	kvm_arch_flush_shadow_all(kvm);
 #endif
 	kvm_arch_destroy_vm(kvm);
 	kvm_free_physmem(kvm);
@@ -814,7 +814,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
 		 * 	- gfn_to_hva (kvm_read_guest, gfn_to_pfn)
 		 * 	- kvm_is_visible_gfn (mmu_check_roots)
 		 */
-		kvm_arch_flush_shadow(kvm);
+		kvm_arch_flush_shadow_memslot(kvm, slot);
 		kfree(old_memslots);
 	}
 
@@ -854,7 +854,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
 	 * mmio sptes.
 	 */
 	if (npages && old.base_gfn != mem->guest_phys_addr >> PAGE_SHIFT)
-		kvm_arch_flush_shadow(kvm);
+		kvm_arch_flush_shadow_all(kvm);
 
 	kvm_free_physmem_slot(&old, &new);
 	kfree(old_memslots);
-- 
cgit v1.2.3-70-g09d2


From 3b4dc3a031110753b9ba36432dbd21f989fcee56 Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Tue, 28 Aug 2012 17:43:26 -0300
Subject: KVM: move postcommit flush to x86, as mmio sptes are x86 specific

Other arches do not need this.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

v2: fix incorrect deletion of mmio sptes on gpa move (noticed by Takuya)
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/x86.c  | 8 ++++++++
 virt/kvm/kvm_main.c | 7 -------
 2 files changed, 8 insertions(+), 7 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 37797a090a8..6f6812ec841 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6445,6 +6445,14 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
 		kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages);
 	kvm_mmu_slot_remove_write_access(kvm, mem->slot);
 	spin_unlock(&kvm->mmu_lock);
+	/*
+	 * If memory slot is created, or moved, we need to clear all
+	 * mmio sptes.
+	 */
+	if (npages && old.base_gfn != mem->guest_phys_addr >> PAGE_SHIFT) {
+		kvm_mmu_zap_all(kvm);
+		kvm_reload_remote_mmus(kvm);
+	}
 }
 
 void kvm_arch_flush_shadow_all(struct kvm *kvm)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index f41ea1262d5..4fe02d90081 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -849,13 +849,6 @@ int __kvm_set_memory_region(struct kvm *kvm,
 
 	kvm_arch_commit_memory_region(kvm, mem, old, user_alloc);
 
-	/*
-	 * If the new memory slot is created, we need to clear all
-	 * mmio sptes.
-	 */
-	if (npages && old.base_gfn != mem->guest_phys_addr >> PAGE_SHIFT)
-		kvm_arch_flush_shadow_all(kvm);
-
 	kvm_free_physmem_slot(&old, &new);
 	kfree(old_memslots);
 
-- 
cgit v1.2.3-70-g09d2


From 716d51abff06f48425cef15d78ca6f36093f6dbf Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Mon, 3 Sep 2012 15:24:26 +0300
Subject: KVM: Provide userspace IO exit completion callback

Current code assumes that IO exit was due to instruction emulation
and handles execution back to emulator directly. This patch adds new
userspace IO exit completion callback that can be set by any other code
that caused IO exit to userspace.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/x86.c              | 93 +++++++++++++++++++++++++----------------
 2 files changed, 57 insertions(+), 37 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index fc0e752e756..64adb6117e1 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -414,6 +414,7 @@ struct kvm_vcpu_arch {
 	struct x86_emulate_ctxt emulate_ctxt;
 	bool emulate_regs_need_sync_to_vcpu;
 	bool emulate_regs_need_sync_from_vcpu;
+	int (*complete_userspace_io)(struct kvm_vcpu *vcpu);
 
 	gpa_t time;
 	struct pvclock_vcpu_time_info hv_clock;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6f6812ec841..f91e2c9d7cb 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4544,6 +4544,9 @@ static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
 	return true;
 }
 
+static int complete_emulated_mmio(struct kvm_vcpu *vcpu);
+static int complete_emulated_pio(struct kvm_vcpu *vcpu);
+
 int x86_emulate_instruction(struct kvm_vcpu *vcpu,
 			    unsigned long cr2,
 			    int emulation_type,
@@ -4614,13 +4617,16 @@ restart:
 	} else if (vcpu->arch.pio.count) {
 		if (!vcpu->arch.pio.in)
 			vcpu->arch.pio.count = 0;
-		else
+		else {
 			writeback = false;
+			vcpu->arch.complete_userspace_io = complete_emulated_pio;
+		}
 		r = EMULATE_DO_MMIO;
 	} else if (vcpu->mmio_needed) {
 		if (!vcpu->mmio_is_write)
 			writeback = false;
 		r = EMULATE_DO_MMIO;
+		vcpu->arch.complete_userspace_io = complete_emulated_mmio;
 	} else if (r == EMULATION_RESTART)
 		goto restart;
 	else
@@ -5476,6 +5482,24 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
 	return r;
 }
 
+static inline int complete_emulated_io(struct kvm_vcpu *vcpu)
+{
+	int r;
+	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+	r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
+	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+	if (r != EMULATE_DONE)
+		return 0;
+	return 1;
+}
+
+static int complete_emulated_pio(struct kvm_vcpu *vcpu)
+{
+	BUG_ON(!vcpu->arch.pio.count);
+
+	return complete_emulated_io(vcpu);
+}
+
 /*
  * Implements the following, as a state machine:
  *
@@ -5492,47 +5516,37 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
  *      copy data
  *      exit
  */
-static int complete_mmio(struct kvm_vcpu *vcpu)
+static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
 {
 	struct kvm_run *run = vcpu->run;
 	struct kvm_mmio_fragment *frag;
-	int r;
 
-	if (!(vcpu->arch.pio.count || vcpu->mmio_needed))
-		return 1;
+	BUG_ON(!vcpu->mmio_needed);
 
-	if (vcpu->mmio_needed) {
-		/* Complete previous fragment */
-		frag = &vcpu->mmio_fragments[vcpu->mmio_cur_fragment++];
-		if (!vcpu->mmio_is_write)
-			memcpy(frag->data, run->mmio.data, frag->len);
-		if (vcpu->mmio_cur_fragment == vcpu->mmio_nr_fragments) {
-			vcpu->mmio_needed = 0;
-			if (vcpu->mmio_is_write)
-				return 1;
-			vcpu->mmio_read_completed = 1;
-			goto done;
-		}
-		/* Initiate next fragment */
-		++frag;
-		run->exit_reason = KVM_EXIT_MMIO;
-		run->mmio.phys_addr = frag->gpa;
+	/* Complete previous fragment */
+	frag = &vcpu->mmio_fragments[vcpu->mmio_cur_fragment++];
+	if (!vcpu->mmio_is_write)
+		memcpy(frag->data, run->mmio.data, frag->len);
+	if (vcpu->mmio_cur_fragment == vcpu->mmio_nr_fragments) {
+		vcpu->mmio_needed = 0;
 		if (vcpu->mmio_is_write)
-			memcpy(run->mmio.data, frag->data, frag->len);
-		run->mmio.len = frag->len;
-		run->mmio.is_write = vcpu->mmio_is_write;
-		return 0;
-
-	}
-done:
-	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
-	r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
-	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
-	if (r != EMULATE_DONE)
-		return 0;
-	return 1;
+			return 1;
+		vcpu->mmio_read_completed = 1;
+		return complete_emulated_io(vcpu);
+	}
+	/* Initiate next fragment */
+	++frag;
+	run->exit_reason = KVM_EXIT_MMIO;
+	run->mmio.phys_addr = frag->gpa;
+	if (vcpu->mmio_is_write)
+		memcpy(run->mmio.data, frag->data, frag->len);
+	run->mmio.len = frag->len;
+	run->mmio.is_write = vcpu->mmio_is_write;
+	vcpu->arch.complete_userspace_io = complete_emulated_mmio;
+	return 0;
 }
 
+
 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	int r;
@@ -5559,9 +5573,14 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		}
 	}
 
-	r = complete_mmio(vcpu);
-	if (r <= 0)
-		goto out;
+	if (unlikely(vcpu->arch.complete_userspace_io)) {
+		int (*cui)(struct kvm_vcpu *) = vcpu->arch.complete_userspace_io;
+		vcpu->arch.complete_userspace_io = NULL;
+		r = cui(vcpu);
+		if (r <= 0)
+			goto out;
+	} else
+		WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed);
 
 	r = __vcpu_run(vcpu);
 
-- 
cgit v1.2.3-70-g09d2


From 9d1b39a967871b7c69025dba7b7bdaee42871021 Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Mon, 3 Sep 2012 15:24:27 +0300
Subject: KVM: emulator: make x86 emulation modes enum instead of defines

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/include/asm/kvm_emulate.h | 22 ++++++++++------------
 arch/x86/kvm/emulate.c             |  4 +++-
 2 files changed, 13 insertions(+), 13 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index b5bb73aecc0..e9e5675c0df 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -249,6 +249,15 @@ struct read_cache {
 	unsigned long end;
 };
 
+/* Execution mode, passed to the emulator. */
+enum x86emul_mode {
+	X86EMUL_MODE_REAL,	/* Real mode.             */
+	X86EMUL_MODE_VM86,	/* Virtual 8086 mode.     */
+	X86EMUL_MODE_PROT16,	/* 16-bit protected mode. */
+	X86EMUL_MODE_PROT32,	/* 32-bit protected mode. */
+	X86EMUL_MODE_PROT64,	/* 64-bit (long) mode.    */
+};
+
 struct x86_emulate_ctxt {
 	const struct x86_emulate_ops *ops;
 
@@ -256,7 +265,7 @@ struct x86_emulate_ctxt {
 	unsigned long eflags;
 	unsigned long eip; /* eip before instruction emulation */
 	/* Emulated execution mode, represented by an X86EMUL_MODE value. */
-	int mode;
+	enum x86emul_mode mode;
 
 	/* interruptibility state, as a result of execution of STI or MOV SS */
 	int interruptibility;
@@ -308,17 +317,6 @@ struct x86_emulate_ctxt {
 #define REPE_PREFIX	0xf3
 #define REPNE_PREFIX	0xf2
 
-/* Execution mode, passed to the emulator. */
-#define X86EMUL_MODE_REAL     0	/* Real mode.             */
-#define X86EMUL_MODE_VM86     1	/* Virtual 8086 mode.     */
-#define X86EMUL_MODE_PROT16   2	/* 16-bit protected mode. */
-#define X86EMUL_MODE_PROT32   4	/* 32-bit protected mode. */
-#define X86EMUL_MODE_PROT64   8	/* 64-bit (long) mode.    */
-
-/* any protected mode   */
-#define X86EMUL_MODE_PROT     (X86EMUL_MODE_PROT16|X86EMUL_MODE_PROT32| \
-			       X86EMUL_MODE_PROT64)
-
 /* CPUID vendors */
 #define X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx 0x68747541
 #define X86EMUL_CPUID_VENDOR_AuthenticAMD_ecx 0x444d4163
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 663e95881bd..5fe06a8fbeb 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2268,6 +2268,8 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt)
 		if (msr_data == 0x0)
 			return emulate_gp(ctxt, 0);
 		break;
+	default:
+		break;
 	}
 
 	ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF);
@@ -4400,7 +4402,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
 	}
 
 	/* Instruction can only be executed in protected mode */
-	if ((ctxt->d & Prot) && !(ctxt->mode & X86EMUL_MODE_PROT)) {
+	if ((ctxt->d & Prot) && ctxt->mode < X86EMUL_MODE_PROT16) {
 		rc = emulate_ud(ctxt);
 		goto done;
 	}
-- 
cgit v1.2.3-70-g09d2


From f3bd64c68a8f1245e3d037f70c6936cd7bb1196b Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Mon, 3 Sep 2012 15:24:28 +0300
Subject: KVM: emulator: string_addr_inc() cleanup

Remove unneeded segment argument. Address structure already has correct
segment which was put there during decode.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/emulate.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 5fe06a8fbeb..415f903facd 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2790,14 +2790,13 @@ int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
 	return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK;
 }
 
-static void string_addr_inc(struct x86_emulate_ctxt *ctxt, unsigned seg,
-			    int reg, struct operand *op)
+static void string_addr_inc(struct x86_emulate_ctxt *ctxt, int reg,
+		struct operand *op)
 {
 	int df = (ctxt->eflags & EFLG_DF) ? -1 : 1;
 
 	register_address_increment(ctxt, reg_rmw(ctxt, reg), df * op->bytes);
 	op->addr.mem.ea = register_address(ctxt, reg_read(ctxt, reg));
-	op->addr.mem.seg = seg;
 }
 
 static int em_das(struct x86_emulate_ctxt *ctxt)
@@ -4570,12 +4569,10 @@ writeback:
 	ctxt->dst.type = saved_dst_type;
 
 	if ((ctxt->d & SrcMask) == SrcSI)
-		string_addr_inc(ctxt, seg_override(ctxt),
-				VCPU_REGS_RSI, &ctxt->src);
+		string_addr_inc(ctxt, VCPU_REGS_RSI, &ctxt->src);
 
 	if ((ctxt->d & DstMask) == DstDI)
-		string_addr_inc(ctxt, VCPU_SREG_ES, VCPU_REGS_RDI,
-				&ctxt->dst);
+		string_addr_inc(ctxt, VCPU_REGS_RDI, &ctxt->dst);
 
 	if (ctxt->rep_prefix && (ctxt->d & String)) {
 		struct read_cache *r = &ctxt->io_read;
-- 
cgit v1.2.3-70-g09d2


From b3356bf0dbb34980620f2f7def7d1b9a0d325225 Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Mon, 3 Sep 2012 15:24:29 +0300
Subject: KVM: emulator: optimize "rep ins" handling

Optimize "rep ins" by allowing emulator to write back more than one
datum at a time. Introduce new operand type OP_MEM_STR which tells
writeback() that dst contains pointer to an array that should be written
back as opposite to just one data element.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/include/asm/kvm_emulate.h |  4 +++-
 arch/x86/kvm/emulate.c             | 33 ++++++++++++++++++++++++++++-----
 2 files changed, 31 insertions(+), 6 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index e9e5675c0df..15f960c06ff 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -213,8 +213,9 @@ typedef u32 __attribute__((vector_size(16))) sse128_t;
 
 /* Type, address-of, and value of an instruction's operand. */
 struct operand {
-	enum { OP_REG, OP_MEM, OP_IMM, OP_XMM, OP_MM, OP_NONE } type;
+	enum { OP_REG, OP_MEM, OP_MEM_STR, OP_IMM, OP_XMM, OP_MM, OP_NONE } type;
 	unsigned int bytes;
+	unsigned int count;
 	union {
 		unsigned long orig_val;
 		u64 orig_val64;
@@ -234,6 +235,7 @@ struct operand {
 		char valptr[sizeof(unsigned long) + 2];
 		sse128_t vec_val;
 		u64 mm_val;
+		void *data;
 	};
 };
 
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 415f903facd..39171cb307e 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -1301,8 +1301,15 @@ static int pio_in_emulated(struct x86_emulate_ctxt *ctxt,
 		rc->end = n * size;
 	}
 
-	memcpy(dest, rc->data + rc->pos, size);
-	rc->pos += size;
+	if (ctxt->rep_prefix && !(ctxt->eflags & EFLG_DF)) {
+		ctxt->dst.data = rc->data + rc->pos;
+		ctxt->dst.type = OP_MEM_STR;
+		ctxt->dst.count = (rc->end - rc->pos) / size;
+		rc->pos = rc->end;
+	} else {
+		memcpy(dest, rc->data + rc->pos, size);
+		rc->pos += size;
+	}
 	return 1;
 }
 
@@ -1546,6 +1553,14 @@ static int writeback(struct x86_emulate_ctxt *ctxt)
 		if (rc != X86EMUL_CONTINUE)
 			return rc;
 		break;
+	case OP_MEM_STR:
+		rc = segmented_write(ctxt,
+				ctxt->dst.addr.mem,
+				ctxt->dst.data,
+				ctxt->dst.bytes * ctxt->dst.count);
+		if (rc != X86EMUL_CONTINUE)
+			return rc;
+		break;
 	case OP_XMM:
 		write_sse_reg(ctxt, &ctxt->dst.vec_val, ctxt->dst.addr.xmm);
 		break;
@@ -2793,7 +2808,7 @@ int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
 static void string_addr_inc(struct x86_emulate_ctxt *ctxt, int reg,
 		struct operand *op)
 {
-	int df = (ctxt->eflags & EFLG_DF) ? -1 : 1;
+	int df = (ctxt->eflags & EFLG_DF) ? -op->count : op->count;
 
 	register_address_increment(ctxt, reg_rmw(ctxt, reg), df * op->bytes);
 	op->addr.mem.ea = register_address(ctxt, reg_read(ctxt, reg));
@@ -3733,7 +3748,7 @@ static const struct opcode opcode_table[256] = {
 	I(DstReg | SrcMem | ModRM | Src2Imm, em_imul_3op),
 	I(SrcImmByte | Mov | Stack, em_push),
 	I(DstReg | SrcMem | ModRM | Src2ImmByte, em_imul_3op),
-	I2bvIP(DstDI | SrcDX | Mov | String, em_in, ins, check_perm_in), /* insb, insw/insd */
+	I2bvIP(DstDI | SrcDX | Mov | String | Unaligned, em_in, ins, check_perm_in), /* insb, insw/insd */
 	I2bvIP(SrcSI | DstDX | String, em_out, outs, check_perm_out), /* outsb, outsw/outsd */
 	/* 0x70 - 0x7F */
 	X16(D(SrcImmByte)),
@@ -3991,6 +4006,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
 			register_address(ctxt, reg_read(ctxt, VCPU_REGS_RDI));
 		op->addr.mem.seg = VCPU_SREG_ES;
 		op->val = 0;
+		op->count = 1;
 		break;
 	case OpDX:
 		op->type = OP_REG;
@@ -4034,6 +4050,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
 			register_address(ctxt, reg_read(ctxt, VCPU_REGS_RSI));
 		op->addr.mem.seg = seg_override(ctxt);
 		op->val = 0;
+		op->count = 1;
 		break;
 	case OpImmFAddr:
 		op->type = OP_IMM;
@@ -4575,8 +4592,14 @@ writeback:
 		string_addr_inc(ctxt, VCPU_REGS_RDI, &ctxt->dst);
 
 	if (ctxt->rep_prefix && (ctxt->d & String)) {
+		unsigned int count;
 		struct read_cache *r = &ctxt->io_read;
-		register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RCX), -1);
+		if ((ctxt->d & SrcMask) == SrcSI)
+			count = ctxt->src.count;
+		else
+			count = ctxt->dst.count;
+		register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RCX),
+				-count);
 
 		if (!string_insn_completed(ctxt)) {
 			/*
-- 
cgit v1.2.3-70-g09d2


From a50abc3b2b469ee80bc0f9ef5b6d457ef72659a9 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Wed, 5 Sep 2012 20:00:52 +0300
Subject: KVM: use symbolic constant for nr interrupts

interrupt_bitmap is KVM_NR_INTERRUPTS bits in size,
so just use that instead of hard-coded constants
and math.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/x86.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index f91e2c9d7cb..c4d451ed157 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2366,7 +2366,7 @@ static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
 static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
 				    struct kvm_interrupt *irq)
 {
-	if (irq->irq < 0 || irq->irq >= 256)
+	if (irq->irq < 0 || irq->irq >= KVM_NR_INTERRUPTS)
 		return -EINVAL;
 	if (irqchip_in_kernel(vcpu->kvm))
 		return -ENXIO;
@@ -5793,7 +5793,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 	if (mmu_reset_needed)
 		kvm_mmu_reset_context(vcpu);
 
-	max_bits = (sizeof sregs->interrupt_bitmap) << 3;
+	max_bits = KVM_NR_INTERRUPTS;
 	pending_vec = find_first_bit(
 		(const unsigned long *)sregs->interrupt_bitmap, max_bits);
 	if (pending_vec < max_bits) {
-- 
cgit v1.2.3-70-g09d2


From f94a73f8dd5644f45f9d2e3139608ca83b932d93 Mon Sep 17 00:00:00 2001
From: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
Date: Tue, 28 Aug 2012 14:24:43 +0300
Subject: crypto: twofish-avx - tune assembler code for more performance

Patch replaces 'movb' instructions with 'movzbl' to break false register
dependencies and interleaves instructions better for out-of-order scheduling.

Tested on Intel Core i5-2450M and AMD FX-8100.

tcrypt ECB results:

Intel Core i5-2450M:

size    old-vs-new      new-vs-3way     old-vs-3way
        enc     dec     enc     dec     enc     dec
256     1.12x   1.13x   1.36x   1.37x   1.21x   1.22x
1k      1.14x   1.14x   1.48x   1.49x   1.29x   1.31x
8k      1.14x   1.14x   1.50x   1.52x   1.32x   1.33x

AMD FX-8100:

size    old-vs-new      new-vs-3way     old-vs-3way
        enc     dec     enc     dec     enc     dec
256     1.10x   1.11x   1.01x   1.01x   0.92x   0.91x
1k      1.11x   1.12x   1.08x   1.07x   0.97x   0.96x
8k      1.11x   1.13x   1.10x   1.08x   0.99x   0.97x

[v2]
 - Do instruction interleaving another way to avoid adding new FPU<=>CPU
   register moves as these cause performance drop on Bulldozer.
 - Further interleaving improvements for better out-of-order scheduling.

Tested-by: Borislav Petkov <bp@alien8.de>
Cc: Johannes Goetzfried <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/x86/crypto/twofish-avx-x86_64-asm_64.S | 227 +++++++++++++++++-----------
 1 file changed, 142 insertions(+), 85 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
index 35f45574390..1585abb13dd 100644
--- a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
@@ -4,6 +4,8 @@
  * Copyright (C) 2012 Johannes Goetzfried
  *     <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
  *
+ * Copyright © 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
+ *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2 of the License, or
@@ -47,16 +49,22 @@
 #define RC2 %xmm6
 #define RD2 %xmm7
 
-#define RX %xmm8
-#define RY %xmm9
+#define RX0 %xmm8
+#define RY0 %xmm9
+
+#define RX1 %xmm10
+#define RY1 %xmm11
 
-#define RK1 %xmm10
-#define RK2 %xmm11
+#define RK1 %xmm12
+#define RK2 %xmm13
 
-#define RID1  %rax
-#define RID1b %al
-#define RID2  %rbx
-#define RID2b %bl
+#define RT %xmm14
+#define RR %xmm15
+
+#define RID1  %rbp
+#define RID1d %ebp
+#define RID2  %rsi
+#define RID2d %esi
 
 #define RGI1   %rdx
 #define RGI1bl %dl
@@ -65,6 +73,13 @@
 #define RGI2bl %cl
 #define RGI2bh %ch
 
+#define RGI3   %rax
+#define RGI3bl %al
+#define RGI3bh %ah
+#define RGI4   %rbx
+#define RGI4bl %bl
+#define RGI4bh %bh
+
 #define RGS1  %r8
 #define RGS1d %r8d
 #define RGS2  %r9
@@ -73,89 +88,123 @@
 #define RGS3d %r10d
 
 
-#define lookup_32bit(t0, t1, t2, t3, src, dst) \
-	movb		src ## bl,        RID1b;     \
-	movb		src ## bh,        RID2b;     \
-	movl		t0(CTX, RID1, 4), dst ## d;  \
-	xorl		t1(CTX, RID2, 4), dst ## d;  \
+#define lookup_32bit(t0, t1, t2, t3, src, dst, interleave_op, il_reg) \
+	movzbl		src ## bl,        RID1d;     \
+	movzbl		src ## bh,        RID2d;     \
 	shrq $16,	src;                         \
-	movb		src ## bl,        RID1b;     \
-	movb		src ## bh,        RID2b;     \
+	movl		t0(CTX, RID1, 4), dst ## d;  \
+	movl		t1(CTX, RID2, 4), RID2d;     \
+	movzbl		src ## bl,        RID1d;     \
+	xorl		RID2d,            dst ## d;  \
+	movzbl		src ## bh,        RID2d;     \
+	interleave_op(il_reg);			     \
 	xorl		t2(CTX, RID1, 4), dst ## d;  \
 	xorl		t3(CTX, RID2, 4), dst ## d;
 
-#define G(a, x, t0, t1, t2, t3) \
-	vmovq		a,    RGI1;               \
-	vpsrldq $8,	a,    x;                  \
-	vmovq		x,    RGI2;               \
+#define dummy(d) /* do nothing */
+
+#define shr_next(reg) \
+	shrq $16,	reg;
+
+#define G(gi1, gi2, x, t0, t1, t2, t3) \
+	lookup_32bit(t0, t1, t2, t3, ##gi1, RGS1, shr_next, ##gi1);  \
+	lookup_32bit(t0, t1, t2, t3, ##gi2, RGS3, shr_next, ##gi2);  \
+	\
+	lookup_32bit(t0, t1, t2, t3, ##gi1, RGS2, dummy, none);      \
+	shlq $32,	RGS2;                                        \
+	orq		RGS1, RGS2;                                  \
+	lookup_32bit(t0, t1, t2, t3, ##gi2, RGS1, dummy, none);      \
+	shlq $32,	RGS1;                                        \
+	orq		RGS1, RGS3;
+
+#define round_head_2(a, b, x1, y1, x2, y2) \
+	vmovq		b ## 1, RGI3;           \
+	vpextrq $1,	b ## 1, RGI4;           \
 	\
-	lookup_32bit(t0, t1, t2, t3, RGI1, RGS1); \
-	shrq $16,	RGI1;                     \
-	lookup_32bit(t0, t1, t2, t3, RGI1, RGS2); \
-	shlq $32,	RGS2;                     \
-	orq		RGS1, RGS2;               \
+	G(RGI1, RGI2, x1, s0, s1, s2, s3);      \
+	vmovq		a ## 2, RGI1;           \
+	vpextrq $1,	a ## 2, RGI2;           \
+	vmovq		RGS2, x1;               \
+	vpinsrq $1,	RGS3, x1, x1;           \
 	\
-	lookup_32bit(t0, t1, t2, t3, RGI2, RGS1); \
-	shrq $16,	RGI2;                     \
-	lookup_32bit(t0, t1, t2, t3, RGI2, RGS3); \
-	shlq $32,	RGS3;                     \
-	orq		RGS1, RGS3;               \
+	G(RGI3, RGI4, y1, s1, s2, s3, s0);      \
+	vmovq		b ## 2, RGI3;           \
+	vpextrq $1,	b ## 2, RGI4;           \
+	vmovq		RGS2, y1;               \
+	vpinsrq $1,	RGS3, y1, y1;           \
 	\
-	vmovq		RGS2, x;                  \
-	vpinsrq $1,	RGS3, x, x;
+	G(RGI1, RGI2, x2, s0, s1, s2, s3);      \
+	vmovq		RGS2, x2;               \
+	vpinsrq $1,	RGS3, x2, x2;           \
+	\
+	G(RGI3, RGI4, y2, s1, s2, s3, s0);      \
+	vmovq		RGS2, y2;               \
+	vpinsrq $1,	RGS3, y2, y2;
 
-#define encround(a, b, c, d, x, y) \
-	G(a, x, s0, s1, s2, s3);           \
-	G(b, y, s1, s2, s3, s0);           \
+#define encround_tail(a, b, c, d, x, y, prerotate) \
 	vpaddd			x, y,   x; \
+	vpaddd			x, RK1, RT;\
+	prerotate(b);			   \
+	vpxor			RT, c,  c; \
 	vpaddd			y, x,   y; \
-	vpaddd			x, RK1, x; \
 	vpaddd			y, RK2, y; \
-	vpxor			x, c,   c; \
-	vpsrld $1,		c, x;      \
+	vpsrld $1,		c, RT;     \
 	vpslld $(32 - 1),	c, c;      \
-	vpor			c, x,   c; \
-	vpslld $1,		d, x;      \
-	vpsrld $(32 - 1),	d, d;      \
-	vpor			d, x,   d; \
-	vpxor			d, y,   d;
-
-#define decround(a, b, c, d, x, y) \
-	G(a, x, s0, s1, s2, s3);           \
-	G(b, y, s1, s2, s3, s0);           \
+	vpor			c, RT,  c; \
+	vpxor			d, y,   d; \
+
+#define decround_tail(a, b, c, d, x, y, prerotate) \
 	vpaddd			x, y,   x; \
+	vpaddd			x, RK1, RT;\
+	prerotate(a);			   \
+	vpxor			RT, c,  c; \
 	vpaddd			y, x,   y; \
 	vpaddd			y, RK2, y; \
 	vpxor			d, y,   d; \
 	vpsrld $1,		d, y;      \
 	vpslld $(32 - 1),	d, d;      \
 	vpor			d, y,   d; \
-	vpslld $1,		c, y;      \
-	vpsrld $(32 - 1),	c, c;      \
-	vpor			c, y,   c; \
-	vpaddd			x, RK1, x; \
-	vpxor			x, c,   c;
-
-#define encrypt_round(n, a, b, c, d) \
-	vbroadcastss (k+4*(2*(n)))(CTX),   RK1;           \
-	vbroadcastss (k+4*(2*(n)+1))(CTX), RK2;           \
-	encround(a ## 1, b ## 1, c ## 1, d ## 1, RX, RY); \
-	encround(a ## 2, b ## 2, c ## 2, d ## 2, RX, RY);
-
-#define decrypt_round(n, a, b, c, d) \
-	vbroadcastss (k+4*(2*(n)))(CTX),   RK1;           \
-	vbroadcastss (k+4*(2*(n)+1))(CTX), RK2;           \
-	decround(a ## 1, b ## 1, c ## 1, d ## 1, RX, RY); \
-	decround(a ## 2, b ## 2, c ## 2, d ## 2, RX, RY);
+
+#define rotate_1l(x) \
+	vpslld $1,		x, RR;     \
+	vpsrld $(32 - 1),	x, x;      \
+	vpor			x, RR,  x;
+
+#define preload_rgi(c) \
+	vmovq			c, RGI1; \
+	vpextrq $1,		c, RGI2;
+
+#define encrypt_round(n, a, b, c, d, preload, prerotate) \
+	vbroadcastss (k+4*(2*(n)))(CTX),   RK1;                  \
+	vbroadcastss (k+4*(2*(n)+1))(CTX), RK2;                  \
+	round_head_2(a, b, RX0, RY0, RX1, RY1);                  \
+	encround_tail(a ## 1, b ## 1, c ## 1, d ## 1, RX0, RY0, prerotate); \
+	preload(c ## 1);                                         \
+	encround_tail(a ## 2, b ## 2, c ## 2, d ## 2, RX1, RY1, prerotate);
+
+#define decrypt_round(n, a, b, c, d, preload, prerotate) \
+	vbroadcastss (k+4*(2*(n)))(CTX),   RK1;                  \
+	vbroadcastss (k+4*(2*(n)+1))(CTX), RK2;                  \
+	round_head_2(a, b, RX0, RY0, RX1, RY1);                  \
+	decround_tail(a ## 1, b ## 1, c ## 1, d ## 1, RX0, RY0, prerotate); \
+	preload(c ## 1);                                         \
+	decround_tail(a ## 2, b ## 2, c ## 2, d ## 2, RX1, RY1, prerotate);
 
 #define encrypt_cycle(n) \
-	encrypt_round((2*n), RA, RB, RC, RD);       \
-	encrypt_round(((2*n) + 1), RC, RD, RA, RB);
+	encrypt_round((2*n), RA, RB, RC, RD, preload_rgi, rotate_1l); \
+	encrypt_round(((2*n) + 1), RC, RD, RA, RB, preload_rgi, rotate_1l);
+
+#define encrypt_cycle_last(n) \
+	encrypt_round((2*n), RA, RB, RC, RD, preload_rgi, rotate_1l); \
+	encrypt_round(((2*n) + 1), RC, RD, RA, RB, dummy, dummy);
 
 #define decrypt_cycle(n) \
-	decrypt_round(((2*n) + 1), RC, RD, RA, RB); \
-	decrypt_round((2*n), RA, RB, RC, RD);
+	decrypt_round(((2*n) + 1), RC, RD, RA, RB, preload_rgi, rotate_1l); \
+	decrypt_round((2*n), RA, RB, RC, RD, preload_rgi, rotate_1l);
 
+#define decrypt_cycle_last(n) \
+	decrypt_round(((2*n) + 1), RC, RD, RA, RB, preload_rgi, rotate_1l); \
+	decrypt_round((2*n), RA, RB, RC, RD, dummy, dummy);
 
 #define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
 	vpunpckldq		x1, x0, t0; \
@@ -216,17 +265,20 @@ __twofish_enc_blk_8way:
 	 *	%rcx: bool, if true: xor output
 	 */
 
+	pushq %rbp;
 	pushq %rbx;
 	pushq %rcx;
 
 	vmovdqu w(CTX), RK1;
 
 	leaq (4*4*4)(%rdx), %rax;
-	inpack_blocks(%rdx, RA1, RB1, RC1, RD1, RK1, RX, RY, RK2);
-	inpack_blocks(%rax, RA2, RB2, RC2, RD2, RK1, RX, RY, RK2);
+	inpack_blocks(%rdx, RA1, RB1, RC1, RD1, RK1, RX0, RY0, RK2);
+	preload_rgi(RA1);
+	rotate_1l(RD1);
+	inpack_blocks(%rax, RA2, RB2, RC2, RD2, RK1, RX0, RY0, RK2);
+	rotate_1l(RD2);
 
-	xorq RID1, RID1;
-	xorq RID2, RID2;
+	movq %rsi, %r11;
 
 	encrypt_cycle(0);
 	encrypt_cycle(1);
@@ -235,26 +287,27 @@ __twofish_enc_blk_8way:
 	encrypt_cycle(4);
 	encrypt_cycle(5);
 	encrypt_cycle(6);
-	encrypt_cycle(7);
+	encrypt_cycle_last(7);
 
 	vmovdqu (w+4*4)(CTX), RK1;
 
 	popq %rcx;
 	popq %rbx;
+	popq %rbp;
 
-	leaq (4*4*4)(%rsi), %rax;
+	leaq (4*4*4)(%r11), %rax;
 
 	testb %cl, %cl;
 	jnz __enc_xor8;
 
-	outunpack_blocks(%rsi, RC1, RD1, RA1, RB1, RK1, RX, RY, RK2);
-	outunpack_blocks(%rax, RC2, RD2, RA2, RB2, RK1, RX, RY, RK2);
+	outunpack_blocks(%r11, RC1, RD1, RA1, RB1, RK1, RX0, RY0, RK2);
+	outunpack_blocks(%rax, RC2, RD2, RA2, RB2, RK1, RX0, RY0, RK2);
 
 	ret;
 
 __enc_xor8:
-	outunpack_xor_blocks(%rsi, RC1, RD1, RA1, RB1, RK1, RX, RY, RK2);
-	outunpack_xor_blocks(%rax, RC2, RD2, RA2, RB2, RK1, RX, RY, RK2);
+	outunpack_xor_blocks(%r11, RC1, RD1, RA1, RB1, RK1, RX0, RY0, RK2);
+	outunpack_xor_blocks(%rax, RC2, RD2, RA2, RB2, RK1, RX0, RY0, RK2);
 
 	ret;
 
@@ -269,16 +322,19 @@ twofish_dec_blk_8way:
 	 *	%rdx: src
 	 */
 
+	pushq %rbp;
 	pushq %rbx;
 
 	vmovdqu (w+4*4)(CTX), RK1;
 
 	leaq (4*4*4)(%rdx), %rax;
-	inpack_blocks(%rdx, RC1, RD1, RA1, RB1, RK1, RX, RY, RK2);
-	inpack_blocks(%rax, RC2, RD2, RA2, RB2, RK1, RX, RY, RK2);
+	inpack_blocks(%rdx, RC1, RD1, RA1, RB1, RK1, RX0, RY0, RK2);
+	preload_rgi(RC1);
+	rotate_1l(RA1);
+	inpack_blocks(%rax, RC2, RD2, RA2, RB2, RK1, RX0, RY0, RK2);
+	rotate_1l(RA2);
 
-	xorq RID1, RID1;
-	xorq RID2, RID2;
+	movq %rsi, %r11;
 
 	decrypt_cycle(7);
 	decrypt_cycle(6);
@@ -287,14 +343,15 @@ twofish_dec_blk_8way:
 	decrypt_cycle(3);
 	decrypt_cycle(2);
 	decrypt_cycle(1);
-	decrypt_cycle(0);
+	decrypt_cycle_last(0);
 
 	vmovdqu (w)(CTX), RK1;
 
 	popq %rbx;
+	popq %rbp;
 
-	leaq (4*4*4)(%rsi), %rax;
-	outunpack_blocks(%rsi, RA1, RB1, RC1, RD1, RK1, RX, RY, RK2);
-	outunpack_blocks(%rax, RA2, RB2, RC2, RD2, RK1, RX, RY, RK2);
+	leaq (4*4*4)(%r11), %rax;
+	outunpack_blocks(%r11, RA1, RB1, RC1, RD1, RK1, RX0, RY0, RK2);
+	outunpack_blocks(%rax, RA2, RB2, RC2, RD2, RK1, RX0, RY0, RK2);
 
 	ret;
-- 
cgit v1.2.3-70-g09d2


From ddaea7869d29beb9e0042c96ea52c9cca2afd68a Mon Sep 17 00:00:00 2001
From: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
Date: Tue, 28 Aug 2012 14:24:49 +0300
Subject: crypto: cast5-avx - tune assembler code for more performance

Patch replaces 'movb' instructions with 'movzbl' to break false register
dependencies, interleaves instructions better for out-of-order scheduling
and merges constant 16-bit rotation with round-key variable rotation.

tcrypt ECB results (128bit key):

Intel Core i5-2450M:

size    old-vs-new      new-vs-generic  old-vs-generic
        enc     dec     enc     dec     enc     dec
256     1.18x   1.18x   2.45x   2.47x   2.08x   2.10x
1k      1.20x   1.20x   2.73x   2.73x   2.28x   2.28x
8k      1.20x   1.19x   2.73x   2.73x   2.28x   2.29x

[v2]
 - Do instruction interleaving another way to avoid adding new FPU<=>CPU
   register moves as these cause performance drop on Bulldozer.
 - Improvements to round-key variable rotation handling.
 - Further interleaving improvements for better out-of-order scheduling.

Cc: Johannes Goetzfried <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/x86/crypto/cast5-avx-x86_64-asm_64.S | 266 ++++++++++++++++++------------
 1 file changed, 160 insertions(+), 106 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
index 94693c877e3..a41a3aaba22 100644
--- a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
@@ -4,6 +4,8 @@
  * Copyright (C) 2012 Johannes Goetzfried
  *     <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
  *
+ * Copyright © 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
+ *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2 of the License, or
@@ -22,7 +24,6 @@
  */
 
 .file "cast5-avx-x86_64-asm_64.S"
-.text
 
 .extern cast5_s1
 .extern cast5_s2
@@ -57,17 +58,19 @@
 #define RX %xmm8
 
 #define RKM  %xmm9
-#define RKRF %xmm10
-#define RKRR %xmm11
+#define RKR  %xmm10
+#define RKRF %xmm11
+#define RKRR %xmm12
+
+#define R32  %xmm13
+#define R1ST %xmm14
 
-#define RTMP  %xmm12
-#define RMASK %xmm13
-#define R32   %xmm14
+#define RTMP %xmm15
 
-#define RID1  %rax
-#define RID1b %al
-#define RID2  %rbx
-#define RID2b %bl
+#define RID1  %rbp
+#define RID1d %ebp
+#define RID2  %rsi
+#define RID2d %esi
 
 #define RGI1   %rdx
 #define RGI1bl %dl
@@ -76,6 +79,13 @@
 #define RGI2bl %cl
 #define RGI2bh %ch
 
+#define RGI3   %rax
+#define RGI3bl %al
+#define RGI3bh %ah
+#define RGI4   %rbx
+#define RGI4bl %bl
+#define RGI4bh %bh
+
 #define RFS1  %r8
 #define RFS1d %r8d
 #define RFS2  %r9
@@ -84,60 +94,84 @@
 #define RFS3d %r10d
 
 
-#define lookup_32bit(src, dst, op1, op2, op3) \
-	movb		src ## bl,     RID1b;    \
-	movb		src ## bh,     RID2b;    \
+#define lookup_32bit(src, dst, op1, op2, op3, interleave_op, il_reg) \
+	movzbl		src ## bh,     RID1d;    \
+	movzbl		src ## bl,     RID2d;    \
+	shrq $16,	src;                     \
 	movl		s1(, RID1, 4), dst ## d; \
 	op1		s2(, RID2, 4), dst ## d; \
-	shrq $16,	src;                     \
-	movb		src ## bl,     RID1b;    \
-	movb		src ## bh,     RID2b;    \
+	movzbl		src ## bh,     RID1d;    \
+	movzbl		src ## bl,     RID2d;    \
+	interleave_op(il_reg);			 \
 	op2		s3(, RID1, 4), dst ## d; \
 	op3		s4(, RID2, 4), dst ## d;
 
-#define F(a, x, op0, op1, op2, op3) \
+#define dummy(d) /* do nothing */
+
+#define shr_next(reg) \
+	shrq $16,	reg;
+
+#define F_head(a, x, gi1, gi2, op0) \
 	op0	a,	RKM,  x;                 \
-	vpslld  RKRF,	x,    RTMP;              \
-	vpsrld  RKRR,	x,    x;                 \
+	vpslld	RKRF,	x,    RTMP;              \
+	vpsrld	RKRR,	x,    x;                 \
 	vpor	RTMP,	x,    x;                 \
 	\
-	vpshufb	RMASK,	x,    x;                 \
-	vmovq		x,    RGI1;              \
-	vpsrldq $8,	x,    x;                 \
-	vmovq		x,    RGI2;              \
-	\
-	lookup_32bit(RGI1, RFS1, op1, op2, op3); \
-	shrq $16,	RGI1;                    \
-	lookup_32bit(RGI1, RFS2, op1, op2, op3); \
-	shlq $32,	RFS2;                    \
-	orq		RFS1, RFS2;              \
+	vmovq		x,    gi1;               \
+	vpextrq $1,	x,    gi2;
+
+#define F_tail(a, x, gi1, gi2, op1, op2, op3) \
+	lookup_32bit(##gi1, RFS1, op1, op2, op3, shr_next, ##gi1); \
+	lookup_32bit(##gi2, RFS3, op1, op2, op3, shr_next, ##gi2); \
 	\
-	lookup_32bit(RGI2, RFS1, op1, op2, op3); \
-	shrq $16,	RGI2;                    \
-	lookup_32bit(RGI2, RFS3, op1, op2, op3); \
-	shlq $32,	RFS3;                    \
-	orq		RFS1, RFS3;              \
+	lookup_32bit(##gi1, RFS2, op1, op2, op3, dummy, none);     \
+	shlq $32,	RFS2;                                      \
+	orq		RFS1, RFS2;                                \
+	lookup_32bit(##gi2, RFS1, op1, op2, op3, dummy, none);     \
+	shlq $32,	RFS1;                                      \
+	orq		RFS1, RFS3;                                \
 	\
-	vmovq		RFS2, x;                 \
+	vmovq		RFS2, x;                                   \
 	vpinsrq $1,	RFS3, x, x;
 
-#define F1(b, x) F(b, x, vpaddd, xorl, subl, addl)
-#define F2(b, x) F(b, x, vpxor,  subl, addl, xorl)
-#define F3(b, x) F(b, x, vpsubd, addl, xorl, subl)
+#define F_2(a1, b1, a2, b2, op0, op1, op2, op3) \
+	F_head(b1, RX, RGI1, RGI2, op0);              \
+	F_head(b2, RX, RGI3, RGI4, op0);              \
+	\
+	F_tail(b1, RX, RGI1, RGI2, op1, op2, op3);    \
+	F_tail(b2, RTMP, RGI3, RGI4, op1, op2, op3);  \
+	\
+	vpxor		a1, RX,   a1;                 \
+	vpxor		a2, RTMP, a2;
+
+#define F1_2(a1, b1, a2, b2) \
+	F_2(a1, b1, a2, b2, vpaddd, xorl, subl, addl)
+#define F2_2(a1, b1, a2, b2) \
+	F_2(a1, b1, a2, b2, vpxor, subl, addl, xorl)
+#define F3_2(a1, b1, a2, b2) \
+	F_2(a1, b1, a2, b2, vpsubd, addl, xorl, subl)
 
-#define subround(a, b, x, n, f) \
-	F ## f(b, x);  \
-	vpxor a, x, a;
+#define subround(a1, b1, a2, b2, f) \
+	F ## f ## _2(a1, b1, a2, b2);
 
 #define round(l, r, n, f) \
 	vbroadcastss 	(km+(4*n))(CTX), RKM;        \
-	vpinsrb $0,	(kr+n)(CTX),     RKRF, RKRF; \
+	vpand		R1ST,            RKR,  RKRF; \
 	vpsubq		RKRF,            R32,  RKRR; \
-	subround(l ## 1, r ## 1, RX, n, f);          \
-	subround(l ## 2, r ## 2, RX, n, f);          \
-	subround(l ## 3, r ## 3, RX, n, f);          \
-	subround(l ## 4, r ## 4, RX, n, f);
+	vpsrldq $1,	RKR,             RKR;        \
+	subround(l ## 1, r ## 1, l ## 2, r ## 2, f); \
+	subround(l ## 3, r ## 3, l ## 4, r ## 4, f);
+
+#define enc_preload_rkr() \
+	vbroadcastss	.L16_mask,                RKR;      \
+	/* add 16-bit rotation to key rotations (mod 32) */ \
+	vpxor		kr(CTX),                  RKR, RKR;
 
+#define dec_preload_rkr() \
+	vbroadcastss	.L16_mask,                RKR;      \
+	/* add 16-bit rotation to key rotations (mod 32) */ \
+	vpxor		kr(CTX),                  RKR, RKR; \
+	vpshufb		.Lbswap128_mask,          RKR, RKR;
 
 #define transpose_2x4(x0, x1, t0, t1) \
 	vpunpckldq		x1, x0, t0; \
@@ -146,37 +180,47 @@
 	vpunpcklqdq		t1, t0, x0; \
 	vpunpckhqdq		t1, t0, x1;
 
-#define inpack_blocks(in, x0, x1, t0, t1) \
+#define inpack_blocks(in, x0, x1, t0, t1, rmask) \
 	vmovdqu (0*4*4)(in),	x0; \
 	vmovdqu (1*4*4)(in),	x1; \
-	vpshufb RMASK, x0,	x0; \
-	vpshufb RMASK, x1,	x1; \
+	vpshufb rmask, 	x0,	x0; \
+	vpshufb rmask, 	x1,	x1; \
 	\
 	transpose_2x4(x0, x1, t0, t1)
 
-#define outunpack_blocks(out, x0, x1, t0, t1) \
+#define outunpack_blocks(out, x0, x1, t0, t1, rmask) \
 	transpose_2x4(x0, x1, t0, t1) \
 	\
-	vpshufb RMASK,	x0, x0;           \
-	vpshufb RMASK,	x1, x1;           \
+	vpshufb rmask,	x0, x0;           \
+	vpshufb rmask,	x1, x1;           \
 	vmovdqu 	x0, (0*4*4)(out); \
 	vmovdqu		x1, (1*4*4)(out);
 
-#define outunpack_xor_blocks(out, x0, x1, t0, t1) \
+#define outunpack_xor_blocks(out, x0, x1, t0, t1, rmask) \
 	transpose_2x4(x0, x1, t0, t1) \
 	\
-	vpshufb RMASK,	x0, x0;               \
-	vpshufb RMASK,	x1, x1;               \
+	vpshufb rmask,	x0, x0;               \
+	vpshufb rmask,	x1, x1;               \
 	vpxor		(0*4*4)(out), x0, x0; \
 	vmovdqu 	x0, (0*4*4)(out);     \
 	vpxor		(1*4*4)(out), x1, x1; \
 	vmovdqu	        x1, (1*4*4)(out);
 
+.data
+
 .align 16
 .Lbswap_mask:
 	.byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
+.Lbswap128_mask:
+	.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
+.L16_mask:
+	.byte 16, 16, 16, 16
 .L32_mask:
-	.byte 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ,0, 0, 0, 0, 0
+	.byte 32, 0, 0, 0
+.Lfirst_mask:
+	.byte 0x1f, 0, 0, 0
+
+.text
 
 .align 16
 .global __cast5_enc_blk_16way
@@ -190,23 +234,24 @@ __cast5_enc_blk_16way:
 	 *	%rcx: bool, if true: xor output
 	 */
 
+	pushq %rbp;
 	pushq %rbx;
 	pushq %rcx;
 
-	vmovdqu .Lbswap_mask, RMASK;
-	vmovdqu .L32_mask, R32;
-	vpxor RKRF, RKRF, RKRF;
+	vmovdqa .Lbswap_mask, RKM;
+	vmovd .Lfirst_mask, R1ST;
+	vmovd .L32_mask, R32;
+	enc_preload_rkr();
 
-	inpack_blocks(%rdx, RL1, RR1, RTMP, RX);
-	leaq (2*4*4)(%rdx), %rax;
-	inpack_blocks(%rax, RL2, RR2, RTMP, RX);
-	leaq (2*4*4)(%rax), %rax;
-	inpack_blocks(%rax, RL3, RR3, RTMP, RX);
-	leaq (2*4*4)(%rax), %rax;
-	inpack_blocks(%rax, RL4, RR4, RTMP, RX);
+	leaq 1*(2*4*4)(%rdx), %rax;
+	inpack_blocks(%rdx, RL1, RR1, RTMP, RX, RKM);
+	inpack_blocks(%rax, RL2, RR2, RTMP, RX, RKM);
+	leaq 2*(2*4*4)(%rdx), %rax;
+	inpack_blocks(%rax, RL3, RR3, RTMP, RX, RKM);
+	leaq 3*(2*4*4)(%rdx), %rax;
+	inpack_blocks(%rax, RL4, RR4, RTMP, RX, RKM);
 
-	xorq RID1, RID1;
-	xorq RID2, RID2;
+	movq %rsi, %r11;
 
 	round(RL, RR, 0, 1);
 	round(RR, RL, 1, 2);
@@ -221,8 +266,8 @@ __cast5_enc_blk_16way:
 	round(RL, RR, 10, 2);
 	round(RR, RL, 11, 3);
 
-	movb rr(CTX), %al;
-	testb %al, %al;
+	movzbl rr(CTX), %eax;
+	testl %eax, %eax;
 	jnz __skip_enc;
 
 	round(RL, RR, 12, 1);
@@ -233,28 +278,30 @@ __cast5_enc_blk_16way:
 __skip_enc:
 	popq %rcx;
 	popq %rbx;
+	popq %rbp;
+
+	vmovdqa .Lbswap_mask, RKM;
+	leaq 1*(2*4*4)(%r11), %rax;
 
 	testb %cl, %cl;
 	jnz __enc_xor16;
 
-	outunpack_blocks(%rsi, RR1, RL1, RTMP, RX);
-	leaq (2*4*4)(%rsi), %rax;
-	outunpack_blocks(%rax, RR2, RL2, RTMP, RX);
-	leaq (2*4*4)(%rax), %rax;
-	outunpack_blocks(%rax, RR3, RL3, RTMP, RX);
-	leaq (2*4*4)(%rax), %rax;
-	outunpack_blocks(%rax, RR4, RL4, RTMP, RX);
+	outunpack_blocks(%r11, RR1, RL1, RTMP, RX, RKM);
+	outunpack_blocks(%rax, RR2, RL2, RTMP, RX, RKM);
+	leaq 2*(2*4*4)(%r11), %rax;
+	outunpack_blocks(%rax, RR3, RL3, RTMP, RX, RKM);
+	leaq 3*(2*4*4)(%r11), %rax;
+	outunpack_blocks(%rax, RR4, RL4, RTMP, RX, RKM);
 
 	ret;
 
 __enc_xor16:
-	outunpack_xor_blocks(%rsi, RR1, RL1, RTMP, RX);
-	leaq (2*4*4)(%rsi), %rax;
-	outunpack_xor_blocks(%rax, RR2, RL2, RTMP, RX);
-	leaq (2*4*4)(%rax), %rax;
-	outunpack_xor_blocks(%rax, RR3, RL3, RTMP, RX);
-	leaq (2*4*4)(%rax), %rax;
-	outunpack_xor_blocks(%rax, RR4, RL4, RTMP, RX);
+	outunpack_xor_blocks(%r11, RR1, RL1, RTMP, RX, RKM);
+	outunpack_xor_blocks(%rax, RR2, RL2, RTMP, RX, RKM);
+	leaq 2*(2*4*4)(%r11), %rax;
+	outunpack_xor_blocks(%rax, RR3, RL3, RTMP, RX, RKM);
+	leaq 3*(2*4*4)(%r11), %rax;
+	outunpack_xor_blocks(%rax, RR4, RL4, RTMP, RX, RKM);
 
 	ret;
 
@@ -269,25 +316,26 @@ cast5_dec_blk_16way:
 	 *	%rdx: src
 	 */
 
+	pushq %rbp;
 	pushq %rbx;
 
-	vmovdqu .Lbswap_mask, RMASK;
-	vmovdqu .L32_mask, R32;
-	vpxor RKRF, RKRF, RKRF;
+	vmovdqa .Lbswap_mask, RKM;
+	vmovd .Lfirst_mask, R1ST;
+	vmovd .L32_mask, R32;
+	dec_preload_rkr();
 
-	inpack_blocks(%rdx, RL1, RR1, RTMP, RX);
-	leaq (2*4*4)(%rdx), %rax;
-	inpack_blocks(%rax, RL2, RR2, RTMP, RX);
-	leaq (2*4*4)(%rax), %rax;
-	inpack_blocks(%rax, RL3, RR3, RTMP, RX);
-	leaq (2*4*4)(%rax), %rax;
-	inpack_blocks(%rax, RL4, RR4, RTMP, RX);
+	leaq 1*(2*4*4)(%rdx), %rax;
+	inpack_blocks(%rdx, RL1, RR1, RTMP, RX, RKM);
+	inpack_blocks(%rax, RL2, RR2, RTMP, RX, RKM);
+	leaq 2*(2*4*4)(%rdx), %rax;
+	inpack_blocks(%rax, RL3, RR3, RTMP, RX, RKM);
+	leaq 3*(2*4*4)(%rdx), %rax;
+	inpack_blocks(%rax, RL4, RR4, RTMP, RX, RKM);
 
-	xorq RID1, RID1;
-	xorq RID2, RID2;
+	movq %rsi, %r11;
 
-	movb rr(CTX), %al;
-	testb %al, %al;
+	movzbl rr(CTX), %eax;
+	testl %eax, %eax;
 	jnz __skip_dec;
 
 	round(RL, RR, 15, 1);
@@ -295,7 +343,7 @@ cast5_dec_blk_16way:
 	round(RL, RR, 13, 2);
 	round(RR, RL, 12, 1);
 
-__skip_dec:
+__dec_tail:
 	round(RL, RR, 11, 3);
 	round(RR, RL, 10, 2);
 	round(RL, RR, 9, 1);
@@ -309,14 +357,20 @@ __skip_dec:
 	round(RL, RR, 1, 2);
 	round(RR, RL, 0, 1);
 
+	vmovdqa .Lbswap_mask, RKM;
 	popq %rbx;
+	popq %rbp;
 
-	outunpack_blocks(%rsi, RR1, RL1, RTMP, RX);
-	leaq (2*4*4)(%rsi), %rax;
-	outunpack_blocks(%rax, RR2, RL2, RTMP, RX);
-	leaq (2*4*4)(%rax), %rax;
-	outunpack_blocks(%rax, RR3, RL3, RTMP, RX);
-	leaq (2*4*4)(%rax), %rax;
-	outunpack_blocks(%rax, RR4, RL4, RTMP, RX);
+	leaq 1*(2*4*4)(%r11), %rax;
+	outunpack_blocks(%r11, RR1, RL1, RTMP, RX, RKM);
+	outunpack_blocks(%rax, RR2, RL2, RTMP, RX, RKM);
+	leaq 2*(2*4*4)(%r11), %rax;
+	outunpack_blocks(%rax, RR3, RL3, RTMP, RX, RKM);
+	leaq 3*(2*4*4)(%r11), %rax;
+	outunpack_blocks(%rax, RR4, RL4, RTMP, RX, RKM);
 
 	ret;
+
+__skip_dec:
+	vpsrldq $4, RKR, RKR;
+	jmp __dec_tail;
-- 
cgit v1.2.3-70-g09d2


From c09220e1bc97d83cae445cab8dcb057fabd62361 Mon Sep 17 00:00:00 2001
From: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
Date: Tue, 28 Aug 2012 14:24:54 +0300
Subject: crypto: cast6-avx - tune assembler code for more performance

Patch replaces 'movb' instructions with 'movzbl' to break false register
dependencies, interleaves instructions better for out-of-order scheduling
and merges constant 16-bit rotation with round-key variable rotation.

tcrypt ECB results:

Intel Core i5-2450M:

size    old-vs-new      new-vs-generic  old-vs-generic
        enc     dec     enc     dec     enc     dec
256     1.13x   1.19x   2.05x   2.17x   1.82x   1.82x
1k      1.18x   1.21x   2.26x   2.33x   1.93x   1.93x
8k      1.19x   1.19x   2.32x   2.33x   1.95x   1.95x

[v2]
 - Do instruction interleaving another way to avoid adding new FPU<=>CPU
   register moves as these cause performance drop on Bulldozer.
 - Improvements to round-key variable rotation handling.
 - Further interleaving improvements for better out-of-order scheduling.

Cc: Johannes Goetzfried <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/x86/crypto/cast6-avx-x86_64-asm_64.S | 276 ++++++++++++++++++------------
 1 file changed, 162 insertions(+), 114 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S
index d258ce0d2e0..218d283772f 100644
--- a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S
@@ -4,6 +4,8 @@
  * Copyright (C) 2012 Johannes Goetzfried
  *     <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
  *
+ * Copyright © 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
+ *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2 of the License, or
@@ -22,7 +24,6 @@
  */
 
 .file "cast6-avx-x86_64-asm_64.S"
-.text
 
 .extern cast6_s1
 .extern cast6_s2
@@ -54,20 +55,21 @@
 #define RC2 %xmm6
 #define RD2 %xmm7
 
-#define RX %xmm8
+#define RX  %xmm8
 
 #define RKM  %xmm9
-#define RKRF %xmm10
-#define RKRR %xmm11
+#define RKR  %xmm10
+#define RKRF %xmm11
+#define RKRR %xmm12
+#define R32  %xmm13
+#define R1ST %xmm14
 
-#define RTMP  %xmm12
-#define RMASK %xmm13
-#define R32   %xmm14
+#define RTMP %xmm15
 
-#define RID1  %rax
-#define RID1b %al
-#define RID2  %rbx
-#define RID2b %bl
+#define RID1  %rbp
+#define RID1d %ebp
+#define RID2  %rsi
+#define RID2d %esi
 
 #define RGI1   %rdx
 #define RGI1bl %dl
@@ -76,6 +78,13 @@
 #define RGI2bl %cl
 #define RGI2bh %ch
 
+#define RGI3   %rax
+#define RGI3bl %al
+#define RGI3bh %ah
+#define RGI4   %rbx
+#define RGI4bl %bl
+#define RGI4bh %bh
+
 #define RFS1  %r8
 #define RFS1d %r8d
 #define RFS2  %r9
@@ -84,95 +93,106 @@
 #define RFS3d %r10d
 
 
-#define lookup_32bit(src, dst, op1, op2, op3) \
-	movb		src ## bl,     RID1b;    \
-	movb		src ## bh,     RID2b;    \
+#define lookup_32bit(src, dst, op1, op2, op3, interleave_op, il_reg) \
+	movzbl		src ## bh,     RID1d;    \
+	movzbl		src ## bl,     RID2d;    \
+	shrq $16,	src;                     \
 	movl		s1(, RID1, 4), dst ## d; \
 	op1		s2(, RID2, 4), dst ## d; \
-	shrq $16,	src;                     \
-	movb		src ## bl,     RID1b;    \
-	movb		src ## bh,     RID2b;    \
+	movzbl		src ## bh,     RID1d;    \
+	movzbl		src ## bl,     RID2d;    \
+	interleave_op(il_reg);			 \
 	op2		s3(, RID1, 4), dst ## d; \
 	op3		s4(, RID2, 4), dst ## d;
 
-#define F(a, x, op0, op1, op2, op3) \
+#define dummy(d) /* do nothing */
+
+#define shr_next(reg) \
+	shrq $16,	reg;
+
+#define F_head(a, x, gi1, gi2, op0) \
 	op0	a,	RKM,  x;                 \
-	vpslld  RKRF,	x,    RTMP;              \
-	vpsrld  RKRR,	x,    x;                 \
+	vpslld	RKRF,	x,    RTMP;              \
+	vpsrld	RKRR,	x,    x;                 \
 	vpor	RTMP,	x,    x;                 \
 	\
-	vpshufb	RMASK,	x,    x;                 \
-	vmovq		x,    RGI1;              \
-	vpsrldq $8,	x,    x;                 \
-	vmovq		x,    RGI2;              \
-	\
-	lookup_32bit(RGI1, RFS1, op1, op2, op3); \
-	shrq $16,	RGI1;                    \
-	lookup_32bit(RGI1, RFS2, op1, op2, op3); \
-	shlq $32,	RFS2;                    \
-	orq		RFS1, RFS2;              \
+	vmovq		x,    gi1;               \
+	vpextrq $1,	x,    gi2;
+
+#define F_tail(a, x, gi1, gi2, op1, op2, op3) \
+	lookup_32bit(##gi1, RFS1, op1, op2, op3, shr_next, ##gi1); \
+	lookup_32bit(##gi2, RFS3, op1, op2, op3, shr_next, ##gi2); \
 	\
-	lookup_32bit(RGI2, RFS1, op1, op2, op3); \
-	shrq $16,	RGI2;                    \
-	lookup_32bit(RGI2, RFS3, op1, op2, op3); \
-	shlq $32,	RFS3;                    \
-	orq		RFS1, RFS3;              \
+	lookup_32bit(##gi1, RFS2, op1, op2, op3, dummy, none);     \
+	shlq $32,	RFS2;                                      \
+	orq		RFS1, RFS2;                                \
+	lookup_32bit(##gi2, RFS1, op1, op2, op3, dummy, none);     \
+	shlq $32,	RFS1;                                      \
+	orq		RFS1, RFS3;                                \
 	\
-	vmovq		RFS2, x;                 \
+	vmovq		RFS2, x;                                   \
 	vpinsrq $1,	RFS3, x, x;
 
-#define F1(b, x) F(b, x, vpaddd, xorl, subl, addl)
-#define F2(b, x) F(b, x, vpxor,  subl, addl, xorl)
-#define F3(b, x) F(b, x, vpsubd, addl, xorl, subl)
+#define F_2(a1, b1, a2, b2, op0, op1, op2, op3) \
+	F_head(b1, RX, RGI1, RGI2, op0);              \
+	F_head(b2, RX, RGI3, RGI4, op0);              \
+	\
+	F_tail(b1, RX, RGI1, RGI2, op1, op2, op3);    \
+	F_tail(b2, RTMP, RGI3, RGI4, op1, op2, op3);  \
+	\
+	vpxor		a1, RX,   a1;                 \
+	vpxor		a2, RTMP, a2;
+
+#define F1_2(a1, b1, a2, b2) \
+	F_2(a1, b1, a2, b2, vpaddd, xorl, subl, addl)
+#define F2_2(a1, b1, a2, b2) \
+	F_2(a1, b1, a2, b2, vpxor, subl, addl, xorl)
+#define F3_2(a1, b1, a2, b2) \
+	F_2(a1, b1, a2, b2, vpsubd, addl, xorl, subl)
 
-#define qop(in, out, x, f) \
-	F ## f(in ## 1, x);          \
-	vpxor out ## 1, x, out ## 1; \
-	F ## f(in ## 2, x);          \
-	vpxor out ## 2, x, out ## 2; \
+#define qop(in, out, f) \
+	F ## f ## _2(out ## 1, in ## 1, out ## 2, in ## 2);
+
+#define get_round_keys(nn) \
+	vbroadcastss	(km+(4*(nn)))(CTX), RKM;        \
+	vpand		R1ST,               RKR,  RKRF; \
+	vpsubq		RKRF,               R32,  RKRR; \
+	vpsrldq $1,	RKR,                RKR;
 
 #define Q(n) \
-	vbroadcastss	(km+(4*(4*n+0)))(CTX), RKM;        \
-	vpinsrb $0,	(kr+(4*n+0))(CTX),     RKRF, RKRF; \
-	vpsubq		RKRF,                  R32,  RKRR; \
-	qop(RD, RC, RX, 1);                                \
+	get_round_keys(4*n+0); \
+	qop(RD, RC, 1);        \
 	\
-	vbroadcastss	(km+(4*(4*n+1)))(CTX), RKM;        \
-	vpinsrb $0,	(kr+(4*n+1))(CTX),     RKRF, RKRF; \
-	vpsubq		RKRF,                  R32,  RKRR; \
-	qop(RC, RB, RX, 2);                                \
+	get_round_keys(4*n+1); \
+	qop(RC, RB, 2);        \
 	\
-	vbroadcastss	(km+(4*(4*n+2)))(CTX), RKM;        \
-	vpinsrb $0,	(kr+(4*n+2))(CTX),     RKRF, RKRF; \
-	vpsubq		RKRF,                  R32,  RKRR; \
-	qop(RB, RA, RX, 3);                                \
+	get_round_keys(4*n+2); \
+	qop(RB, RA, 3);        \
 	\
-	vbroadcastss	(km+(4*(4*n+3)))(CTX), RKM;        \
-	vpinsrb $0,	(kr+(4*n+3))(CTX),     RKRF, RKRF; \
-	vpsubq		RKRF,                  R32,  RKRR; \
-	qop(RA, RD, RX, 1);
+	get_round_keys(4*n+3); \
+	qop(RA, RD, 1);
 
 #define QBAR(n) \
-	vbroadcastss	(km+(4*(4*n+3)))(CTX), RKM;        \
-	vpinsrb $0,	(kr+(4*n+3))(CTX),     RKRF, RKRF; \
-	vpsubq		RKRF,                  R32,  RKRR; \
-	qop(RA, RD, RX, 1);                                \
+	get_round_keys(4*n+3); \
+	qop(RA, RD, 1);        \
 	\
-	vbroadcastss	(km+(4*(4*n+2)))(CTX), RKM;        \
-	vpinsrb $0,	(kr+(4*n+2))(CTX),     RKRF, RKRF; \
-	vpsubq		RKRF,                  R32,  RKRR; \
-	qop(RB, RA, RX, 3);                                \
+	get_round_keys(4*n+2); \
+	qop(RB, RA, 3);        \
 	\
-	vbroadcastss	(km+(4*(4*n+1)))(CTX), RKM;        \
-	vpinsrb $0,	(kr+(4*n+1))(CTX),     RKRF, RKRF; \
-	vpsubq		RKRF,                  R32,  RKRR; \
-	qop(RC, RB, RX, 2);                                \
+	get_round_keys(4*n+1); \
+	qop(RC, RB, 2);        \
 	\
-	vbroadcastss	(km+(4*(4*n+0)))(CTX), RKM;        \
-	vpinsrb $0,	(kr+(4*n+0))(CTX),     RKRF, RKRF; \
-	vpsubq		RKRF,                  R32,  RKRR; \
-	qop(RD, RC, RX, 1);
+	get_round_keys(4*n+0); \
+	qop(RD, RC, 1);
+
+#define shuffle(mask) \
+	vpshufb		mask,            RKR, RKR;
 
+#define preload_rkr(n, do_mask, mask) \
+	vbroadcastss	.L16_mask,                RKR;      \
+	/* add 16-bit rotation to key rotations (mod 32) */ \
+	vpxor		(kr+n*16)(CTX),           RKR, RKR; \
+	do_mask(mask);
 
 #define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
 	vpunpckldq		x1, x0, t0; \
@@ -185,37 +205,37 @@
 	vpunpcklqdq		x3, t2, x2; \
 	vpunpckhqdq		x3, t2, x3;
 
-#define inpack_blocks(in, x0, x1, x2, x3, t0, t1, t2) \
+#define inpack_blocks(in, x0, x1, x2, x3, t0, t1, t2, rmask) \
 	vmovdqu (0*4*4)(in),	x0; \
 	vmovdqu (1*4*4)(in),	x1; \
 	vmovdqu (2*4*4)(in),	x2; \
 	vmovdqu (3*4*4)(in),	x3; \
-	vpshufb RMASK, x0,	x0; \
-	vpshufb RMASK, x1,	x1; \
-	vpshufb RMASK, x2,	x2; \
-	vpshufb RMASK, x3,	x3; \
+	vpshufb rmask, x0,	x0; \
+	vpshufb rmask, x1,	x1; \
+	vpshufb rmask, x2,	x2; \
+	vpshufb rmask, x3,	x3; \
 	\
 	transpose_4x4(x0, x1, x2, x3, t0, t1, t2)
 
-#define outunpack_blocks(out, x0, x1, x2, x3, t0, t1, t2) \
+#define outunpack_blocks(out, x0, x1, x2, x3, t0, t1, t2, rmask) \
 	transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
 	\
-	vpshufb RMASK,		x0, x0;       \
-	vpshufb RMASK,		x1, x1;       \
-	vpshufb RMASK,		x2, x2;       \
-	vpshufb RMASK,		x3, x3;       \
+	vpshufb rmask,		x0, x0;       \
+	vpshufb rmask,		x1, x1;       \
+	vpshufb rmask,		x2, x2;       \
+	vpshufb rmask,		x3, x3;       \
 	vmovdqu x0,		(0*4*4)(out); \
 	vmovdqu	x1,		(1*4*4)(out); \
 	vmovdqu	x2,		(2*4*4)(out); \
 	vmovdqu	x3,		(3*4*4)(out);
 
-#define outunpack_xor_blocks(out, x0, x1, x2, x3, t0, t1, t2) \
+#define outunpack_xor_blocks(out, x0, x1, x2, x3, t0, t1, t2, rmask) \
 	transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
 	\
-	vpshufb RMASK,		x0, x0;       \
-	vpshufb RMASK,		x1, x1;       \
-	vpshufb RMASK,		x2, x2;       \
-	vpshufb RMASK,		x3, x3;       \
+	vpshufb rmask,		x0, x0;       \
+	vpshufb rmask,		x1, x1;       \
+	vpshufb rmask,		x2, x2;       \
+	vpshufb rmask,		x3, x3;       \
 	vpxor (0*4*4)(out),	x0, x0;       \
 	vmovdqu	x0,		(0*4*4)(out); \
 	vpxor (1*4*4)(out),	x1, x1;       \
@@ -225,11 +245,29 @@
 	vpxor (3*4*4)(out),	x3, x3;       \
 	vmovdqu x3,		(3*4*4)(out);
 
+.data
+
 .align 16
 .Lbswap_mask:
 	.byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
+.Lrkr_enc_Q_Q_QBAR_QBAR:
+	.byte 0, 1, 2, 3, 4, 5, 6, 7, 11, 10, 9, 8, 15, 14, 13, 12
+.Lrkr_enc_QBAR_QBAR_QBAR_QBAR:
+	.byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
+.Lrkr_dec_Q_Q_Q_Q:
+	.byte 12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3
+.Lrkr_dec_Q_Q_QBAR_QBAR:
+	.byte 12, 13, 14, 15, 8, 9, 10, 11, 7, 6, 5, 4, 3, 2, 1, 0
+.Lrkr_dec_QBAR_QBAR_QBAR_QBAR:
+	.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
+.L16_mask:
+	.byte 16, 16, 16, 16
 .L32_mask:
-	.byte 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ,0, 0, 0, 0, 0
+	.byte 32, 0, 0, 0
+.Lfirst_mask:
+	.byte 0x1f, 0, 0, 0
+
+.text
 
 .align 16
 .global __cast6_enc_blk_8way
@@ -243,28 +281,31 @@ __cast6_enc_blk_8way:
 	 *	%rcx: bool, if true: xor output
 	 */
 
+	pushq %rbp;
 	pushq %rbx;
 	pushq %rcx;
 
-	vmovdqu .Lbswap_mask, RMASK;
-	vmovdqu .L32_mask, R32;
-	vpxor RKRF, RKRF, RKRF;
+	vmovdqa .Lbswap_mask, RKM;
+	vmovd .Lfirst_mask, R1ST;
+	vmovd .L32_mask, R32;
 
 	leaq (4*4*4)(%rdx), %rax;
-	inpack_blocks(%rdx, RA1, RB1, RC1, RD1, RTMP, RX, RKM);
-	inpack_blocks(%rax, RA2, RB2, RC2, RD2, RTMP, RX, RKM);
+	inpack_blocks(%rdx, RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
+	inpack_blocks(%rax, RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
 
-	xorq RID1, RID1;
-	xorq RID2, RID2;
+	movq %rsi, %r11;
 
+	preload_rkr(0, dummy, none);
 	Q(0);
 	Q(1);
 	Q(2);
 	Q(3);
+	preload_rkr(1, shuffle, .Lrkr_enc_Q_Q_QBAR_QBAR);
 	Q(4);
 	Q(5);
 	QBAR(6);
 	QBAR(7);
+	preload_rkr(2, shuffle, .Lrkr_enc_QBAR_QBAR_QBAR_QBAR);
 	QBAR(8);
 	QBAR(9);
 	QBAR(10);
@@ -272,20 +313,22 @@ __cast6_enc_blk_8way:
 
 	popq %rcx;
 	popq %rbx;
+	popq %rbp;
 
-	leaq (4*4*4)(%rsi), %rax;
+	vmovdqa .Lbswap_mask, RKM;
+	leaq (4*4*4)(%r11), %rax;
 
 	testb %cl, %cl;
 	jnz __enc_xor8;
 
-	outunpack_blocks(%rsi, RA1, RB1, RC1, RD1, RTMP, RX, RKM);
-	outunpack_blocks(%rax, RA2, RB2, RC2, RD2, RTMP, RX, RKM);
+	outunpack_blocks(%r11, RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
+	outunpack_blocks(%rax, RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
 
 	ret;
 
 __enc_xor8:
-	outunpack_xor_blocks(%rsi, RA1, RB1, RC1, RD1, RTMP, RX, RKM);
-	outunpack_xor_blocks(%rax, RA2, RB2, RC2, RD2, RTMP, RX, RKM);
+	outunpack_xor_blocks(%r11, RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
+	outunpack_xor_blocks(%rax, RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
 
 	ret;
 
@@ -300,36 +343,41 @@ cast6_dec_blk_8way:
 	 *	%rdx: src
 	 */
 
+	pushq %rbp;
 	pushq %rbx;
 
-	vmovdqu .Lbswap_mask, RMASK;
-	vmovdqu .L32_mask, R32;
-	vpxor RKRF, RKRF, RKRF;
+	vmovdqa .Lbswap_mask, RKM;
+	vmovd .Lfirst_mask, R1ST;
+	vmovd .L32_mask, R32;
 
 	leaq (4*4*4)(%rdx), %rax;
-	inpack_blocks(%rdx, RA1, RB1, RC1, RD1, RTMP, RX, RKM);
-	inpack_blocks(%rax, RA2, RB2, RC2, RD2, RTMP, RX, RKM);
+	inpack_blocks(%rdx, RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
+	inpack_blocks(%rax, RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
 
-	xorq RID1, RID1;
-	xorq RID2, RID2;
+	movq %rsi, %r11;
 
+	preload_rkr(2, shuffle, .Lrkr_dec_Q_Q_Q_Q);
 	Q(11);
 	Q(10);
 	Q(9);
 	Q(8);
+	preload_rkr(1, shuffle, .Lrkr_dec_Q_Q_QBAR_QBAR);
 	Q(7);
 	Q(6);
 	QBAR(5);
 	QBAR(4);
+	preload_rkr(0, shuffle, .Lrkr_dec_QBAR_QBAR_QBAR_QBAR);
 	QBAR(3);
 	QBAR(2);
 	QBAR(1);
 	QBAR(0);
 
 	popq %rbx;
+	popq %rbp;
 
-	leaq (4*4*4)(%rsi), %rax;
-	outunpack_blocks(%rsi, RA1, RB1, RC1, RD1, RTMP, RX, RKM);
-	outunpack_blocks(%rax, RA2, RB2, RC2, RD2, RTMP, RX, RKM);
+	vmovdqa .Lbswap_mask, RKM;
+	leaq (4*4*4)(%r11), %rax;
+	outunpack_blocks(%r11, RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
+	outunpack_blocks(%rax, RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
 
 	ret;
-- 
cgit v1.2.3-70-g09d2


From 1ffb72a39a92c1a03b3c732280e924c02c509cd3 Mon Sep 17 00:00:00 2001
From: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
Date: Tue, 28 Aug 2012 16:46:59 +0300
Subject: crypto: camellia-x86_64 - fix sparse warnings (constant is so big)

Fix "constant 0xXXXXXXXXXXXXXXXX is so big it's unsigned long" sparse warnings.

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/x86/crypto/camellia_glue.c | 1376 +++++++++++++++++++--------------------
 1 file changed, 688 insertions(+), 688 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/crypto/camellia_glue.c b/arch/x86/crypto/camellia_glue.c
index 7a74d7bb326..42ffd2bbab5 100644
--- a/arch/x86/crypto/camellia_glue.c
+++ b/arch/x86/crypto/camellia_glue.c
@@ -92,715 +92,715 @@ static void camellia_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 
 /* camellia sboxes */
 const u64 camellia_sp10011110[256] = {
-	0x7000007070707000, 0x8200008282828200, 0x2c00002c2c2c2c00,
-	0xec0000ecececec00, 0xb30000b3b3b3b300, 0x2700002727272700,
-	0xc00000c0c0c0c000, 0xe50000e5e5e5e500, 0xe40000e4e4e4e400,
-	0x8500008585858500, 0x5700005757575700, 0x3500003535353500,
-	0xea0000eaeaeaea00, 0x0c00000c0c0c0c00, 0xae0000aeaeaeae00,
-	0x4100004141414100, 0x2300002323232300, 0xef0000efefefef00,
-	0x6b00006b6b6b6b00, 0x9300009393939300, 0x4500004545454500,
-	0x1900001919191900, 0xa50000a5a5a5a500, 0x2100002121212100,
-	0xed0000edededed00, 0x0e00000e0e0e0e00, 0x4f00004f4f4f4f00,
-	0x4e00004e4e4e4e00, 0x1d00001d1d1d1d00, 0x6500006565656500,
-	0x9200009292929200, 0xbd0000bdbdbdbd00, 0x8600008686868600,
-	0xb80000b8b8b8b800, 0xaf0000afafafaf00, 0x8f00008f8f8f8f00,
-	0x7c00007c7c7c7c00, 0xeb0000ebebebeb00, 0x1f00001f1f1f1f00,
-	0xce0000cececece00, 0x3e00003e3e3e3e00, 0x3000003030303000,
-	0xdc0000dcdcdcdc00, 0x5f00005f5f5f5f00, 0x5e00005e5e5e5e00,
-	0xc50000c5c5c5c500, 0x0b00000b0b0b0b00, 0x1a00001a1a1a1a00,
-	0xa60000a6a6a6a600, 0xe10000e1e1e1e100, 0x3900003939393900,
-	0xca0000cacacaca00, 0xd50000d5d5d5d500, 0x4700004747474700,
-	0x5d00005d5d5d5d00, 0x3d00003d3d3d3d00, 0xd90000d9d9d9d900,
-	0x0100000101010100, 0x5a00005a5a5a5a00, 0xd60000d6d6d6d600,
-	0x5100005151515100, 0x5600005656565600, 0x6c00006c6c6c6c00,
-	0x4d00004d4d4d4d00, 0x8b00008b8b8b8b00, 0x0d00000d0d0d0d00,
-	0x9a00009a9a9a9a00, 0x6600006666666600, 0xfb0000fbfbfbfb00,
-	0xcc0000cccccccc00, 0xb00000b0b0b0b000, 0x2d00002d2d2d2d00,
-	0x7400007474747400, 0x1200001212121200, 0x2b00002b2b2b2b00,
-	0x2000002020202000, 0xf00000f0f0f0f000, 0xb10000b1b1b1b100,
-	0x8400008484848400, 0x9900009999999900, 0xdf0000dfdfdfdf00,
-	0x4c00004c4c4c4c00, 0xcb0000cbcbcbcb00, 0xc20000c2c2c2c200,
-	0x3400003434343400, 0x7e00007e7e7e7e00, 0x7600007676767600,
-	0x0500000505050500, 0x6d00006d6d6d6d00, 0xb70000b7b7b7b700,
-	0xa90000a9a9a9a900, 0x3100003131313100, 0xd10000d1d1d1d100,
-	0x1700001717171700, 0x0400000404040400, 0xd70000d7d7d7d700,
-	0x1400001414141400, 0x5800005858585800, 0x3a00003a3a3a3a00,
-	0x6100006161616100, 0xde0000dededede00, 0x1b00001b1b1b1b00,
-	0x1100001111111100, 0x1c00001c1c1c1c00, 0x3200003232323200,
-	0x0f00000f0f0f0f00, 0x9c00009c9c9c9c00, 0x1600001616161600,
-	0x5300005353535300, 0x1800001818181800, 0xf20000f2f2f2f200,
-	0x2200002222222200, 0xfe0000fefefefe00, 0x4400004444444400,
-	0xcf0000cfcfcfcf00, 0xb20000b2b2b2b200, 0xc30000c3c3c3c300,
-	0xb50000b5b5b5b500, 0x7a00007a7a7a7a00, 0x9100009191919100,
-	0x2400002424242400, 0x0800000808080800, 0xe80000e8e8e8e800,
-	0xa80000a8a8a8a800, 0x6000006060606000, 0xfc0000fcfcfcfc00,
-	0x6900006969696900, 0x5000005050505000, 0xaa0000aaaaaaaa00,
-	0xd00000d0d0d0d000, 0xa00000a0a0a0a000, 0x7d00007d7d7d7d00,
-	0xa10000a1a1a1a100, 0x8900008989898900, 0x6200006262626200,
-	0x9700009797979700, 0x5400005454545400, 0x5b00005b5b5b5b00,
-	0x1e00001e1e1e1e00, 0x9500009595959500, 0xe00000e0e0e0e000,
-	0xff0000ffffffff00, 0x6400006464646400, 0xd20000d2d2d2d200,
-	0x1000001010101000, 0xc40000c4c4c4c400, 0x0000000000000000,
-	0x4800004848484800, 0xa30000a3a3a3a300, 0xf70000f7f7f7f700,
-	0x7500007575757500, 0xdb0000dbdbdbdb00, 0x8a00008a8a8a8a00,
-	0x0300000303030300, 0xe60000e6e6e6e600, 0xda0000dadadada00,
-	0x0900000909090900, 0x3f00003f3f3f3f00, 0xdd0000dddddddd00,
-	0x9400009494949400, 0x8700008787878700, 0x5c00005c5c5c5c00,
-	0x8300008383838300, 0x0200000202020200, 0xcd0000cdcdcdcd00,
-	0x4a00004a4a4a4a00, 0x9000009090909000, 0x3300003333333300,
-	0x7300007373737300, 0x6700006767676700, 0xf60000f6f6f6f600,
-	0xf30000f3f3f3f300, 0x9d00009d9d9d9d00, 0x7f00007f7f7f7f00,
-	0xbf0000bfbfbfbf00, 0xe20000e2e2e2e200, 0x5200005252525200,
-	0x9b00009b9b9b9b00, 0xd80000d8d8d8d800, 0x2600002626262600,
-	0xc80000c8c8c8c800, 0x3700003737373700, 0xc60000c6c6c6c600,
-	0x3b00003b3b3b3b00, 0x8100008181818100, 0x9600009696969600,
-	0x6f00006f6f6f6f00, 0x4b00004b4b4b4b00, 0x1300001313131300,
-	0xbe0000bebebebe00, 0x6300006363636300, 0x2e00002e2e2e2e00,
-	0xe90000e9e9e9e900, 0x7900007979797900, 0xa70000a7a7a7a700,
-	0x8c00008c8c8c8c00, 0x9f00009f9f9f9f00, 0x6e00006e6e6e6e00,
-	0xbc0000bcbcbcbc00, 0x8e00008e8e8e8e00, 0x2900002929292900,
-	0xf50000f5f5f5f500, 0xf90000f9f9f9f900, 0xb60000b6b6b6b600,
-	0x2f00002f2f2f2f00, 0xfd0000fdfdfdfd00, 0xb40000b4b4b4b400,
-	0x5900005959595900, 0x7800007878787800, 0x9800009898989800,
-	0x0600000606060600, 0x6a00006a6a6a6a00, 0xe70000e7e7e7e700,
-	0x4600004646464600, 0x7100007171717100, 0xba0000babababa00,
-	0xd40000d4d4d4d400, 0x2500002525252500, 0xab0000abababab00,
-	0x4200004242424200, 0x8800008888888800, 0xa20000a2a2a2a200,
-	0x8d00008d8d8d8d00, 0xfa0000fafafafa00, 0x7200007272727200,
-	0x0700000707070700, 0xb90000b9b9b9b900, 0x5500005555555500,
-	0xf80000f8f8f8f800, 0xee0000eeeeeeee00, 0xac0000acacacac00,
-	0x0a00000a0a0a0a00, 0x3600003636363600, 0x4900004949494900,
-	0x2a00002a2a2a2a00, 0x6800006868686800, 0x3c00003c3c3c3c00,
-	0x3800003838383800, 0xf10000f1f1f1f100, 0xa40000a4a4a4a400,
-	0x4000004040404000, 0x2800002828282800, 0xd30000d3d3d3d300,
-	0x7b00007b7b7b7b00, 0xbb0000bbbbbbbb00, 0xc90000c9c9c9c900,
-	0x4300004343434300, 0xc10000c1c1c1c100, 0x1500001515151500,
-	0xe30000e3e3e3e300, 0xad0000adadadad00, 0xf40000f4f4f4f400,
-	0x7700007777777700, 0xc70000c7c7c7c700, 0x8000008080808000,
-	0x9e00009e9e9e9e00,
+	0x7000007070707000ULL, 0x8200008282828200ULL, 0x2c00002c2c2c2c00ULL,
+	0xec0000ecececec00ULL, 0xb30000b3b3b3b300ULL, 0x2700002727272700ULL,
+	0xc00000c0c0c0c000ULL, 0xe50000e5e5e5e500ULL, 0xe40000e4e4e4e400ULL,
+	0x8500008585858500ULL, 0x5700005757575700ULL, 0x3500003535353500ULL,
+	0xea0000eaeaeaea00ULL, 0x0c00000c0c0c0c00ULL, 0xae0000aeaeaeae00ULL,
+	0x4100004141414100ULL, 0x2300002323232300ULL, 0xef0000efefefef00ULL,
+	0x6b00006b6b6b6b00ULL, 0x9300009393939300ULL, 0x4500004545454500ULL,
+	0x1900001919191900ULL, 0xa50000a5a5a5a500ULL, 0x2100002121212100ULL,
+	0xed0000edededed00ULL, 0x0e00000e0e0e0e00ULL, 0x4f00004f4f4f4f00ULL,
+	0x4e00004e4e4e4e00ULL, 0x1d00001d1d1d1d00ULL, 0x6500006565656500ULL,
+	0x9200009292929200ULL, 0xbd0000bdbdbdbd00ULL, 0x8600008686868600ULL,
+	0xb80000b8b8b8b800ULL, 0xaf0000afafafaf00ULL, 0x8f00008f8f8f8f00ULL,
+	0x7c00007c7c7c7c00ULL, 0xeb0000ebebebeb00ULL, 0x1f00001f1f1f1f00ULL,
+	0xce0000cececece00ULL, 0x3e00003e3e3e3e00ULL, 0x3000003030303000ULL,
+	0xdc0000dcdcdcdc00ULL, 0x5f00005f5f5f5f00ULL, 0x5e00005e5e5e5e00ULL,
+	0xc50000c5c5c5c500ULL, 0x0b00000b0b0b0b00ULL, 0x1a00001a1a1a1a00ULL,
+	0xa60000a6a6a6a600ULL, 0xe10000e1e1e1e100ULL, 0x3900003939393900ULL,
+	0xca0000cacacaca00ULL, 0xd50000d5d5d5d500ULL, 0x4700004747474700ULL,
+	0x5d00005d5d5d5d00ULL, 0x3d00003d3d3d3d00ULL, 0xd90000d9d9d9d900ULL,
+	0x0100000101010100ULL, 0x5a00005a5a5a5a00ULL, 0xd60000d6d6d6d600ULL,
+	0x5100005151515100ULL, 0x5600005656565600ULL, 0x6c00006c6c6c6c00ULL,
+	0x4d00004d4d4d4d00ULL, 0x8b00008b8b8b8b00ULL, 0x0d00000d0d0d0d00ULL,
+	0x9a00009a9a9a9a00ULL, 0x6600006666666600ULL, 0xfb0000fbfbfbfb00ULL,
+	0xcc0000cccccccc00ULL, 0xb00000b0b0b0b000ULL, 0x2d00002d2d2d2d00ULL,
+	0x7400007474747400ULL, 0x1200001212121200ULL, 0x2b00002b2b2b2b00ULL,
+	0x2000002020202000ULL, 0xf00000f0f0f0f000ULL, 0xb10000b1b1b1b100ULL,
+	0x8400008484848400ULL, 0x9900009999999900ULL, 0xdf0000dfdfdfdf00ULL,
+	0x4c00004c4c4c4c00ULL, 0xcb0000cbcbcbcb00ULL, 0xc20000c2c2c2c200ULL,
+	0x3400003434343400ULL, 0x7e00007e7e7e7e00ULL, 0x7600007676767600ULL,
+	0x0500000505050500ULL, 0x6d00006d6d6d6d00ULL, 0xb70000b7b7b7b700ULL,
+	0xa90000a9a9a9a900ULL, 0x3100003131313100ULL, 0xd10000d1d1d1d100ULL,
+	0x1700001717171700ULL, 0x0400000404040400ULL, 0xd70000d7d7d7d700ULL,
+	0x1400001414141400ULL, 0x5800005858585800ULL, 0x3a00003a3a3a3a00ULL,
+	0x6100006161616100ULL, 0xde0000dededede00ULL, 0x1b00001b1b1b1b00ULL,
+	0x1100001111111100ULL, 0x1c00001c1c1c1c00ULL, 0x3200003232323200ULL,
+	0x0f00000f0f0f0f00ULL, 0x9c00009c9c9c9c00ULL, 0x1600001616161600ULL,
+	0x5300005353535300ULL, 0x1800001818181800ULL, 0xf20000f2f2f2f200ULL,
+	0x2200002222222200ULL, 0xfe0000fefefefe00ULL, 0x4400004444444400ULL,
+	0xcf0000cfcfcfcf00ULL, 0xb20000b2b2b2b200ULL, 0xc30000c3c3c3c300ULL,
+	0xb50000b5b5b5b500ULL, 0x7a00007a7a7a7a00ULL, 0x9100009191919100ULL,
+	0x2400002424242400ULL, 0x0800000808080800ULL, 0xe80000e8e8e8e800ULL,
+	0xa80000a8a8a8a800ULL, 0x6000006060606000ULL, 0xfc0000fcfcfcfc00ULL,
+	0x6900006969696900ULL, 0x5000005050505000ULL, 0xaa0000aaaaaaaa00ULL,
+	0xd00000d0d0d0d000ULL, 0xa00000a0a0a0a000ULL, 0x7d00007d7d7d7d00ULL,
+	0xa10000a1a1a1a100ULL, 0x8900008989898900ULL, 0x6200006262626200ULL,
+	0x9700009797979700ULL, 0x5400005454545400ULL, 0x5b00005b5b5b5b00ULL,
+	0x1e00001e1e1e1e00ULL, 0x9500009595959500ULL, 0xe00000e0e0e0e000ULL,
+	0xff0000ffffffff00ULL, 0x6400006464646400ULL, 0xd20000d2d2d2d200ULL,
+	0x1000001010101000ULL, 0xc40000c4c4c4c400ULL, 0x0000000000000000ULL,
+	0x4800004848484800ULL, 0xa30000a3a3a3a300ULL, 0xf70000f7f7f7f700ULL,
+	0x7500007575757500ULL, 0xdb0000dbdbdbdb00ULL, 0x8a00008a8a8a8a00ULL,
+	0x0300000303030300ULL, 0xe60000e6e6e6e600ULL, 0xda0000dadadada00ULL,
+	0x0900000909090900ULL, 0x3f00003f3f3f3f00ULL, 0xdd0000dddddddd00ULL,
+	0x9400009494949400ULL, 0x8700008787878700ULL, 0x5c00005c5c5c5c00ULL,
+	0x8300008383838300ULL, 0x0200000202020200ULL, 0xcd0000cdcdcdcd00ULL,
+	0x4a00004a4a4a4a00ULL, 0x9000009090909000ULL, 0x3300003333333300ULL,
+	0x7300007373737300ULL, 0x6700006767676700ULL, 0xf60000f6f6f6f600ULL,
+	0xf30000f3f3f3f300ULL, 0x9d00009d9d9d9d00ULL, 0x7f00007f7f7f7f00ULL,
+	0xbf0000bfbfbfbf00ULL, 0xe20000e2e2e2e200ULL, 0x5200005252525200ULL,
+	0x9b00009b9b9b9b00ULL, 0xd80000d8d8d8d800ULL, 0x2600002626262600ULL,
+	0xc80000c8c8c8c800ULL, 0x3700003737373700ULL, 0xc60000c6c6c6c600ULL,
+	0x3b00003b3b3b3b00ULL, 0x8100008181818100ULL, 0x9600009696969600ULL,
+	0x6f00006f6f6f6f00ULL, 0x4b00004b4b4b4b00ULL, 0x1300001313131300ULL,
+	0xbe0000bebebebe00ULL, 0x6300006363636300ULL, 0x2e00002e2e2e2e00ULL,
+	0xe90000e9e9e9e900ULL, 0x7900007979797900ULL, 0xa70000a7a7a7a700ULL,
+	0x8c00008c8c8c8c00ULL, 0x9f00009f9f9f9f00ULL, 0x6e00006e6e6e6e00ULL,
+	0xbc0000bcbcbcbc00ULL, 0x8e00008e8e8e8e00ULL, 0x2900002929292900ULL,
+	0xf50000f5f5f5f500ULL, 0xf90000f9f9f9f900ULL, 0xb60000b6b6b6b600ULL,
+	0x2f00002f2f2f2f00ULL, 0xfd0000fdfdfdfd00ULL, 0xb40000b4b4b4b400ULL,
+	0x5900005959595900ULL, 0x7800007878787800ULL, 0x9800009898989800ULL,
+	0x0600000606060600ULL, 0x6a00006a6a6a6a00ULL, 0xe70000e7e7e7e700ULL,
+	0x4600004646464600ULL, 0x7100007171717100ULL, 0xba0000babababa00ULL,
+	0xd40000d4d4d4d400ULL, 0x2500002525252500ULL, 0xab0000abababab00ULL,
+	0x4200004242424200ULL, 0x8800008888888800ULL, 0xa20000a2a2a2a200ULL,
+	0x8d00008d8d8d8d00ULL, 0xfa0000fafafafa00ULL, 0x7200007272727200ULL,
+	0x0700000707070700ULL, 0xb90000b9b9b9b900ULL, 0x5500005555555500ULL,
+	0xf80000f8f8f8f800ULL, 0xee0000eeeeeeee00ULL, 0xac0000acacacac00ULL,
+	0x0a00000a0a0a0a00ULL, 0x3600003636363600ULL, 0x4900004949494900ULL,
+	0x2a00002a2a2a2a00ULL, 0x6800006868686800ULL, 0x3c00003c3c3c3c00ULL,
+	0x3800003838383800ULL, 0xf10000f1f1f1f100ULL, 0xa40000a4a4a4a400ULL,
+	0x4000004040404000ULL, 0x2800002828282800ULL, 0xd30000d3d3d3d300ULL,
+	0x7b00007b7b7b7b00ULL, 0xbb0000bbbbbbbb00ULL, 0xc90000c9c9c9c900ULL,
+	0x4300004343434300ULL, 0xc10000c1c1c1c100ULL, 0x1500001515151500ULL,
+	0xe30000e3e3e3e300ULL, 0xad0000adadadad00ULL, 0xf40000f4f4f4f400ULL,
+	0x7700007777777700ULL, 0xc70000c7c7c7c700ULL, 0x8000008080808000ULL,
+	0x9e00009e9e9e9e00ULL,
 };
 
 const u64 camellia_sp22000222[256] = {
-	0xe0e0000000e0e0e0, 0x0505000000050505, 0x5858000000585858,
-	0xd9d9000000d9d9d9, 0x6767000000676767, 0x4e4e0000004e4e4e,
-	0x8181000000818181, 0xcbcb000000cbcbcb, 0xc9c9000000c9c9c9,
-	0x0b0b0000000b0b0b, 0xaeae000000aeaeae, 0x6a6a0000006a6a6a,
-	0xd5d5000000d5d5d5, 0x1818000000181818, 0x5d5d0000005d5d5d,
-	0x8282000000828282, 0x4646000000464646, 0xdfdf000000dfdfdf,
-	0xd6d6000000d6d6d6, 0x2727000000272727, 0x8a8a0000008a8a8a,
-	0x3232000000323232, 0x4b4b0000004b4b4b, 0x4242000000424242,
-	0xdbdb000000dbdbdb, 0x1c1c0000001c1c1c, 0x9e9e0000009e9e9e,
-	0x9c9c0000009c9c9c, 0x3a3a0000003a3a3a, 0xcaca000000cacaca,
-	0x2525000000252525, 0x7b7b0000007b7b7b, 0x0d0d0000000d0d0d,
-	0x7171000000717171, 0x5f5f0000005f5f5f, 0x1f1f0000001f1f1f,
-	0xf8f8000000f8f8f8, 0xd7d7000000d7d7d7, 0x3e3e0000003e3e3e,
-	0x9d9d0000009d9d9d, 0x7c7c0000007c7c7c, 0x6060000000606060,
-	0xb9b9000000b9b9b9, 0xbebe000000bebebe, 0xbcbc000000bcbcbc,
-	0x8b8b0000008b8b8b, 0x1616000000161616, 0x3434000000343434,
-	0x4d4d0000004d4d4d, 0xc3c3000000c3c3c3, 0x7272000000727272,
-	0x9595000000959595, 0xabab000000ababab, 0x8e8e0000008e8e8e,
-	0xbaba000000bababa, 0x7a7a0000007a7a7a, 0xb3b3000000b3b3b3,
-	0x0202000000020202, 0xb4b4000000b4b4b4, 0xadad000000adadad,
-	0xa2a2000000a2a2a2, 0xacac000000acacac, 0xd8d8000000d8d8d8,
-	0x9a9a0000009a9a9a, 0x1717000000171717, 0x1a1a0000001a1a1a,
-	0x3535000000353535, 0xcccc000000cccccc, 0xf7f7000000f7f7f7,
-	0x9999000000999999, 0x6161000000616161, 0x5a5a0000005a5a5a,
-	0xe8e8000000e8e8e8, 0x2424000000242424, 0x5656000000565656,
-	0x4040000000404040, 0xe1e1000000e1e1e1, 0x6363000000636363,
-	0x0909000000090909, 0x3333000000333333, 0xbfbf000000bfbfbf,
-	0x9898000000989898, 0x9797000000979797, 0x8585000000858585,
-	0x6868000000686868, 0xfcfc000000fcfcfc, 0xecec000000ececec,
-	0x0a0a0000000a0a0a, 0xdada000000dadada, 0x6f6f0000006f6f6f,
-	0x5353000000535353, 0x6262000000626262, 0xa3a3000000a3a3a3,
-	0x2e2e0000002e2e2e, 0x0808000000080808, 0xafaf000000afafaf,
-	0x2828000000282828, 0xb0b0000000b0b0b0, 0x7474000000747474,
-	0xc2c2000000c2c2c2, 0xbdbd000000bdbdbd, 0x3636000000363636,
-	0x2222000000222222, 0x3838000000383838, 0x6464000000646464,
-	0x1e1e0000001e1e1e, 0x3939000000393939, 0x2c2c0000002c2c2c,
-	0xa6a6000000a6a6a6, 0x3030000000303030, 0xe5e5000000e5e5e5,
-	0x4444000000444444, 0xfdfd000000fdfdfd, 0x8888000000888888,
-	0x9f9f0000009f9f9f, 0x6565000000656565, 0x8787000000878787,
-	0x6b6b0000006b6b6b, 0xf4f4000000f4f4f4, 0x2323000000232323,
-	0x4848000000484848, 0x1010000000101010, 0xd1d1000000d1d1d1,
-	0x5151000000515151, 0xc0c0000000c0c0c0, 0xf9f9000000f9f9f9,
-	0xd2d2000000d2d2d2, 0xa0a0000000a0a0a0, 0x5555000000555555,
-	0xa1a1000000a1a1a1, 0x4141000000414141, 0xfafa000000fafafa,
-	0x4343000000434343, 0x1313000000131313, 0xc4c4000000c4c4c4,
-	0x2f2f0000002f2f2f, 0xa8a8000000a8a8a8, 0xb6b6000000b6b6b6,
-	0x3c3c0000003c3c3c, 0x2b2b0000002b2b2b, 0xc1c1000000c1c1c1,
-	0xffff000000ffffff, 0xc8c8000000c8c8c8, 0xa5a5000000a5a5a5,
-	0x2020000000202020, 0x8989000000898989, 0x0000000000000000,
-	0x9090000000909090, 0x4747000000474747, 0xefef000000efefef,
-	0xeaea000000eaeaea, 0xb7b7000000b7b7b7, 0x1515000000151515,
-	0x0606000000060606, 0xcdcd000000cdcdcd, 0xb5b5000000b5b5b5,
-	0x1212000000121212, 0x7e7e0000007e7e7e, 0xbbbb000000bbbbbb,
-	0x2929000000292929, 0x0f0f0000000f0f0f, 0xb8b8000000b8b8b8,
-	0x0707000000070707, 0x0404000000040404, 0x9b9b0000009b9b9b,
-	0x9494000000949494, 0x2121000000212121, 0x6666000000666666,
-	0xe6e6000000e6e6e6, 0xcece000000cecece, 0xeded000000ededed,
-	0xe7e7000000e7e7e7, 0x3b3b0000003b3b3b, 0xfefe000000fefefe,
-	0x7f7f0000007f7f7f, 0xc5c5000000c5c5c5, 0xa4a4000000a4a4a4,
-	0x3737000000373737, 0xb1b1000000b1b1b1, 0x4c4c0000004c4c4c,
-	0x9191000000919191, 0x6e6e0000006e6e6e, 0x8d8d0000008d8d8d,
-	0x7676000000767676, 0x0303000000030303, 0x2d2d0000002d2d2d,
-	0xdede000000dedede, 0x9696000000969696, 0x2626000000262626,
-	0x7d7d0000007d7d7d, 0xc6c6000000c6c6c6, 0x5c5c0000005c5c5c,
-	0xd3d3000000d3d3d3, 0xf2f2000000f2f2f2, 0x4f4f0000004f4f4f,
-	0x1919000000191919, 0x3f3f0000003f3f3f, 0xdcdc000000dcdcdc,
-	0x7979000000797979, 0x1d1d0000001d1d1d, 0x5252000000525252,
-	0xebeb000000ebebeb, 0xf3f3000000f3f3f3, 0x6d6d0000006d6d6d,
-	0x5e5e0000005e5e5e, 0xfbfb000000fbfbfb, 0x6969000000696969,
-	0xb2b2000000b2b2b2, 0xf0f0000000f0f0f0, 0x3131000000313131,
-	0x0c0c0000000c0c0c, 0xd4d4000000d4d4d4, 0xcfcf000000cfcfcf,
-	0x8c8c0000008c8c8c, 0xe2e2000000e2e2e2, 0x7575000000757575,
-	0xa9a9000000a9a9a9, 0x4a4a0000004a4a4a, 0x5757000000575757,
-	0x8484000000848484, 0x1111000000111111, 0x4545000000454545,
-	0x1b1b0000001b1b1b, 0xf5f5000000f5f5f5, 0xe4e4000000e4e4e4,
-	0x0e0e0000000e0e0e, 0x7373000000737373, 0xaaaa000000aaaaaa,
-	0xf1f1000000f1f1f1, 0xdddd000000dddddd, 0x5959000000595959,
-	0x1414000000141414, 0x6c6c0000006c6c6c, 0x9292000000929292,
-	0x5454000000545454, 0xd0d0000000d0d0d0, 0x7878000000787878,
-	0x7070000000707070, 0xe3e3000000e3e3e3, 0x4949000000494949,
-	0x8080000000808080, 0x5050000000505050, 0xa7a7000000a7a7a7,
-	0xf6f6000000f6f6f6, 0x7777000000777777, 0x9393000000939393,
-	0x8686000000868686, 0x8383000000838383, 0x2a2a0000002a2a2a,
-	0xc7c7000000c7c7c7, 0x5b5b0000005b5b5b, 0xe9e9000000e9e9e9,
-	0xeeee000000eeeeee, 0x8f8f0000008f8f8f, 0x0101000000010101,
-	0x3d3d0000003d3d3d,
+	0xe0e0000000e0e0e0ULL, 0x0505000000050505ULL, 0x5858000000585858ULL,
+	0xd9d9000000d9d9d9ULL, 0x6767000000676767ULL, 0x4e4e0000004e4e4eULL,
+	0x8181000000818181ULL, 0xcbcb000000cbcbcbULL, 0xc9c9000000c9c9c9ULL,
+	0x0b0b0000000b0b0bULL, 0xaeae000000aeaeaeULL, 0x6a6a0000006a6a6aULL,
+	0xd5d5000000d5d5d5ULL, 0x1818000000181818ULL, 0x5d5d0000005d5d5dULL,
+	0x8282000000828282ULL, 0x4646000000464646ULL, 0xdfdf000000dfdfdfULL,
+	0xd6d6000000d6d6d6ULL, 0x2727000000272727ULL, 0x8a8a0000008a8a8aULL,
+	0x3232000000323232ULL, 0x4b4b0000004b4b4bULL, 0x4242000000424242ULL,
+	0xdbdb000000dbdbdbULL, 0x1c1c0000001c1c1cULL, 0x9e9e0000009e9e9eULL,
+	0x9c9c0000009c9c9cULL, 0x3a3a0000003a3a3aULL, 0xcaca000000cacacaULL,
+	0x2525000000252525ULL, 0x7b7b0000007b7b7bULL, 0x0d0d0000000d0d0dULL,
+	0x7171000000717171ULL, 0x5f5f0000005f5f5fULL, 0x1f1f0000001f1f1fULL,
+	0xf8f8000000f8f8f8ULL, 0xd7d7000000d7d7d7ULL, 0x3e3e0000003e3e3eULL,
+	0x9d9d0000009d9d9dULL, 0x7c7c0000007c7c7cULL, 0x6060000000606060ULL,
+	0xb9b9000000b9b9b9ULL, 0xbebe000000bebebeULL, 0xbcbc000000bcbcbcULL,
+	0x8b8b0000008b8b8bULL, 0x1616000000161616ULL, 0x3434000000343434ULL,
+	0x4d4d0000004d4d4dULL, 0xc3c3000000c3c3c3ULL, 0x7272000000727272ULL,
+	0x9595000000959595ULL, 0xabab000000abababULL, 0x8e8e0000008e8e8eULL,
+	0xbaba000000bababaULL, 0x7a7a0000007a7a7aULL, 0xb3b3000000b3b3b3ULL,
+	0x0202000000020202ULL, 0xb4b4000000b4b4b4ULL, 0xadad000000adadadULL,
+	0xa2a2000000a2a2a2ULL, 0xacac000000acacacULL, 0xd8d8000000d8d8d8ULL,
+	0x9a9a0000009a9a9aULL, 0x1717000000171717ULL, 0x1a1a0000001a1a1aULL,
+	0x3535000000353535ULL, 0xcccc000000ccccccULL, 0xf7f7000000f7f7f7ULL,
+	0x9999000000999999ULL, 0x6161000000616161ULL, 0x5a5a0000005a5a5aULL,
+	0xe8e8000000e8e8e8ULL, 0x2424000000242424ULL, 0x5656000000565656ULL,
+	0x4040000000404040ULL, 0xe1e1000000e1e1e1ULL, 0x6363000000636363ULL,
+	0x0909000000090909ULL, 0x3333000000333333ULL, 0xbfbf000000bfbfbfULL,
+	0x9898000000989898ULL, 0x9797000000979797ULL, 0x8585000000858585ULL,
+	0x6868000000686868ULL, 0xfcfc000000fcfcfcULL, 0xecec000000ecececULL,
+	0x0a0a0000000a0a0aULL, 0xdada000000dadadaULL, 0x6f6f0000006f6f6fULL,
+	0x5353000000535353ULL, 0x6262000000626262ULL, 0xa3a3000000a3a3a3ULL,
+	0x2e2e0000002e2e2eULL, 0x0808000000080808ULL, 0xafaf000000afafafULL,
+	0x2828000000282828ULL, 0xb0b0000000b0b0b0ULL, 0x7474000000747474ULL,
+	0xc2c2000000c2c2c2ULL, 0xbdbd000000bdbdbdULL, 0x3636000000363636ULL,
+	0x2222000000222222ULL, 0x3838000000383838ULL, 0x6464000000646464ULL,
+	0x1e1e0000001e1e1eULL, 0x3939000000393939ULL, 0x2c2c0000002c2c2cULL,
+	0xa6a6000000a6a6a6ULL, 0x3030000000303030ULL, 0xe5e5000000e5e5e5ULL,
+	0x4444000000444444ULL, 0xfdfd000000fdfdfdULL, 0x8888000000888888ULL,
+	0x9f9f0000009f9f9fULL, 0x6565000000656565ULL, 0x8787000000878787ULL,
+	0x6b6b0000006b6b6bULL, 0xf4f4000000f4f4f4ULL, 0x2323000000232323ULL,
+	0x4848000000484848ULL, 0x1010000000101010ULL, 0xd1d1000000d1d1d1ULL,
+	0x5151000000515151ULL, 0xc0c0000000c0c0c0ULL, 0xf9f9000000f9f9f9ULL,
+	0xd2d2000000d2d2d2ULL, 0xa0a0000000a0a0a0ULL, 0x5555000000555555ULL,
+	0xa1a1000000a1a1a1ULL, 0x4141000000414141ULL, 0xfafa000000fafafaULL,
+	0x4343000000434343ULL, 0x1313000000131313ULL, 0xc4c4000000c4c4c4ULL,
+	0x2f2f0000002f2f2fULL, 0xa8a8000000a8a8a8ULL, 0xb6b6000000b6b6b6ULL,
+	0x3c3c0000003c3c3cULL, 0x2b2b0000002b2b2bULL, 0xc1c1000000c1c1c1ULL,
+	0xffff000000ffffffULL, 0xc8c8000000c8c8c8ULL, 0xa5a5000000a5a5a5ULL,
+	0x2020000000202020ULL, 0x8989000000898989ULL, 0x0000000000000000ULL,
+	0x9090000000909090ULL, 0x4747000000474747ULL, 0xefef000000efefefULL,
+	0xeaea000000eaeaeaULL, 0xb7b7000000b7b7b7ULL, 0x1515000000151515ULL,
+	0x0606000000060606ULL, 0xcdcd000000cdcdcdULL, 0xb5b5000000b5b5b5ULL,
+	0x1212000000121212ULL, 0x7e7e0000007e7e7eULL, 0xbbbb000000bbbbbbULL,
+	0x2929000000292929ULL, 0x0f0f0000000f0f0fULL, 0xb8b8000000b8b8b8ULL,
+	0x0707000000070707ULL, 0x0404000000040404ULL, 0x9b9b0000009b9b9bULL,
+	0x9494000000949494ULL, 0x2121000000212121ULL, 0x6666000000666666ULL,
+	0xe6e6000000e6e6e6ULL, 0xcece000000cececeULL, 0xeded000000edededULL,
+	0xe7e7000000e7e7e7ULL, 0x3b3b0000003b3b3bULL, 0xfefe000000fefefeULL,
+	0x7f7f0000007f7f7fULL, 0xc5c5000000c5c5c5ULL, 0xa4a4000000a4a4a4ULL,
+	0x3737000000373737ULL, 0xb1b1000000b1b1b1ULL, 0x4c4c0000004c4c4cULL,
+	0x9191000000919191ULL, 0x6e6e0000006e6e6eULL, 0x8d8d0000008d8d8dULL,
+	0x7676000000767676ULL, 0x0303000000030303ULL, 0x2d2d0000002d2d2dULL,
+	0xdede000000dededeULL, 0x9696000000969696ULL, 0x2626000000262626ULL,
+	0x7d7d0000007d7d7dULL, 0xc6c6000000c6c6c6ULL, 0x5c5c0000005c5c5cULL,
+	0xd3d3000000d3d3d3ULL, 0xf2f2000000f2f2f2ULL, 0x4f4f0000004f4f4fULL,
+	0x1919000000191919ULL, 0x3f3f0000003f3f3fULL, 0xdcdc000000dcdcdcULL,
+	0x7979000000797979ULL, 0x1d1d0000001d1d1dULL, 0x5252000000525252ULL,
+	0xebeb000000ebebebULL, 0xf3f3000000f3f3f3ULL, 0x6d6d0000006d6d6dULL,
+	0x5e5e0000005e5e5eULL, 0xfbfb000000fbfbfbULL, 0x6969000000696969ULL,
+	0xb2b2000000b2b2b2ULL, 0xf0f0000000f0f0f0ULL, 0x3131000000313131ULL,
+	0x0c0c0000000c0c0cULL, 0xd4d4000000d4d4d4ULL, 0xcfcf000000cfcfcfULL,
+	0x8c8c0000008c8c8cULL, 0xe2e2000000e2e2e2ULL, 0x7575000000757575ULL,
+	0xa9a9000000a9a9a9ULL, 0x4a4a0000004a4a4aULL, 0x5757000000575757ULL,
+	0x8484000000848484ULL, 0x1111000000111111ULL, 0x4545000000454545ULL,
+	0x1b1b0000001b1b1bULL, 0xf5f5000000f5f5f5ULL, 0xe4e4000000e4e4e4ULL,
+	0x0e0e0000000e0e0eULL, 0x7373000000737373ULL, 0xaaaa000000aaaaaaULL,
+	0xf1f1000000f1f1f1ULL, 0xdddd000000ddddddULL, 0x5959000000595959ULL,
+	0x1414000000141414ULL, 0x6c6c0000006c6c6cULL, 0x9292000000929292ULL,
+	0x5454000000545454ULL, 0xd0d0000000d0d0d0ULL, 0x7878000000787878ULL,
+	0x7070000000707070ULL, 0xe3e3000000e3e3e3ULL, 0x4949000000494949ULL,
+	0x8080000000808080ULL, 0x5050000000505050ULL, 0xa7a7000000a7a7a7ULL,
+	0xf6f6000000f6f6f6ULL, 0x7777000000777777ULL, 0x9393000000939393ULL,
+	0x8686000000868686ULL, 0x8383000000838383ULL, 0x2a2a0000002a2a2aULL,
+	0xc7c7000000c7c7c7ULL, 0x5b5b0000005b5b5bULL, 0xe9e9000000e9e9e9ULL,
+	0xeeee000000eeeeeeULL, 0x8f8f0000008f8f8fULL, 0x0101000000010101ULL,
+	0x3d3d0000003d3d3dULL,
 };
 
 const u64 camellia_sp03303033[256] = {
-	0x0038380038003838, 0x0041410041004141, 0x0016160016001616,
-	0x0076760076007676, 0x00d9d900d900d9d9, 0x0093930093009393,
-	0x0060600060006060, 0x00f2f200f200f2f2, 0x0072720072007272,
-	0x00c2c200c200c2c2, 0x00abab00ab00abab, 0x009a9a009a009a9a,
-	0x0075750075007575, 0x0006060006000606, 0x0057570057005757,
-	0x00a0a000a000a0a0, 0x0091910091009191, 0x00f7f700f700f7f7,
-	0x00b5b500b500b5b5, 0x00c9c900c900c9c9, 0x00a2a200a200a2a2,
-	0x008c8c008c008c8c, 0x00d2d200d200d2d2, 0x0090900090009090,
-	0x00f6f600f600f6f6, 0x0007070007000707, 0x00a7a700a700a7a7,
-	0x0027270027002727, 0x008e8e008e008e8e, 0x00b2b200b200b2b2,
-	0x0049490049004949, 0x00dede00de00dede, 0x0043430043004343,
-	0x005c5c005c005c5c, 0x00d7d700d700d7d7, 0x00c7c700c700c7c7,
-	0x003e3e003e003e3e, 0x00f5f500f500f5f5, 0x008f8f008f008f8f,
-	0x0067670067006767, 0x001f1f001f001f1f, 0x0018180018001818,
-	0x006e6e006e006e6e, 0x00afaf00af00afaf, 0x002f2f002f002f2f,
-	0x00e2e200e200e2e2, 0x0085850085008585, 0x000d0d000d000d0d,
-	0x0053530053005353, 0x00f0f000f000f0f0, 0x009c9c009c009c9c,
-	0x0065650065006565, 0x00eaea00ea00eaea, 0x00a3a300a300a3a3,
-	0x00aeae00ae00aeae, 0x009e9e009e009e9e, 0x00ecec00ec00ecec,
-	0x0080800080008080, 0x002d2d002d002d2d, 0x006b6b006b006b6b,
-	0x00a8a800a800a8a8, 0x002b2b002b002b2b, 0x0036360036003636,
-	0x00a6a600a600a6a6, 0x00c5c500c500c5c5, 0x0086860086008686,
-	0x004d4d004d004d4d, 0x0033330033003333, 0x00fdfd00fd00fdfd,
-	0x0066660066006666, 0x0058580058005858, 0x0096960096009696,
-	0x003a3a003a003a3a, 0x0009090009000909, 0x0095950095009595,
-	0x0010100010001010, 0x0078780078007878, 0x00d8d800d800d8d8,
-	0x0042420042004242, 0x00cccc00cc00cccc, 0x00efef00ef00efef,
-	0x0026260026002626, 0x00e5e500e500e5e5, 0x0061610061006161,
-	0x001a1a001a001a1a, 0x003f3f003f003f3f, 0x003b3b003b003b3b,
-	0x0082820082008282, 0x00b6b600b600b6b6, 0x00dbdb00db00dbdb,
-	0x00d4d400d400d4d4, 0x0098980098009898, 0x00e8e800e800e8e8,
-	0x008b8b008b008b8b, 0x0002020002000202, 0x00ebeb00eb00ebeb,
-	0x000a0a000a000a0a, 0x002c2c002c002c2c, 0x001d1d001d001d1d,
-	0x00b0b000b000b0b0, 0x006f6f006f006f6f, 0x008d8d008d008d8d,
-	0x0088880088008888, 0x000e0e000e000e0e, 0x0019190019001919,
-	0x0087870087008787, 0x004e4e004e004e4e, 0x000b0b000b000b0b,
-	0x00a9a900a900a9a9, 0x000c0c000c000c0c, 0x0079790079007979,
-	0x0011110011001111, 0x007f7f007f007f7f, 0x0022220022002222,
-	0x00e7e700e700e7e7, 0x0059590059005959, 0x00e1e100e100e1e1,
-	0x00dada00da00dada, 0x003d3d003d003d3d, 0x00c8c800c800c8c8,
-	0x0012120012001212, 0x0004040004000404, 0x0074740074007474,
-	0x0054540054005454, 0x0030300030003030, 0x007e7e007e007e7e,
-	0x00b4b400b400b4b4, 0x0028280028002828, 0x0055550055005555,
-	0x0068680068006868, 0x0050500050005050, 0x00bebe00be00bebe,
-	0x00d0d000d000d0d0, 0x00c4c400c400c4c4, 0x0031310031003131,
-	0x00cbcb00cb00cbcb, 0x002a2a002a002a2a, 0x00adad00ad00adad,
-	0x000f0f000f000f0f, 0x00caca00ca00caca, 0x0070700070007070,
-	0x00ffff00ff00ffff, 0x0032320032003232, 0x0069690069006969,
-	0x0008080008000808, 0x0062620062006262, 0x0000000000000000,
-	0x0024240024002424, 0x00d1d100d100d1d1, 0x00fbfb00fb00fbfb,
-	0x00baba00ba00baba, 0x00eded00ed00eded, 0x0045450045004545,
-	0x0081810081008181, 0x0073730073007373, 0x006d6d006d006d6d,
-	0x0084840084008484, 0x009f9f009f009f9f, 0x00eeee00ee00eeee,
-	0x004a4a004a004a4a, 0x00c3c300c300c3c3, 0x002e2e002e002e2e,
-	0x00c1c100c100c1c1, 0x0001010001000101, 0x00e6e600e600e6e6,
-	0x0025250025002525, 0x0048480048004848, 0x0099990099009999,
-	0x00b9b900b900b9b9, 0x00b3b300b300b3b3, 0x007b7b007b007b7b,
-	0x00f9f900f900f9f9, 0x00cece00ce00cece, 0x00bfbf00bf00bfbf,
-	0x00dfdf00df00dfdf, 0x0071710071007171, 0x0029290029002929,
-	0x00cdcd00cd00cdcd, 0x006c6c006c006c6c, 0x0013130013001313,
-	0x0064640064006464, 0x009b9b009b009b9b, 0x0063630063006363,
-	0x009d9d009d009d9d, 0x00c0c000c000c0c0, 0x004b4b004b004b4b,
-	0x00b7b700b700b7b7, 0x00a5a500a500a5a5, 0x0089890089008989,
-	0x005f5f005f005f5f, 0x00b1b100b100b1b1, 0x0017170017001717,
-	0x00f4f400f400f4f4, 0x00bcbc00bc00bcbc, 0x00d3d300d300d3d3,
-	0x0046460046004646, 0x00cfcf00cf00cfcf, 0x0037370037003737,
-	0x005e5e005e005e5e, 0x0047470047004747, 0x0094940094009494,
-	0x00fafa00fa00fafa, 0x00fcfc00fc00fcfc, 0x005b5b005b005b5b,
-	0x0097970097009797, 0x00fefe00fe00fefe, 0x005a5a005a005a5a,
-	0x00acac00ac00acac, 0x003c3c003c003c3c, 0x004c4c004c004c4c,
-	0x0003030003000303, 0x0035350035003535, 0x00f3f300f300f3f3,
-	0x0023230023002323, 0x00b8b800b800b8b8, 0x005d5d005d005d5d,
-	0x006a6a006a006a6a, 0x0092920092009292, 0x00d5d500d500d5d5,
-	0x0021210021002121, 0x0044440044004444, 0x0051510051005151,
-	0x00c6c600c600c6c6, 0x007d7d007d007d7d, 0x0039390039003939,
-	0x0083830083008383, 0x00dcdc00dc00dcdc, 0x00aaaa00aa00aaaa,
-	0x007c7c007c007c7c, 0x0077770077007777, 0x0056560056005656,
-	0x0005050005000505, 0x001b1b001b001b1b, 0x00a4a400a400a4a4,
-	0x0015150015001515, 0x0034340034003434, 0x001e1e001e001e1e,
-	0x001c1c001c001c1c, 0x00f8f800f800f8f8, 0x0052520052005252,
-	0x0020200020002020, 0x0014140014001414, 0x00e9e900e900e9e9,
-	0x00bdbd00bd00bdbd, 0x00dddd00dd00dddd, 0x00e4e400e400e4e4,
-	0x00a1a100a100a1a1, 0x00e0e000e000e0e0, 0x008a8a008a008a8a,
-	0x00f1f100f100f1f1, 0x00d6d600d600d6d6, 0x007a7a007a007a7a,
-	0x00bbbb00bb00bbbb, 0x00e3e300e300e3e3, 0x0040400040004040,
-	0x004f4f004f004f4f,
+	0x0038380038003838ULL, 0x0041410041004141ULL, 0x0016160016001616ULL,
+	0x0076760076007676ULL, 0x00d9d900d900d9d9ULL, 0x0093930093009393ULL,
+	0x0060600060006060ULL, 0x00f2f200f200f2f2ULL, 0x0072720072007272ULL,
+	0x00c2c200c200c2c2ULL, 0x00abab00ab00ababULL, 0x009a9a009a009a9aULL,
+	0x0075750075007575ULL, 0x0006060006000606ULL, 0x0057570057005757ULL,
+	0x00a0a000a000a0a0ULL, 0x0091910091009191ULL, 0x00f7f700f700f7f7ULL,
+	0x00b5b500b500b5b5ULL, 0x00c9c900c900c9c9ULL, 0x00a2a200a200a2a2ULL,
+	0x008c8c008c008c8cULL, 0x00d2d200d200d2d2ULL, 0x0090900090009090ULL,
+	0x00f6f600f600f6f6ULL, 0x0007070007000707ULL, 0x00a7a700a700a7a7ULL,
+	0x0027270027002727ULL, 0x008e8e008e008e8eULL, 0x00b2b200b200b2b2ULL,
+	0x0049490049004949ULL, 0x00dede00de00dedeULL, 0x0043430043004343ULL,
+	0x005c5c005c005c5cULL, 0x00d7d700d700d7d7ULL, 0x00c7c700c700c7c7ULL,
+	0x003e3e003e003e3eULL, 0x00f5f500f500f5f5ULL, 0x008f8f008f008f8fULL,
+	0x0067670067006767ULL, 0x001f1f001f001f1fULL, 0x0018180018001818ULL,
+	0x006e6e006e006e6eULL, 0x00afaf00af00afafULL, 0x002f2f002f002f2fULL,
+	0x00e2e200e200e2e2ULL, 0x0085850085008585ULL, 0x000d0d000d000d0dULL,
+	0x0053530053005353ULL, 0x00f0f000f000f0f0ULL, 0x009c9c009c009c9cULL,
+	0x0065650065006565ULL, 0x00eaea00ea00eaeaULL, 0x00a3a300a300a3a3ULL,
+	0x00aeae00ae00aeaeULL, 0x009e9e009e009e9eULL, 0x00ecec00ec00ececULL,
+	0x0080800080008080ULL, 0x002d2d002d002d2dULL, 0x006b6b006b006b6bULL,
+	0x00a8a800a800a8a8ULL, 0x002b2b002b002b2bULL, 0x0036360036003636ULL,
+	0x00a6a600a600a6a6ULL, 0x00c5c500c500c5c5ULL, 0x0086860086008686ULL,
+	0x004d4d004d004d4dULL, 0x0033330033003333ULL, 0x00fdfd00fd00fdfdULL,
+	0x0066660066006666ULL, 0x0058580058005858ULL, 0x0096960096009696ULL,
+	0x003a3a003a003a3aULL, 0x0009090009000909ULL, 0x0095950095009595ULL,
+	0x0010100010001010ULL, 0x0078780078007878ULL, 0x00d8d800d800d8d8ULL,
+	0x0042420042004242ULL, 0x00cccc00cc00ccccULL, 0x00efef00ef00efefULL,
+	0x0026260026002626ULL, 0x00e5e500e500e5e5ULL, 0x0061610061006161ULL,
+	0x001a1a001a001a1aULL, 0x003f3f003f003f3fULL, 0x003b3b003b003b3bULL,
+	0x0082820082008282ULL, 0x00b6b600b600b6b6ULL, 0x00dbdb00db00dbdbULL,
+	0x00d4d400d400d4d4ULL, 0x0098980098009898ULL, 0x00e8e800e800e8e8ULL,
+	0x008b8b008b008b8bULL, 0x0002020002000202ULL, 0x00ebeb00eb00ebebULL,
+	0x000a0a000a000a0aULL, 0x002c2c002c002c2cULL, 0x001d1d001d001d1dULL,
+	0x00b0b000b000b0b0ULL, 0x006f6f006f006f6fULL, 0x008d8d008d008d8dULL,
+	0x0088880088008888ULL, 0x000e0e000e000e0eULL, 0x0019190019001919ULL,
+	0x0087870087008787ULL, 0x004e4e004e004e4eULL, 0x000b0b000b000b0bULL,
+	0x00a9a900a900a9a9ULL, 0x000c0c000c000c0cULL, 0x0079790079007979ULL,
+	0x0011110011001111ULL, 0x007f7f007f007f7fULL, 0x0022220022002222ULL,
+	0x00e7e700e700e7e7ULL, 0x0059590059005959ULL, 0x00e1e100e100e1e1ULL,
+	0x00dada00da00dadaULL, 0x003d3d003d003d3dULL, 0x00c8c800c800c8c8ULL,
+	0x0012120012001212ULL, 0x0004040004000404ULL, 0x0074740074007474ULL,
+	0x0054540054005454ULL, 0x0030300030003030ULL, 0x007e7e007e007e7eULL,
+	0x00b4b400b400b4b4ULL, 0x0028280028002828ULL, 0x0055550055005555ULL,
+	0x0068680068006868ULL, 0x0050500050005050ULL, 0x00bebe00be00bebeULL,
+	0x00d0d000d000d0d0ULL, 0x00c4c400c400c4c4ULL, 0x0031310031003131ULL,
+	0x00cbcb00cb00cbcbULL, 0x002a2a002a002a2aULL, 0x00adad00ad00adadULL,
+	0x000f0f000f000f0fULL, 0x00caca00ca00cacaULL, 0x0070700070007070ULL,
+	0x00ffff00ff00ffffULL, 0x0032320032003232ULL, 0x0069690069006969ULL,
+	0x0008080008000808ULL, 0x0062620062006262ULL, 0x0000000000000000ULL,
+	0x0024240024002424ULL, 0x00d1d100d100d1d1ULL, 0x00fbfb00fb00fbfbULL,
+	0x00baba00ba00babaULL, 0x00eded00ed00ededULL, 0x0045450045004545ULL,
+	0x0081810081008181ULL, 0x0073730073007373ULL, 0x006d6d006d006d6dULL,
+	0x0084840084008484ULL, 0x009f9f009f009f9fULL, 0x00eeee00ee00eeeeULL,
+	0x004a4a004a004a4aULL, 0x00c3c300c300c3c3ULL, 0x002e2e002e002e2eULL,
+	0x00c1c100c100c1c1ULL, 0x0001010001000101ULL, 0x00e6e600e600e6e6ULL,
+	0x0025250025002525ULL, 0x0048480048004848ULL, 0x0099990099009999ULL,
+	0x00b9b900b900b9b9ULL, 0x00b3b300b300b3b3ULL, 0x007b7b007b007b7bULL,
+	0x00f9f900f900f9f9ULL, 0x00cece00ce00ceceULL, 0x00bfbf00bf00bfbfULL,
+	0x00dfdf00df00dfdfULL, 0x0071710071007171ULL, 0x0029290029002929ULL,
+	0x00cdcd00cd00cdcdULL, 0x006c6c006c006c6cULL, 0x0013130013001313ULL,
+	0x0064640064006464ULL, 0x009b9b009b009b9bULL, 0x0063630063006363ULL,
+	0x009d9d009d009d9dULL, 0x00c0c000c000c0c0ULL, 0x004b4b004b004b4bULL,
+	0x00b7b700b700b7b7ULL, 0x00a5a500a500a5a5ULL, 0x0089890089008989ULL,
+	0x005f5f005f005f5fULL, 0x00b1b100b100b1b1ULL, 0x0017170017001717ULL,
+	0x00f4f400f400f4f4ULL, 0x00bcbc00bc00bcbcULL, 0x00d3d300d300d3d3ULL,
+	0x0046460046004646ULL, 0x00cfcf00cf00cfcfULL, 0x0037370037003737ULL,
+	0x005e5e005e005e5eULL, 0x0047470047004747ULL, 0x0094940094009494ULL,
+	0x00fafa00fa00fafaULL, 0x00fcfc00fc00fcfcULL, 0x005b5b005b005b5bULL,
+	0x0097970097009797ULL, 0x00fefe00fe00fefeULL, 0x005a5a005a005a5aULL,
+	0x00acac00ac00acacULL, 0x003c3c003c003c3cULL, 0x004c4c004c004c4cULL,
+	0x0003030003000303ULL, 0x0035350035003535ULL, 0x00f3f300f300f3f3ULL,
+	0x0023230023002323ULL, 0x00b8b800b800b8b8ULL, 0x005d5d005d005d5dULL,
+	0x006a6a006a006a6aULL, 0x0092920092009292ULL, 0x00d5d500d500d5d5ULL,
+	0x0021210021002121ULL, 0x0044440044004444ULL, 0x0051510051005151ULL,
+	0x00c6c600c600c6c6ULL, 0x007d7d007d007d7dULL, 0x0039390039003939ULL,
+	0x0083830083008383ULL, 0x00dcdc00dc00dcdcULL, 0x00aaaa00aa00aaaaULL,
+	0x007c7c007c007c7cULL, 0x0077770077007777ULL, 0x0056560056005656ULL,
+	0x0005050005000505ULL, 0x001b1b001b001b1bULL, 0x00a4a400a400a4a4ULL,
+	0x0015150015001515ULL, 0x0034340034003434ULL, 0x001e1e001e001e1eULL,
+	0x001c1c001c001c1cULL, 0x00f8f800f800f8f8ULL, 0x0052520052005252ULL,
+	0x0020200020002020ULL, 0x0014140014001414ULL, 0x00e9e900e900e9e9ULL,
+	0x00bdbd00bd00bdbdULL, 0x00dddd00dd00ddddULL, 0x00e4e400e400e4e4ULL,
+	0x00a1a100a100a1a1ULL, 0x00e0e000e000e0e0ULL, 0x008a8a008a008a8aULL,
+	0x00f1f100f100f1f1ULL, 0x00d6d600d600d6d6ULL, 0x007a7a007a007a7aULL,
+	0x00bbbb00bb00bbbbULL, 0x00e3e300e300e3e3ULL, 0x0040400040004040ULL,
+	0x004f4f004f004f4fULL,
 };
 
 const u64 camellia_sp00444404[256] = {
-	0x0000707070700070, 0x00002c2c2c2c002c, 0x0000b3b3b3b300b3,
-	0x0000c0c0c0c000c0, 0x0000e4e4e4e400e4, 0x0000575757570057,
-	0x0000eaeaeaea00ea, 0x0000aeaeaeae00ae, 0x0000232323230023,
-	0x00006b6b6b6b006b, 0x0000454545450045, 0x0000a5a5a5a500a5,
-	0x0000edededed00ed, 0x00004f4f4f4f004f, 0x00001d1d1d1d001d,
-	0x0000929292920092, 0x0000868686860086, 0x0000afafafaf00af,
-	0x00007c7c7c7c007c, 0x00001f1f1f1f001f, 0x00003e3e3e3e003e,
-	0x0000dcdcdcdc00dc, 0x00005e5e5e5e005e, 0x00000b0b0b0b000b,
-	0x0000a6a6a6a600a6, 0x0000393939390039, 0x0000d5d5d5d500d5,
-	0x00005d5d5d5d005d, 0x0000d9d9d9d900d9, 0x00005a5a5a5a005a,
-	0x0000515151510051, 0x00006c6c6c6c006c, 0x00008b8b8b8b008b,
-	0x00009a9a9a9a009a, 0x0000fbfbfbfb00fb, 0x0000b0b0b0b000b0,
-	0x0000747474740074, 0x00002b2b2b2b002b, 0x0000f0f0f0f000f0,
-	0x0000848484840084, 0x0000dfdfdfdf00df, 0x0000cbcbcbcb00cb,
-	0x0000343434340034, 0x0000767676760076, 0x00006d6d6d6d006d,
-	0x0000a9a9a9a900a9, 0x0000d1d1d1d100d1, 0x0000040404040004,
-	0x0000141414140014, 0x00003a3a3a3a003a, 0x0000dededede00de,
-	0x0000111111110011, 0x0000323232320032, 0x00009c9c9c9c009c,
-	0x0000535353530053, 0x0000f2f2f2f200f2, 0x0000fefefefe00fe,
-	0x0000cfcfcfcf00cf, 0x0000c3c3c3c300c3, 0x00007a7a7a7a007a,
-	0x0000242424240024, 0x0000e8e8e8e800e8, 0x0000606060600060,
-	0x0000696969690069, 0x0000aaaaaaaa00aa, 0x0000a0a0a0a000a0,
-	0x0000a1a1a1a100a1, 0x0000626262620062, 0x0000545454540054,
-	0x00001e1e1e1e001e, 0x0000e0e0e0e000e0, 0x0000646464640064,
-	0x0000101010100010, 0x0000000000000000, 0x0000a3a3a3a300a3,
-	0x0000757575750075, 0x00008a8a8a8a008a, 0x0000e6e6e6e600e6,
-	0x0000090909090009, 0x0000dddddddd00dd, 0x0000878787870087,
-	0x0000838383830083, 0x0000cdcdcdcd00cd, 0x0000909090900090,
-	0x0000737373730073, 0x0000f6f6f6f600f6, 0x00009d9d9d9d009d,
-	0x0000bfbfbfbf00bf, 0x0000525252520052, 0x0000d8d8d8d800d8,
-	0x0000c8c8c8c800c8, 0x0000c6c6c6c600c6, 0x0000818181810081,
-	0x00006f6f6f6f006f, 0x0000131313130013, 0x0000636363630063,
-	0x0000e9e9e9e900e9, 0x0000a7a7a7a700a7, 0x00009f9f9f9f009f,
-	0x0000bcbcbcbc00bc, 0x0000292929290029, 0x0000f9f9f9f900f9,
-	0x00002f2f2f2f002f, 0x0000b4b4b4b400b4, 0x0000787878780078,
-	0x0000060606060006, 0x0000e7e7e7e700e7, 0x0000717171710071,
-	0x0000d4d4d4d400d4, 0x0000abababab00ab, 0x0000888888880088,
-	0x00008d8d8d8d008d, 0x0000727272720072, 0x0000b9b9b9b900b9,
-	0x0000f8f8f8f800f8, 0x0000acacacac00ac, 0x0000363636360036,
-	0x00002a2a2a2a002a, 0x00003c3c3c3c003c, 0x0000f1f1f1f100f1,
-	0x0000404040400040, 0x0000d3d3d3d300d3, 0x0000bbbbbbbb00bb,
-	0x0000434343430043, 0x0000151515150015, 0x0000adadadad00ad,
-	0x0000777777770077, 0x0000808080800080, 0x0000828282820082,
-	0x0000ecececec00ec, 0x0000272727270027, 0x0000e5e5e5e500e5,
-	0x0000858585850085, 0x0000353535350035, 0x00000c0c0c0c000c,
-	0x0000414141410041, 0x0000efefefef00ef, 0x0000939393930093,
-	0x0000191919190019, 0x0000212121210021, 0x00000e0e0e0e000e,
-	0x00004e4e4e4e004e, 0x0000656565650065, 0x0000bdbdbdbd00bd,
-	0x0000b8b8b8b800b8, 0x00008f8f8f8f008f, 0x0000ebebebeb00eb,
-	0x0000cececece00ce, 0x0000303030300030, 0x00005f5f5f5f005f,
-	0x0000c5c5c5c500c5, 0x00001a1a1a1a001a, 0x0000e1e1e1e100e1,
-	0x0000cacacaca00ca, 0x0000474747470047, 0x00003d3d3d3d003d,
-	0x0000010101010001, 0x0000d6d6d6d600d6, 0x0000565656560056,
-	0x00004d4d4d4d004d, 0x00000d0d0d0d000d, 0x0000666666660066,
-	0x0000cccccccc00cc, 0x00002d2d2d2d002d, 0x0000121212120012,
-	0x0000202020200020, 0x0000b1b1b1b100b1, 0x0000999999990099,
-	0x00004c4c4c4c004c, 0x0000c2c2c2c200c2, 0x00007e7e7e7e007e,
-	0x0000050505050005, 0x0000b7b7b7b700b7, 0x0000313131310031,
-	0x0000171717170017, 0x0000d7d7d7d700d7, 0x0000585858580058,
-	0x0000616161610061, 0x00001b1b1b1b001b, 0x00001c1c1c1c001c,
-	0x00000f0f0f0f000f, 0x0000161616160016, 0x0000181818180018,
-	0x0000222222220022, 0x0000444444440044, 0x0000b2b2b2b200b2,
-	0x0000b5b5b5b500b5, 0x0000919191910091, 0x0000080808080008,
-	0x0000a8a8a8a800a8, 0x0000fcfcfcfc00fc, 0x0000505050500050,
-	0x0000d0d0d0d000d0, 0x00007d7d7d7d007d, 0x0000898989890089,
-	0x0000979797970097, 0x00005b5b5b5b005b, 0x0000959595950095,
-	0x0000ffffffff00ff, 0x0000d2d2d2d200d2, 0x0000c4c4c4c400c4,
-	0x0000484848480048, 0x0000f7f7f7f700f7, 0x0000dbdbdbdb00db,
-	0x0000030303030003, 0x0000dadadada00da, 0x00003f3f3f3f003f,
-	0x0000949494940094, 0x00005c5c5c5c005c, 0x0000020202020002,
-	0x00004a4a4a4a004a, 0x0000333333330033, 0x0000676767670067,
-	0x0000f3f3f3f300f3, 0x00007f7f7f7f007f, 0x0000e2e2e2e200e2,
-	0x00009b9b9b9b009b, 0x0000262626260026, 0x0000373737370037,
-	0x00003b3b3b3b003b, 0x0000969696960096, 0x00004b4b4b4b004b,
-	0x0000bebebebe00be, 0x00002e2e2e2e002e, 0x0000797979790079,
-	0x00008c8c8c8c008c, 0x00006e6e6e6e006e, 0x00008e8e8e8e008e,
-	0x0000f5f5f5f500f5, 0x0000b6b6b6b600b6, 0x0000fdfdfdfd00fd,
-	0x0000595959590059, 0x0000989898980098, 0x00006a6a6a6a006a,
-	0x0000464646460046, 0x0000babababa00ba, 0x0000252525250025,
-	0x0000424242420042, 0x0000a2a2a2a200a2, 0x0000fafafafa00fa,
-	0x0000070707070007, 0x0000555555550055, 0x0000eeeeeeee00ee,
-	0x00000a0a0a0a000a, 0x0000494949490049, 0x0000686868680068,
-	0x0000383838380038, 0x0000a4a4a4a400a4, 0x0000282828280028,
-	0x00007b7b7b7b007b, 0x0000c9c9c9c900c9, 0x0000c1c1c1c100c1,
-	0x0000e3e3e3e300e3, 0x0000f4f4f4f400f4, 0x0000c7c7c7c700c7,
-	0x00009e9e9e9e009e,
+	0x0000707070700070ULL, 0x00002c2c2c2c002cULL, 0x0000b3b3b3b300b3ULL,
+	0x0000c0c0c0c000c0ULL, 0x0000e4e4e4e400e4ULL, 0x0000575757570057ULL,
+	0x0000eaeaeaea00eaULL, 0x0000aeaeaeae00aeULL, 0x0000232323230023ULL,
+	0x00006b6b6b6b006bULL, 0x0000454545450045ULL, 0x0000a5a5a5a500a5ULL,
+	0x0000edededed00edULL, 0x00004f4f4f4f004fULL, 0x00001d1d1d1d001dULL,
+	0x0000929292920092ULL, 0x0000868686860086ULL, 0x0000afafafaf00afULL,
+	0x00007c7c7c7c007cULL, 0x00001f1f1f1f001fULL, 0x00003e3e3e3e003eULL,
+	0x0000dcdcdcdc00dcULL, 0x00005e5e5e5e005eULL, 0x00000b0b0b0b000bULL,
+	0x0000a6a6a6a600a6ULL, 0x0000393939390039ULL, 0x0000d5d5d5d500d5ULL,
+	0x00005d5d5d5d005dULL, 0x0000d9d9d9d900d9ULL, 0x00005a5a5a5a005aULL,
+	0x0000515151510051ULL, 0x00006c6c6c6c006cULL, 0x00008b8b8b8b008bULL,
+	0x00009a9a9a9a009aULL, 0x0000fbfbfbfb00fbULL, 0x0000b0b0b0b000b0ULL,
+	0x0000747474740074ULL, 0x00002b2b2b2b002bULL, 0x0000f0f0f0f000f0ULL,
+	0x0000848484840084ULL, 0x0000dfdfdfdf00dfULL, 0x0000cbcbcbcb00cbULL,
+	0x0000343434340034ULL, 0x0000767676760076ULL, 0x00006d6d6d6d006dULL,
+	0x0000a9a9a9a900a9ULL, 0x0000d1d1d1d100d1ULL, 0x0000040404040004ULL,
+	0x0000141414140014ULL, 0x00003a3a3a3a003aULL, 0x0000dededede00deULL,
+	0x0000111111110011ULL, 0x0000323232320032ULL, 0x00009c9c9c9c009cULL,
+	0x0000535353530053ULL, 0x0000f2f2f2f200f2ULL, 0x0000fefefefe00feULL,
+	0x0000cfcfcfcf00cfULL, 0x0000c3c3c3c300c3ULL, 0x00007a7a7a7a007aULL,
+	0x0000242424240024ULL, 0x0000e8e8e8e800e8ULL, 0x0000606060600060ULL,
+	0x0000696969690069ULL, 0x0000aaaaaaaa00aaULL, 0x0000a0a0a0a000a0ULL,
+	0x0000a1a1a1a100a1ULL, 0x0000626262620062ULL, 0x0000545454540054ULL,
+	0x00001e1e1e1e001eULL, 0x0000e0e0e0e000e0ULL, 0x0000646464640064ULL,
+	0x0000101010100010ULL, 0x0000000000000000ULL, 0x0000a3a3a3a300a3ULL,
+	0x0000757575750075ULL, 0x00008a8a8a8a008aULL, 0x0000e6e6e6e600e6ULL,
+	0x0000090909090009ULL, 0x0000dddddddd00ddULL, 0x0000878787870087ULL,
+	0x0000838383830083ULL, 0x0000cdcdcdcd00cdULL, 0x0000909090900090ULL,
+	0x0000737373730073ULL, 0x0000f6f6f6f600f6ULL, 0x00009d9d9d9d009dULL,
+	0x0000bfbfbfbf00bfULL, 0x0000525252520052ULL, 0x0000d8d8d8d800d8ULL,
+	0x0000c8c8c8c800c8ULL, 0x0000c6c6c6c600c6ULL, 0x0000818181810081ULL,
+	0x00006f6f6f6f006fULL, 0x0000131313130013ULL, 0x0000636363630063ULL,
+	0x0000e9e9e9e900e9ULL, 0x0000a7a7a7a700a7ULL, 0x00009f9f9f9f009fULL,
+	0x0000bcbcbcbc00bcULL, 0x0000292929290029ULL, 0x0000f9f9f9f900f9ULL,
+	0x00002f2f2f2f002fULL, 0x0000b4b4b4b400b4ULL, 0x0000787878780078ULL,
+	0x0000060606060006ULL, 0x0000e7e7e7e700e7ULL, 0x0000717171710071ULL,
+	0x0000d4d4d4d400d4ULL, 0x0000abababab00abULL, 0x0000888888880088ULL,
+	0x00008d8d8d8d008dULL, 0x0000727272720072ULL, 0x0000b9b9b9b900b9ULL,
+	0x0000f8f8f8f800f8ULL, 0x0000acacacac00acULL, 0x0000363636360036ULL,
+	0x00002a2a2a2a002aULL, 0x00003c3c3c3c003cULL, 0x0000f1f1f1f100f1ULL,
+	0x0000404040400040ULL, 0x0000d3d3d3d300d3ULL, 0x0000bbbbbbbb00bbULL,
+	0x0000434343430043ULL, 0x0000151515150015ULL, 0x0000adadadad00adULL,
+	0x0000777777770077ULL, 0x0000808080800080ULL, 0x0000828282820082ULL,
+	0x0000ecececec00ecULL, 0x0000272727270027ULL, 0x0000e5e5e5e500e5ULL,
+	0x0000858585850085ULL, 0x0000353535350035ULL, 0x00000c0c0c0c000cULL,
+	0x0000414141410041ULL, 0x0000efefefef00efULL, 0x0000939393930093ULL,
+	0x0000191919190019ULL, 0x0000212121210021ULL, 0x00000e0e0e0e000eULL,
+	0x00004e4e4e4e004eULL, 0x0000656565650065ULL, 0x0000bdbdbdbd00bdULL,
+	0x0000b8b8b8b800b8ULL, 0x00008f8f8f8f008fULL, 0x0000ebebebeb00ebULL,
+	0x0000cececece00ceULL, 0x0000303030300030ULL, 0x00005f5f5f5f005fULL,
+	0x0000c5c5c5c500c5ULL, 0x00001a1a1a1a001aULL, 0x0000e1e1e1e100e1ULL,
+	0x0000cacacaca00caULL, 0x0000474747470047ULL, 0x00003d3d3d3d003dULL,
+	0x0000010101010001ULL, 0x0000d6d6d6d600d6ULL, 0x0000565656560056ULL,
+	0x00004d4d4d4d004dULL, 0x00000d0d0d0d000dULL, 0x0000666666660066ULL,
+	0x0000cccccccc00ccULL, 0x00002d2d2d2d002dULL, 0x0000121212120012ULL,
+	0x0000202020200020ULL, 0x0000b1b1b1b100b1ULL, 0x0000999999990099ULL,
+	0x00004c4c4c4c004cULL, 0x0000c2c2c2c200c2ULL, 0x00007e7e7e7e007eULL,
+	0x0000050505050005ULL, 0x0000b7b7b7b700b7ULL, 0x0000313131310031ULL,
+	0x0000171717170017ULL, 0x0000d7d7d7d700d7ULL, 0x0000585858580058ULL,
+	0x0000616161610061ULL, 0x00001b1b1b1b001bULL, 0x00001c1c1c1c001cULL,
+	0x00000f0f0f0f000fULL, 0x0000161616160016ULL, 0x0000181818180018ULL,
+	0x0000222222220022ULL, 0x0000444444440044ULL, 0x0000b2b2b2b200b2ULL,
+	0x0000b5b5b5b500b5ULL, 0x0000919191910091ULL, 0x0000080808080008ULL,
+	0x0000a8a8a8a800a8ULL, 0x0000fcfcfcfc00fcULL, 0x0000505050500050ULL,
+	0x0000d0d0d0d000d0ULL, 0x00007d7d7d7d007dULL, 0x0000898989890089ULL,
+	0x0000979797970097ULL, 0x00005b5b5b5b005bULL, 0x0000959595950095ULL,
+	0x0000ffffffff00ffULL, 0x0000d2d2d2d200d2ULL, 0x0000c4c4c4c400c4ULL,
+	0x0000484848480048ULL, 0x0000f7f7f7f700f7ULL, 0x0000dbdbdbdb00dbULL,
+	0x0000030303030003ULL, 0x0000dadadada00daULL, 0x00003f3f3f3f003fULL,
+	0x0000949494940094ULL, 0x00005c5c5c5c005cULL, 0x0000020202020002ULL,
+	0x00004a4a4a4a004aULL, 0x0000333333330033ULL, 0x0000676767670067ULL,
+	0x0000f3f3f3f300f3ULL, 0x00007f7f7f7f007fULL, 0x0000e2e2e2e200e2ULL,
+	0x00009b9b9b9b009bULL, 0x0000262626260026ULL, 0x0000373737370037ULL,
+	0x00003b3b3b3b003bULL, 0x0000969696960096ULL, 0x00004b4b4b4b004bULL,
+	0x0000bebebebe00beULL, 0x00002e2e2e2e002eULL, 0x0000797979790079ULL,
+	0x00008c8c8c8c008cULL, 0x00006e6e6e6e006eULL, 0x00008e8e8e8e008eULL,
+	0x0000f5f5f5f500f5ULL, 0x0000b6b6b6b600b6ULL, 0x0000fdfdfdfd00fdULL,
+	0x0000595959590059ULL, 0x0000989898980098ULL, 0x00006a6a6a6a006aULL,
+	0x0000464646460046ULL, 0x0000babababa00baULL, 0x0000252525250025ULL,
+	0x0000424242420042ULL, 0x0000a2a2a2a200a2ULL, 0x0000fafafafa00faULL,
+	0x0000070707070007ULL, 0x0000555555550055ULL, 0x0000eeeeeeee00eeULL,
+	0x00000a0a0a0a000aULL, 0x0000494949490049ULL, 0x0000686868680068ULL,
+	0x0000383838380038ULL, 0x0000a4a4a4a400a4ULL, 0x0000282828280028ULL,
+	0x00007b7b7b7b007bULL, 0x0000c9c9c9c900c9ULL, 0x0000c1c1c1c100c1ULL,
+	0x0000e3e3e3e300e3ULL, 0x0000f4f4f4f400f4ULL, 0x0000c7c7c7c700c7ULL,
+	0x00009e9e9e9e009eULL,
 };
 
 const u64 camellia_sp02220222[256] = {
-	0x00e0e0e000e0e0e0, 0x0005050500050505, 0x0058585800585858,
-	0x00d9d9d900d9d9d9, 0x0067676700676767, 0x004e4e4e004e4e4e,
-	0x0081818100818181, 0x00cbcbcb00cbcbcb, 0x00c9c9c900c9c9c9,
-	0x000b0b0b000b0b0b, 0x00aeaeae00aeaeae, 0x006a6a6a006a6a6a,
-	0x00d5d5d500d5d5d5, 0x0018181800181818, 0x005d5d5d005d5d5d,
-	0x0082828200828282, 0x0046464600464646, 0x00dfdfdf00dfdfdf,
-	0x00d6d6d600d6d6d6, 0x0027272700272727, 0x008a8a8a008a8a8a,
-	0x0032323200323232, 0x004b4b4b004b4b4b, 0x0042424200424242,
-	0x00dbdbdb00dbdbdb, 0x001c1c1c001c1c1c, 0x009e9e9e009e9e9e,
-	0x009c9c9c009c9c9c, 0x003a3a3a003a3a3a, 0x00cacaca00cacaca,
-	0x0025252500252525, 0x007b7b7b007b7b7b, 0x000d0d0d000d0d0d,
-	0x0071717100717171, 0x005f5f5f005f5f5f, 0x001f1f1f001f1f1f,
-	0x00f8f8f800f8f8f8, 0x00d7d7d700d7d7d7, 0x003e3e3e003e3e3e,
-	0x009d9d9d009d9d9d, 0x007c7c7c007c7c7c, 0x0060606000606060,
-	0x00b9b9b900b9b9b9, 0x00bebebe00bebebe, 0x00bcbcbc00bcbcbc,
-	0x008b8b8b008b8b8b, 0x0016161600161616, 0x0034343400343434,
-	0x004d4d4d004d4d4d, 0x00c3c3c300c3c3c3, 0x0072727200727272,
-	0x0095959500959595, 0x00ababab00ababab, 0x008e8e8e008e8e8e,
-	0x00bababa00bababa, 0x007a7a7a007a7a7a, 0x00b3b3b300b3b3b3,
-	0x0002020200020202, 0x00b4b4b400b4b4b4, 0x00adadad00adadad,
-	0x00a2a2a200a2a2a2, 0x00acacac00acacac, 0x00d8d8d800d8d8d8,
-	0x009a9a9a009a9a9a, 0x0017171700171717, 0x001a1a1a001a1a1a,
-	0x0035353500353535, 0x00cccccc00cccccc, 0x00f7f7f700f7f7f7,
-	0x0099999900999999, 0x0061616100616161, 0x005a5a5a005a5a5a,
-	0x00e8e8e800e8e8e8, 0x0024242400242424, 0x0056565600565656,
-	0x0040404000404040, 0x00e1e1e100e1e1e1, 0x0063636300636363,
-	0x0009090900090909, 0x0033333300333333, 0x00bfbfbf00bfbfbf,
-	0x0098989800989898, 0x0097979700979797, 0x0085858500858585,
-	0x0068686800686868, 0x00fcfcfc00fcfcfc, 0x00ececec00ececec,
-	0x000a0a0a000a0a0a, 0x00dadada00dadada, 0x006f6f6f006f6f6f,
-	0x0053535300535353, 0x0062626200626262, 0x00a3a3a300a3a3a3,
-	0x002e2e2e002e2e2e, 0x0008080800080808, 0x00afafaf00afafaf,
-	0x0028282800282828, 0x00b0b0b000b0b0b0, 0x0074747400747474,
-	0x00c2c2c200c2c2c2, 0x00bdbdbd00bdbdbd, 0x0036363600363636,
-	0x0022222200222222, 0x0038383800383838, 0x0064646400646464,
-	0x001e1e1e001e1e1e, 0x0039393900393939, 0x002c2c2c002c2c2c,
-	0x00a6a6a600a6a6a6, 0x0030303000303030, 0x00e5e5e500e5e5e5,
-	0x0044444400444444, 0x00fdfdfd00fdfdfd, 0x0088888800888888,
-	0x009f9f9f009f9f9f, 0x0065656500656565, 0x0087878700878787,
-	0x006b6b6b006b6b6b, 0x00f4f4f400f4f4f4, 0x0023232300232323,
-	0x0048484800484848, 0x0010101000101010, 0x00d1d1d100d1d1d1,
-	0x0051515100515151, 0x00c0c0c000c0c0c0, 0x00f9f9f900f9f9f9,
-	0x00d2d2d200d2d2d2, 0x00a0a0a000a0a0a0, 0x0055555500555555,
-	0x00a1a1a100a1a1a1, 0x0041414100414141, 0x00fafafa00fafafa,
-	0x0043434300434343, 0x0013131300131313, 0x00c4c4c400c4c4c4,
-	0x002f2f2f002f2f2f, 0x00a8a8a800a8a8a8, 0x00b6b6b600b6b6b6,
-	0x003c3c3c003c3c3c, 0x002b2b2b002b2b2b, 0x00c1c1c100c1c1c1,
-	0x00ffffff00ffffff, 0x00c8c8c800c8c8c8, 0x00a5a5a500a5a5a5,
-	0x0020202000202020, 0x0089898900898989, 0x0000000000000000,
-	0x0090909000909090, 0x0047474700474747, 0x00efefef00efefef,
-	0x00eaeaea00eaeaea, 0x00b7b7b700b7b7b7, 0x0015151500151515,
-	0x0006060600060606, 0x00cdcdcd00cdcdcd, 0x00b5b5b500b5b5b5,
-	0x0012121200121212, 0x007e7e7e007e7e7e, 0x00bbbbbb00bbbbbb,
-	0x0029292900292929, 0x000f0f0f000f0f0f, 0x00b8b8b800b8b8b8,
-	0x0007070700070707, 0x0004040400040404, 0x009b9b9b009b9b9b,
-	0x0094949400949494, 0x0021212100212121, 0x0066666600666666,
-	0x00e6e6e600e6e6e6, 0x00cecece00cecece, 0x00ededed00ededed,
-	0x00e7e7e700e7e7e7, 0x003b3b3b003b3b3b, 0x00fefefe00fefefe,
-	0x007f7f7f007f7f7f, 0x00c5c5c500c5c5c5, 0x00a4a4a400a4a4a4,
-	0x0037373700373737, 0x00b1b1b100b1b1b1, 0x004c4c4c004c4c4c,
-	0x0091919100919191, 0x006e6e6e006e6e6e, 0x008d8d8d008d8d8d,
-	0x0076767600767676, 0x0003030300030303, 0x002d2d2d002d2d2d,
-	0x00dedede00dedede, 0x0096969600969696, 0x0026262600262626,
-	0x007d7d7d007d7d7d, 0x00c6c6c600c6c6c6, 0x005c5c5c005c5c5c,
-	0x00d3d3d300d3d3d3, 0x00f2f2f200f2f2f2, 0x004f4f4f004f4f4f,
-	0x0019191900191919, 0x003f3f3f003f3f3f, 0x00dcdcdc00dcdcdc,
-	0x0079797900797979, 0x001d1d1d001d1d1d, 0x0052525200525252,
-	0x00ebebeb00ebebeb, 0x00f3f3f300f3f3f3, 0x006d6d6d006d6d6d,
-	0x005e5e5e005e5e5e, 0x00fbfbfb00fbfbfb, 0x0069696900696969,
-	0x00b2b2b200b2b2b2, 0x00f0f0f000f0f0f0, 0x0031313100313131,
-	0x000c0c0c000c0c0c, 0x00d4d4d400d4d4d4, 0x00cfcfcf00cfcfcf,
-	0x008c8c8c008c8c8c, 0x00e2e2e200e2e2e2, 0x0075757500757575,
-	0x00a9a9a900a9a9a9, 0x004a4a4a004a4a4a, 0x0057575700575757,
-	0x0084848400848484, 0x0011111100111111, 0x0045454500454545,
-	0x001b1b1b001b1b1b, 0x00f5f5f500f5f5f5, 0x00e4e4e400e4e4e4,
-	0x000e0e0e000e0e0e, 0x0073737300737373, 0x00aaaaaa00aaaaaa,
-	0x00f1f1f100f1f1f1, 0x00dddddd00dddddd, 0x0059595900595959,
-	0x0014141400141414, 0x006c6c6c006c6c6c, 0x0092929200929292,
-	0x0054545400545454, 0x00d0d0d000d0d0d0, 0x0078787800787878,
-	0x0070707000707070, 0x00e3e3e300e3e3e3, 0x0049494900494949,
-	0x0080808000808080, 0x0050505000505050, 0x00a7a7a700a7a7a7,
-	0x00f6f6f600f6f6f6, 0x0077777700777777, 0x0093939300939393,
-	0x0086868600868686, 0x0083838300838383, 0x002a2a2a002a2a2a,
-	0x00c7c7c700c7c7c7, 0x005b5b5b005b5b5b, 0x00e9e9e900e9e9e9,
-	0x00eeeeee00eeeeee, 0x008f8f8f008f8f8f, 0x0001010100010101,
-	0x003d3d3d003d3d3d,
+	0x00e0e0e000e0e0e0ULL, 0x0005050500050505ULL, 0x0058585800585858ULL,
+	0x00d9d9d900d9d9d9ULL, 0x0067676700676767ULL, 0x004e4e4e004e4e4eULL,
+	0x0081818100818181ULL, 0x00cbcbcb00cbcbcbULL, 0x00c9c9c900c9c9c9ULL,
+	0x000b0b0b000b0b0bULL, 0x00aeaeae00aeaeaeULL, 0x006a6a6a006a6a6aULL,
+	0x00d5d5d500d5d5d5ULL, 0x0018181800181818ULL, 0x005d5d5d005d5d5dULL,
+	0x0082828200828282ULL, 0x0046464600464646ULL, 0x00dfdfdf00dfdfdfULL,
+	0x00d6d6d600d6d6d6ULL, 0x0027272700272727ULL, 0x008a8a8a008a8a8aULL,
+	0x0032323200323232ULL, 0x004b4b4b004b4b4bULL, 0x0042424200424242ULL,
+	0x00dbdbdb00dbdbdbULL, 0x001c1c1c001c1c1cULL, 0x009e9e9e009e9e9eULL,
+	0x009c9c9c009c9c9cULL, 0x003a3a3a003a3a3aULL, 0x00cacaca00cacacaULL,
+	0x0025252500252525ULL, 0x007b7b7b007b7b7bULL, 0x000d0d0d000d0d0dULL,
+	0x0071717100717171ULL, 0x005f5f5f005f5f5fULL, 0x001f1f1f001f1f1fULL,
+	0x00f8f8f800f8f8f8ULL, 0x00d7d7d700d7d7d7ULL, 0x003e3e3e003e3e3eULL,
+	0x009d9d9d009d9d9dULL, 0x007c7c7c007c7c7cULL, 0x0060606000606060ULL,
+	0x00b9b9b900b9b9b9ULL, 0x00bebebe00bebebeULL, 0x00bcbcbc00bcbcbcULL,
+	0x008b8b8b008b8b8bULL, 0x0016161600161616ULL, 0x0034343400343434ULL,
+	0x004d4d4d004d4d4dULL, 0x00c3c3c300c3c3c3ULL, 0x0072727200727272ULL,
+	0x0095959500959595ULL, 0x00ababab00abababULL, 0x008e8e8e008e8e8eULL,
+	0x00bababa00bababaULL, 0x007a7a7a007a7a7aULL, 0x00b3b3b300b3b3b3ULL,
+	0x0002020200020202ULL, 0x00b4b4b400b4b4b4ULL, 0x00adadad00adadadULL,
+	0x00a2a2a200a2a2a2ULL, 0x00acacac00acacacULL, 0x00d8d8d800d8d8d8ULL,
+	0x009a9a9a009a9a9aULL, 0x0017171700171717ULL, 0x001a1a1a001a1a1aULL,
+	0x0035353500353535ULL, 0x00cccccc00ccccccULL, 0x00f7f7f700f7f7f7ULL,
+	0x0099999900999999ULL, 0x0061616100616161ULL, 0x005a5a5a005a5a5aULL,
+	0x00e8e8e800e8e8e8ULL, 0x0024242400242424ULL, 0x0056565600565656ULL,
+	0x0040404000404040ULL, 0x00e1e1e100e1e1e1ULL, 0x0063636300636363ULL,
+	0x0009090900090909ULL, 0x0033333300333333ULL, 0x00bfbfbf00bfbfbfULL,
+	0x0098989800989898ULL, 0x0097979700979797ULL, 0x0085858500858585ULL,
+	0x0068686800686868ULL, 0x00fcfcfc00fcfcfcULL, 0x00ececec00ecececULL,
+	0x000a0a0a000a0a0aULL, 0x00dadada00dadadaULL, 0x006f6f6f006f6f6fULL,
+	0x0053535300535353ULL, 0x0062626200626262ULL, 0x00a3a3a300a3a3a3ULL,
+	0x002e2e2e002e2e2eULL, 0x0008080800080808ULL, 0x00afafaf00afafafULL,
+	0x0028282800282828ULL, 0x00b0b0b000b0b0b0ULL, 0x0074747400747474ULL,
+	0x00c2c2c200c2c2c2ULL, 0x00bdbdbd00bdbdbdULL, 0x0036363600363636ULL,
+	0x0022222200222222ULL, 0x0038383800383838ULL, 0x0064646400646464ULL,
+	0x001e1e1e001e1e1eULL, 0x0039393900393939ULL, 0x002c2c2c002c2c2cULL,
+	0x00a6a6a600a6a6a6ULL, 0x0030303000303030ULL, 0x00e5e5e500e5e5e5ULL,
+	0x0044444400444444ULL, 0x00fdfdfd00fdfdfdULL, 0x0088888800888888ULL,
+	0x009f9f9f009f9f9fULL, 0x0065656500656565ULL, 0x0087878700878787ULL,
+	0x006b6b6b006b6b6bULL, 0x00f4f4f400f4f4f4ULL, 0x0023232300232323ULL,
+	0x0048484800484848ULL, 0x0010101000101010ULL, 0x00d1d1d100d1d1d1ULL,
+	0x0051515100515151ULL, 0x00c0c0c000c0c0c0ULL, 0x00f9f9f900f9f9f9ULL,
+	0x00d2d2d200d2d2d2ULL, 0x00a0a0a000a0a0a0ULL, 0x0055555500555555ULL,
+	0x00a1a1a100a1a1a1ULL, 0x0041414100414141ULL, 0x00fafafa00fafafaULL,
+	0x0043434300434343ULL, 0x0013131300131313ULL, 0x00c4c4c400c4c4c4ULL,
+	0x002f2f2f002f2f2fULL, 0x00a8a8a800a8a8a8ULL, 0x00b6b6b600b6b6b6ULL,
+	0x003c3c3c003c3c3cULL, 0x002b2b2b002b2b2bULL, 0x00c1c1c100c1c1c1ULL,
+	0x00ffffff00ffffffULL, 0x00c8c8c800c8c8c8ULL, 0x00a5a5a500a5a5a5ULL,
+	0x0020202000202020ULL, 0x0089898900898989ULL, 0x0000000000000000ULL,
+	0x0090909000909090ULL, 0x0047474700474747ULL, 0x00efefef00efefefULL,
+	0x00eaeaea00eaeaeaULL, 0x00b7b7b700b7b7b7ULL, 0x0015151500151515ULL,
+	0x0006060600060606ULL, 0x00cdcdcd00cdcdcdULL, 0x00b5b5b500b5b5b5ULL,
+	0x0012121200121212ULL, 0x007e7e7e007e7e7eULL, 0x00bbbbbb00bbbbbbULL,
+	0x0029292900292929ULL, 0x000f0f0f000f0f0fULL, 0x00b8b8b800b8b8b8ULL,
+	0x0007070700070707ULL, 0x0004040400040404ULL, 0x009b9b9b009b9b9bULL,
+	0x0094949400949494ULL, 0x0021212100212121ULL, 0x0066666600666666ULL,
+	0x00e6e6e600e6e6e6ULL, 0x00cecece00cececeULL, 0x00ededed00edededULL,
+	0x00e7e7e700e7e7e7ULL, 0x003b3b3b003b3b3bULL, 0x00fefefe00fefefeULL,
+	0x007f7f7f007f7f7fULL, 0x00c5c5c500c5c5c5ULL, 0x00a4a4a400a4a4a4ULL,
+	0x0037373700373737ULL, 0x00b1b1b100b1b1b1ULL, 0x004c4c4c004c4c4cULL,
+	0x0091919100919191ULL, 0x006e6e6e006e6e6eULL, 0x008d8d8d008d8d8dULL,
+	0x0076767600767676ULL, 0x0003030300030303ULL, 0x002d2d2d002d2d2dULL,
+	0x00dedede00dededeULL, 0x0096969600969696ULL, 0x0026262600262626ULL,
+	0x007d7d7d007d7d7dULL, 0x00c6c6c600c6c6c6ULL, 0x005c5c5c005c5c5cULL,
+	0x00d3d3d300d3d3d3ULL, 0x00f2f2f200f2f2f2ULL, 0x004f4f4f004f4f4fULL,
+	0x0019191900191919ULL, 0x003f3f3f003f3f3fULL, 0x00dcdcdc00dcdcdcULL,
+	0x0079797900797979ULL, 0x001d1d1d001d1d1dULL, 0x0052525200525252ULL,
+	0x00ebebeb00ebebebULL, 0x00f3f3f300f3f3f3ULL, 0x006d6d6d006d6d6dULL,
+	0x005e5e5e005e5e5eULL, 0x00fbfbfb00fbfbfbULL, 0x0069696900696969ULL,
+	0x00b2b2b200b2b2b2ULL, 0x00f0f0f000f0f0f0ULL, 0x0031313100313131ULL,
+	0x000c0c0c000c0c0cULL, 0x00d4d4d400d4d4d4ULL, 0x00cfcfcf00cfcfcfULL,
+	0x008c8c8c008c8c8cULL, 0x00e2e2e200e2e2e2ULL, 0x0075757500757575ULL,
+	0x00a9a9a900a9a9a9ULL, 0x004a4a4a004a4a4aULL, 0x0057575700575757ULL,
+	0x0084848400848484ULL, 0x0011111100111111ULL, 0x0045454500454545ULL,
+	0x001b1b1b001b1b1bULL, 0x00f5f5f500f5f5f5ULL, 0x00e4e4e400e4e4e4ULL,
+	0x000e0e0e000e0e0eULL, 0x0073737300737373ULL, 0x00aaaaaa00aaaaaaULL,
+	0x00f1f1f100f1f1f1ULL, 0x00dddddd00ddddddULL, 0x0059595900595959ULL,
+	0x0014141400141414ULL, 0x006c6c6c006c6c6cULL, 0x0092929200929292ULL,
+	0x0054545400545454ULL, 0x00d0d0d000d0d0d0ULL, 0x0078787800787878ULL,
+	0x0070707000707070ULL, 0x00e3e3e300e3e3e3ULL, 0x0049494900494949ULL,
+	0x0080808000808080ULL, 0x0050505000505050ULL, 0x00a7a7a700a7a7a7ULL,
+	0x00f6f6f600f6f6f6ULL, 0x0077777700777777ULL, 0x0093939300939393ULL,
+	0x0086868600868686ULL, 0x0083838300838383ULL, 0x002a2a2a002a2a2aULL,
+	0x00c7c7c700c7c7c7ULL, 0x005b5b5b005b5b5bULL, 0x00e9e9e900e9e9e9ULL,
+	0x00eeeeee00eeeeeeULL, 0x008f8f8f008f8f8fULL, 0x0001010100010101ULL,
+	0x003d3d3d003d3d3dULL,
 };
 
 const u64 camellia_sp30333033[256] = {
-	0x3800383838003838, 0x4100414141004141, 0x1600161616001616,
-	0x7600767676007676, 0xd900d9d9d900d9d9, 0x9300939393009393,
-	0x6000606060006060, 0xf200f2f2f200f2f2, 0x7200727272007272,
-	0xc200c2c2c200c2c2, 0xab00ababab00abab, 0x9a009a9a9a009a9a,
-	0x7500757575007575, 0x0600060606000606, 0x5700575757005757,
-	0xa000a0a0a000a0a0, 0x9100919191009191, 0xf700f7f7f700f7f7,
-	0xb500b5b5b500b5b5, 0xc900c9c9c900c9c9, 0xa200a2a2a200a2a2,
-	0x8c008c8c8c008c8c, 0xd200d2d2d200d2d2, 0x9000909090009090,
-	0xf600f6f6f600f6f6, 0x0700070707000707, 0xa700a7a7a700a7a7,
-	0x2700272727002727, 0x8e008e8e8e008e8e, 0xb200b2b2b200b2b2,
-	0x4900494949004949, 0xde00dedede00dede, 0x4300434343004343,
-	0x5c005c5c5c005c5c, 0xd700d7d7d700d7d7, 0xc700c7c7c700c7c7,
-	0x3e003e3e3e003e3e, 0xf500f5f5f500f5f5, 0x8f008f8f8f008f8f,
-	0x6700676767006767, 0x1f001f1f1f001f1f, 0x1800181818001818,
-	0x6e006e6e6e006e6e, 0xaf00afafaf00afaf, 0x2f002f2f2f002f2f,
-	0xe200e2e2e200e2e2, 0x8500858585008585, 0x0d000d0d0d000d0d,
-	0x5300535353005353, 0xf000f0f0f000f0f0, 0x9c009c9c9c009c9c,
-	0x6500656565006565, 0xea00eaeaea00eaea, 0xa300a3a3a300a3a3,
-	0xae00aeaeae00aeae, 0x9e009e9e9e009e9e, 0xec00ececec00ecec,
-	0x8000808080008080, 0x2d002d2d2d002d2d, 0x6b006b6b6b006b6b,
-	0xa800a8a8a800a8a8, 0x2b002b2b2b002b2b, 0x3600363636003636,
-	0xa600a6a6a600a6a6, 0xc500c5c5c500c5c5, 0x8600868686008686,
-	0x4d004d4d4d004d4d, 0x3300333333003333, 0xfd00fdfdfd00fdfd,
-	0x6600666666006666, 0x5800585858005858, 0x9600969696009696,
-	0x3a003a3a3a003a3a, 0x0900090909000909, 0x9500959595009595,
-	0x1000101010001010, 0x7800787878007878, 0xd800d8d8d800d8d8,
-	0x4200424242004242, 0xcc00cccccc00cccc, 0xef00efefef00efef,
-	0x2600262626002626, 0xe500e5e5e500e5e5, 0x6100616161006161,
-	0x1a001a1a1a001a1a, 0x3f003f3f3f003f3f, 0x3b003b3b3b003b3b,
-	0x8200828282008282, 0xb600b6b6b600b6b6, 0xdb00dbdbdb00dbdb,
-	0xd400d4d4d400d4d4, 0x9800989898009898, 0xe800e8e8e800e8e8,
-	0x8b008b8b8b008b8b, 0x0200020202000202, 0xeb00ebebeb00ebeb,
-	0x0a000a0a0a000a0a, 0x2c002c2c2c002c2c, 0x1d001d1d1d001d1d,
-	0xb000b0b0b000b0b0, 0x6f006f6f6f006f6f, 0x8d008d8d8d008d8d,
-	0x8800888888008888, 0x0e000e0e0e000e0e, 0x1900191919001919,
-	0x8700878787008787, 0x4e004e4e4e004e4e, 0x0b000b0b0b000b0b,
-	0xa900a9a9a900a9a9, 0x0c000c0c0c000c0c, 0x7900797979007979,
-	0x1100111111001111, 0x7f007f7f7f007f7f, 0x2200222222002222,
-	0xe700e7e7e700e7e7, 0x5900595959005959, 0xe100e1e1e100e1e1,
-	0xda00dadada00dada, 0x3d003d3d3d003d3d, 0xc800c8c8c800c8c8,
-	0x1200121212001212, 0x0400040404000404, 0x7400747474007474,
-	0x5400545454005454, 0x3000303030003030, 0x7e007e7e7e007e7e,
-	0xb400b4b4b400b4b4, 0x2800282828002828, 0x5500555555005555,
-	0x6800686868006868, 0x5000505050005050, 0xbe00bebebe00bebe,
-	0xd000d0d0d000d0d0, 0xc400c4c4c400c4c4, 0x3100313131003131,
-	0xcb00cbcbcb00cbcb, 0x2a002a2a2a002a2a, 0xad00adadad00adad,
-	0x0f000f0f0f000f0f, 0xca00cacaca00caca, 0x7000707070007070,
-	0xff00ffffff00ffff, 0x3200323232003232, 0x6900696969006969,
-	0x0800080808000808, 0x6200626262006262, 0x0000000000000000,
-	0x2400242424002424, 0xd100d1d1d100d1d1, 0xfb00fbfbfb00fbfb,
-	0xba00bababa00baba, 0xed00ededed00eded, 0x4500454545004545,
-	0x8100818181008181, 0x7300737373007373, 0x6d006d6d6d006d6d,
-	0x8400848484008484, 0x9f009f9f9f009f9f, 0xee00eeeeee00eeee,
-	0x4a004a4a4a004a4a, 0xc300c3c3c300c3c3, 0x2e002e2e2e002e2e,
-	0xc100c1c1c100c1c1, 0x0100010101000101, 0xe600e6e6e600e6e6,
-	0x2500252525002525, 0x4800484848004848, 0x9900999999009999,
-	0xb900b9b9b900b9b9, 0xb300b3b3b300b3b3, 0x7b007b7b7b007b7b,
-	0xf900f9f9f900f9f9, 0xce00cecece00cece, 0xbf00bfbfbf00bfbf,
-	0xdf00dfdfdf00dfdf, 0x7100717171007171, 0x2900292929002929,
-	0xcd00cdcdcd00cdcd, 0x6c006c6c6c006c6c, 0x1300131313001313,
-	0x6400646464006464, 0x9b009b9b9b009b9b, 0x6300636363006363,
-	0x9d009d9d9d009d9d, 0xc000c0c0c000c0c0, 0x4b004b4b4b004b4b,
-	0xb700b7b7b700b7b7, 0xa500a5a5a500a5a5, 0x8900898989008989,
-	0x5f005f5f5f005f5f, 0xb100b1b1b100b1b1, 0x1700171717001717,
-	0xf400f4f4f400f4f4, 0xbc00bcbcbc00bcbc, 0xd300d3d3d300d3d3,
-	0x4600464646004646, 0xcf00cfcfcf00cfcf, 0x3700373737003737,
-	0x5e005e5e5e005e5e, 0x4700474747004747, 0x9400949494009494,
-	0xfa00fafafa00fafa, 0xfc00fcfcfc00fcfc, 0x5b005b5b5b005b5b,
-	0x9700979797009797, 0xfe00fefefe00fefe, 0x5a005a5a5a005a5a,
-	0xac00acacac00acac, 0x3c003c3c3c003c3c, 0x4c004c4c4c004c4c,
-	0x0300030303000303, 0x3500353535003535, 0xf300f3f3f300f3f3,
-	0x2300232323002323, 0xb800b8b8b800b8b8, 0x5d005d5d5d005d5d,
-	0x6a006a6a6a006a6a, 0x9200929292009292, 0xd500d5d5d500d5d5,
-	0x2100212121002121, 0x4400444444004444, 0x5100515151005151,
-	0xc600c6c6c600c6c6, 0x7d007d7d7d007d7d, 0x3900393939003939,
-	0x8300838383008383, 0xdc00dcdcdc00dcdc, 0xaa00aaaaaa00aaaa,
-	0x7c007c7c7c007c7c, 0x7700777777007777, 0x5600565656005656,
-	0x0500050505000505, 0x1b001b1b1b001b1b, 0xa400a4a4a400a4a4,
-	0x1500151515001515, 0x3400343434003434, 0x1e001e1e1e001e1e,
-	0x1c001c1c1c001c1c, 0xf800f8f8f800f8f8, 0x5200525252005252,
-	0x2000202020002020, 0x1400141414001414, 0xe900e9e9e900e9e9,
-	0xbd00bdbdbd00bdbd, 0xdd00dddddd00dddd, 0xe400e4e4e400e4e4,
-	0xa100a1a1a100a1a1, 0xe000e0e0e000e0e0, 0x8a008a8a8a008a8a,
-	0xf100f1f1f100f1f1, 0xd600d6d6d600d6d6, 0x7a007a7a7a007a7a,
-	0xbb00bbbbbb00bbbb, 0xe300e3e3e300e3e3, 0x4000404040004040,
-	0x4f004f4f4f004f4f,
+	0x3800383838003838ULL, 0x4100414141004141ULL, 0x1600161616001616ULL,
+	0x7600767676007676ULL, 0xd900d9d9d900d9d9ULL, 0x9300939393009393ULL,
+	0x6000606060006060ULL, 0xf200f2f2f200f2f2ULL, 0x7200727272007272ULL,
+	0xc200c2c2c200c2c2ULL, 0xab00ababab00ababULL, 0x9a009a9a9a009a9aULL,
+	0x7500757575007575ULL, 0x0600060606000606ULL, 0x5700575757005757ULL,
+	0xa000a0a0a000a0a0ULL, 0x9100919191009191ULL, 0xf700f7f7f700f7f7ULL,
+	0xb500b5b5b500b5b5ULL, 0xc900c9c9c900c9c9ULL, 0xa200a2a2a200a2a2ULL,
+	0x8c008c8c8c008c8cULL, 0xd200d2d2d200d2d2ULL, 0x9000909090009090ULL,
+	0xf600f6f6f600f6f6ULL, 0x0700070707000707ULL, 0xa700a7a7a700a7a7ULL,
+	0x2700272727002727ULL, 0x8e008e8e8e008e8eULL, 0xb200b2b2b200b2b2ULL,
+	0x4900494949004949ULL, 0xde00dedede00dedeULL, 0x4300434343004343ULL,
+	0x5c005c5c5c005c5cULL, 0xd700d7d7d700d7d7ULL, 0xc700c7c7c700c7c7ULL,
+	0x3e003e3e3e003e3eULL, 0xf500f5f5f500f5f5ULL, 0x8f008f8f8f008f8fULL,
+	0x6700676767006767ULL, 0x1f001f1f1f001f1fULL, 0x1800181818001818ULL,
+	0x6e006e6e6e006e6eULL, 0xaf00afafaf00afafULL, 0x2f002f2f2f002f2fULL,
+	0xe200e2e2e200e2e2ULL, 0x8500858585008585ULL, 0x0d000d0d0d000d0dULL,
+	0x5300535353005353ULL, 0xf000f0f0f000f0f0ULL, 0x9c009c9c9c009c9cULL,
+	0x6500656565006565ULL, 0xea00eaeaea00eaeaULL, 0xa300a3a3a300a3a3ULL,
+	0xae00aeaeae00aeaeULL, 0x9e009e9e9e009e9eULL, 0xec00ececec00ececULL,
+	0x8000808080008080ULL, 0x2d002d2d2d002d2dULL, 0x6b006b6b6b006b6bULL,
+	0xa800a8a8a800a8a8ULL, 0x2b002b2b2b002b2bULL, 0x3600363636003636ULL,
+	0xa600a6a6a600a6a6ULL, 0xc500c5c5c500c5c5ULL, 0x8600868686008686ULL,
+	0x4d004d4d4d004d4dULL, 0x3300333333003333ULL, 0xfd00fdfdfd00fdfdULL,
+	0x6600666666006666ULL, 0x5800585858005858ULL, 0x9600969696009696ULL,
+	0x3a003a3a3a003a3aULL, 0x0900090909000909ULL, 0x9500959595009595ULL,
+	0x1000101010001010ULL, 0x7800787878007878ULL, 0xd800d8d8d800d8d8ULL,
+	0x4200424242004242ULL, 0xcc00cccccc00ccccULL, 0xef00efefef00efefULL,
+	0x2600262626002626ULL, 0xe500e5e5e500e5e5ULL, 0x6100616161006161ULL,
+	0x1a001a1a1a001a1aULL, 0x3f003f3f3f003f3fULL, 0x3b003b3b3b003b3bULL,
+	0x8200828282008282ULL, 0xb600b6b6b600b6b6ULL, 0xdb00dbdbdb00dbdbULL,
+	0xd400d4d4d400d4d4ULL, 0x9800989898009898ULL, 0xe800e8e8e800e8e8ULL,
+	0x8b008b8b8b008b8bULL, 0x0200020202000202ULL, 0xeb00ebebeb00ebebULL,
+	0x0a000a0a0a000a0aULL, 0x2c002c2c2c002c2cULL, 0x1d001d1d1d001d1dULL,
+	0xb000b0b0b000b0b0ULL, 0x6f006f6f6f006f6fULL, 0x8d008d8d8d008d8dULL,
+	0x8800888888008888ULL, 0x0e000e0e0e000e0eULL, 0x1900191919001919ULL,
+	0x8700878787008787ULL, 0x4e004e4e4e004e4eULL, 0x0b000b0b0b000b0bULL,
+	0xa900a9a9a900a9a9ULL, 0x0c000c0c0c000c0cULL, 0x7900797979007979ULL,
+	0x1100111111001111ULL, 0x7f007f7f7f007f7fULL, 0x2200222222002222ULL,
+	0xe700e7e7e700e7e7ULL, 0x5900595959005959ULL, 0xe100e1e1e100e1e1ULL,
+	0xda00dadada00dadaULL, 0x3d003d3d3d003d3dULL, 0xc800c8c8c800c8c8ULL,
+	0x1200121212001212ULL, 0x0400040404000404ULL, 0x7400747474007474ULL,
+	0x5400545454005454ULL, 0x3000303030003030ULL, 0x7e007e7e7e007e7eULL,
+	0xb400b4b4b400b4b4ULL, 0x2800282828002828ULL, 0x5500555555005555ULL,
+	0x6800686868006868ULL, 0x5000505050005050ULL, 0xbe00bebebe00bebeULL,
+	0xd000d0d0d000d0d0ULL, 0xc400c4c4c400c4c4ULL, 0x3100313131003131ULL,
+	0xcb00cbcbcb00cbcbULL, 0x2a002a2a2a002a2aULL, 0xad00adadad00adadULL,
+	0x0f000f0f0f000f0fULL, 0xca00cacaca00cacaULL, 0x7000707070007070ULL,
+	0xff00ffffff00ffffULL, 0x3200323232003232ULL, 0x6900696969006969ULL,
+	0x0800080808000808ULL, 0x6200626262006262ULL, 0x0000000000000000ULL,
+	0x2400242424002424ULL, 0xd100d1d1d100d1d1ULL, 0xfb00fbfbfb00fbfbULL,
+	0xba00bababa00babaULL, 0xed00ededed00ededULL, 0x4500454545004545ULL,
+	0x8100818181008181ULL, 0x7300737373007373ULL, 0x6d006d6d6d006d6dULL,
+	0x8400848484008484ULL, 0x9f009f9f9f009f9fULL, 0xee00eeeeee00eeeeULL,
+	0x4a004a4a4a004a4aULL, 0xc300c3c3c300c3c3ULL, 0x2e002e2e2e002e2eULL,
+	0xc100c1c1c100c1c1ULL, 0x0100010101000101ULL, 0xe600e6e6e600e6e6ULL,
+	0x2500252525002525ULL, 0x4800484848004848ULL, 0x9900999999009999ULL,
+	0xb900b9b9b900b9b9ULL, 0xb300b3b3b300b3b3ULL, 0x7b007b7b7b007b7bULL,
+	0xf900f9f9f900f9f9ULL, 0xce00cecece00ceceULL, 0xbf00bfbfbf00bfbfULL,
+	0xdf00dfdfdf00dfdfULL, 0x7100717171007171ULL, 0x2900292929002929ULL,
+	0xcd00cdcdcd00cdcdULL, 0x6c006c6c6c006c6cULL, 0x1300131313001313ULL,
+	0x6400646464006464ULL, 0x9b009b9b9b009b9bULL, 0x6300636363006363ULL,
+	0x9d009d9d9d009d9dULL, 0xc000c0c0c000c0c0ULL, 0x4b004b4b4b004b4bULL,
+	0xb700b7b7b700b7b7ULL, 0xa500a5a5a500a5a5ULL, 0x8900898989008989ULL,
+	0x5f005f5f5f005f5fULL, 0xb100b1b1b100b1b1ULL, 0x1700171717001717ULL,
+	0xf400f4f4f400f4f4ULL, 0xbc00bcbcbc00bcbcULL, 0xd300d3d3d300d3d3ULL,
+	0x4600464646004646ULL, 0xcf00cfcfcf00cfcfULL, 0x3700373737003737ULL,
+	0x5e005e5e5e005e5eULL, 0x4700474747004747ULL, 0x9400949494009494ULL,
+	0xfa00fafafa00fafaULL, 0xfc00fcfcfc00fcfcULL, 0x5b005b5b5b005b5bULL,
+	0x9700979797009797ULL, 0xfe00fefefe00fefeULL, 0x5a005a5a5a005a5aULL,
+	0xac00acacac00acacULL, 0x3c003c3c3c003c3cULL, 0x4c004c4c4c004c4cULL,
+	0x0300030303000303ULL, 0x3500353535003535ULL, 0xf300f3f3f300f3f3ULL,
+	0x2300232323002323ULL, 0xb800b8b8b800b8b8ULL, 0x5d005d5d5d005d5dULL,
+	0x6a006a6a6a006a6aULL, 0x9200929292009292ULL, 0xd500d5d5d500d5d5ULL,
+	0x2100212121002121ULL, 0x4400444444004444ULL, 0x5100515151005151ULL,
+	0xc600c6c6c600c6c6ULL, 0x7d007d7d7d007d7dULL, 0x3900393939003939ULL,
+	0x8300838383008383ULL, 0xdc00dcdcdc00dcdcULL, 0xaa00aaaaaa00aaaaULL,
+	0x7c007c7c7c007c7cULL, 0x7700777777007777ULL, 0x5600565656005656ULL,
+	0x0500050505000505ULL, 0x1b001b1b1b001b1bULL, 0xa400a4a4a400a4a4ULL,
+	0x1500151515001515ULL, 0x3400343434003434ULL, 0x1e001e1e1e001e1eULL,
+	0x1c001c1c1c001c1cULL, 0xf800f8f8f800f8f8ULL, 0x5200525252005252ULL,
+	0x2000202020002020ULL, 0x1400141414001414ULL, 0xe900e9e9e900e9e9ULL,
+	0xbd00bdbdbd00bdbdULL, 0xdd00dddddd00ddddULL, 0xe400e4e4e400e4e4ULL,
+	0xa100a1a1a100a1a1ULL, 0xe000e0e0e000e0e0ULL, 0x8a008a8a8a008a8aULL,
+	0xf100f1f1f100f1f1ULL, 0xd600d6d6d600d6d6ULL, 0x7a007a7a7a007a7aULL,
+	0xbb00bbbbbb00bbbbULL, 0xe300e3e3e300e3e3ULL, 0x4000404040004040ULL,
+	0x4f004f4f4f004f4fULL,
 };
 
 const u64 camellia_sp44044404[256] = {
-	0x7070007070700070, 0x2c2c002c2c2c002c, 0xb3b300b3b3b300b3,
-	0xc0c000c0c0c000c0, 0xe4e400e4e4e400e4, 0x5757005757570057,
-	0xeaea00eaeaea00ea, 0xaeae00aeaeae00ae, 0x2323002323230023,
-	0x6b6b006b6b6b006b, 0x4545004545450045, 0xa5a500a5a5a500a5,
-	0xeded00ededed00ed, 0x4f4f004f4f4f004f, 0x1d1d001d1d1d001d,
-	0x9292009292920092, 0x8686008686860086, 0xafaf00afafaf00af,
-	0x7c7c007c7c7c007c, 0x1f1f001f1f1f001f, 0x3e3e003e3e3e003e,
-	0xdcdc00dcdcdc00dc, 0x5e5e005e5e5e005e, 0x0b0b000b0b0b000b,
-	0xa6a600a6a6a600a6, 0x3939003939390039, 0xd5d500d5d5d500d5,
-	0x5d5d005d5d5d005d, 0xd9d900d9d9d900d9, 0x5a5a005a5a5a005a,
-	0x5151005151510051, 0x6c6c006c6c6c006c, 0x8b8b008b8b8b008b,
-	0x9a9a009a9a9a009a, 0xfbfb00fbfbfb00fb, 0xb0b000b0b0b000b0,
-	0x7474007474740074, 0x2b2b002b2b2b002b, 0xf0f000f0f0f000f0,
-	0x8484008484840084, 0xdfdf00dfdfdf00df, 0xcbcb00cbcbcb00cb,
-	0x3434003434340034, 0x7676007676760076, 0x6d6d006d6d6d006d,
-	0xa9a900a9a9a900a9, 0xd1d100d1d1d100d1, 0x0404000404040004,
-	0x1414001414140014, 0x3a3a003a3a3a003a, 0xdede00dedede00de,
-	0x1111001111110011, 0x3232003232320032, 0x9c9c009c9c9c009c,
-	0x5353005353530053, 0xf2f200f2f2f200f2, 0xfefe00fefefe00fe,
-	0xcfcf00cfcfcf00cf, 0xc3c300c3c3c300c3, 0x7a7a007a7a7a007a,
-	0x2424002424240024, 0xe8e800e8e8e800e8, 0x6060006060600060,
-	0x6969006969690069, 0xaaaa00aaaaaa00aa, 0xa0a000a0a0a000a0,
-	0xa1a100a1a1a100a1, 0x6262006262620062, 0x5454005454540054,
-	0x1e1e001e1e1e001e, 0xe0e000e0e0e000e0, 0x6464006464640064,
-	0x1010001010100010, 0x0000000000000000, 0xa3a300a3a3a300a3,
-	0x7575007575750075, 0x8a8a008a8a8a008a, 0xe6e600e6e6e600e6,
-	0x0909000909090009, 0xdddd00dddddd00dd, 0x8787008787870087,
-	0x8383008383830083, 0xcdcd00cdcdcd00cd, 0x9090009090900090,
-	0x7373007373730073, 0xf6f600f6f6f600f6, 0x9d9d009d9d9d009d,
-	0xbfbf00bfbfbf00bf, 0x5252005252520052, 0xd8d800d8d8d800d8,
-	0xc8c800c8c8c800c8, 0xc6c600c6c6c600c6, 0x8181008181810081,
-	0x6f6f006f6f6f006f, 0x1313001313130013, 0x6363006363630063,
-	0xe9e900e9e9e900e9, 0xa7a700a7a7a700a7, 0x9f9f009f9f9f009f,
-	0xbcbc00bcbcbc00bc, 0x2929002929290029, 0xf9f900f9f9f900f9,
-	0x2f2f002f2f2f002f, 0xb4b400b4b4b400b4, 0x7878007878780078,
-	0x0606000606060006, 0xe7e700e7e7e700e7, 0x7171007171710071,
-	0xd4d400d4d4d400d4, 0xabab00ababab00ab, 0x8888008888880088,
-	0x8d8d008d8d8d008d, 0x7272007272720072, 0xb9b900b9b9b900b9,
-	0xf8f800f8f8f800f8, 0xacac00acacac00ac, 0x3636003636360036,
-	0x2a2a002a2a2a002a, 0x3c3c003c3c3c003c, 0xf1f100f1f1f100f1,
-	0x4040004040400040, 0xd3d300d3d3d300d3, 0xbbbb00bbbbbb00bb,
-	0x4343004343430043, 0x1515001515150015, 0xadad00adadad00ad,
-	0x7777007777770077, 0x8080008080800080, 0x8282008282820082,
-	0xecec00ececec00ec, 0x2727002727270027, 0xe5e500e5e5e500e5,
-	0x8585008585850085, 0x3535003535350035, 0x0c0c000c0c0c000c,
-	0x4141004141410041, 0xefef00efefef00ef, 0x9393009393930093,
-	0x1919001919190019, 0x2121002121210021, 0x0e0e000e0e0e000e,
-	0x4e4e004e4e4e004e, 0x6565006565650065, 0xbdbd00bdbdbd00bd,
-	0xb8b800b8b8b800b8, 0x8f8f008f8f8f008f, 0xebeb00ebebeb00eb,
-	0xcece00cecece00ce, 0x3030003030300030, 0x5f5f005f5f5f005f,
-	0xc5c500c5c5c500c5, 0x1a1a001a1a1a001a, 0xe1e100e1e1e100e1,
-	0xcaca00cacaca00ca, 0x4747004747470047, 0x3d3d003d3d3d003d,
-	0x0101000101010001, 0xd6d600d6d6d600d6, 0x5656005656560056,
-	0x4d4d004d4d4d004d, 0x0d0d000d0d0d000d, 0x6666006666660066,
-	0xcccc00cccccc00cc, 0x2d2d002d2d2d002d, 0x1212001212120012,
-	0x2020002020200020, 0xb1b100b1b1b100b1, 0x9999009999990099,
-	0x4c4c004c4c4c004c, 0xc2c200c2c2c200c2, 0x7e7e007e7e7e007e,
-	0x0505000505050005, 0xb7b700b7b7b700b7, 0x3131003131310031,
-	0x1717001717170017, 0xd7d700d7d7d700d7, 0x5858005858580058,
-	0x6161006161610061, 0x1b1b001b1b1b001b, 0x1c1c001c1c1c001c,
-	0x0f0f000f0f0f000f, 0x1616001616160016, 0x1818001818180018,
-	0x2222002222220022, 0x4444004444440044, 0xb2b200b2b2b200b2,
-	0xb5b500b5b5b500b5, 0x9191009191910091, 0x0808000808080008,
-	0xa8a800a8a8a800a8, 0xfcfc00fcfcfc00fc, 0x5050005050500050,
-	0xd0d000d0d0d000d0, 0x7d7d007d7d7d007d, 0x8989008989890089,
-	0x9797009797970097, 0x5b5b005b5b5b005b, 0x9595009595950095,
-	0xffff00ffffff00ff, 0xd2d200d2d2d200d2, 0xc4c400c4c4c400c4,
-	0x4848004848480048, 0xf7f700f7f7f700f7, 0xdbdb00dbdbdb00db,
-	0x0303000303030003, 0xdada00dadada00da, 0x3f3f003f3f3f003f,
-	0x9494009494940094, 0x5c5c005c5c5c005c, 0x0202000202020002,
-	0x4a4a004a4a4a004a, 0x3333003333330033, 0x6767006767670067,
-	0xf3f300f3f3f300f3, 0x7f7f007f7f7f007f, 0xe2e200e2e2e200e2,
-	0x9b9b009b9b9b009b, 0x2626002626260026, 0x3737003737370037,
-	0x3b3b003b3b3b003b, 0x9696009696960096, 0x4b4b004b4b4b004b,
-	0xbebe00bebebe00be, 0x2e2e002e2e2e002e, 0x7979007979790079,
-	0x8c8c008c8c8c008c, 0x6e6e006e6e6e006e, 0x8e8e008e8e8e008e,
-	0xf5f500f5f5f500f5, 0xb6b600b6b6b600b6, 0xfdfd00fdfdfd00fd,
-	0x5959005959590059, 0x9898009898980098, 0x6a6a006a6a6a006a,
-	0x4646004646460046, 0xbaba00bababa00ba, 0x2525002525250025,
-	0x4242004242420042, 0xa2a200a2a2a200a2, 0xfafa00fafafa00fa,
-	0x0707000707070007, 0x5555005555550055, 0xeeee00eeeeee00ee,
-	0x0a0a000a0a0a000a, 0x4949004949490049, 0x6868006868680068,
-	0x3838003838380038, 0xa4a400a4a4a400a4, 0x2828002828280028,
-	0x7b7b007b7b7b007b, 0xc9c900c9c9c900c9, 0xc1c100c1c1c100c1,
-	0xe3e300e3e3e300e3, 0xf4f400f4f4f400f4, 0xc7c700c7c7c700c7,
-	0x9e9e009e9e9e009e,
+	0x7070007070700070ULL, 0x2c2c002c2c2c002cULL, 0xb3b300b3b3b300b3ULL,
+	0xc0c000c0c0c000c0ULL, 0xe4e400e4e4e400e4ULL, 0x5757005757570057ULL,
+	0xeaea00eaeaea00eaULL, 0xaeae00aeaeae00aeULL, 0x2323002323230023ULL,
+	0x6b6b006b6b6b006bULL, 0x4545004545450045ULL, 0xa5a500a5a5a500a5ULL,
+	0xeded00ededed00edULL, 0x4f4f004f4f4f004fULL, 0x1d1d001d1d1d001dULL,
+	0x9292009292920092ULL, 0x8686008686860086ULL, 0xafaf00afafaf00afULL,
+	0x7c7c007c7c7c007cULL, 0x1f1f001f1f1f001fULL, 0x3e3e003e3e3e003eULL,
+	0xdcdc00dcdcdc00dcULL, 0x5e5e005e5e5e005eULL, 0x0b0b000b0b0b000bULL,
+	0xa6a600a6a6a600a6ULL, 0x3939003939390039ULL, 0xd5d500d5d5d500d5ULL,
+	0x5d5d005d5d5d005dULL, 0xd9d900d9d9d900d9ULL, 0x5a5a005a5a5a005aULL,
+	0x5151005151510051ULL, 0x6c6c006c6c6c006cULL, 0x8b8b008b8b8b008bULL,
+	0x9a9a009a9a9a009aULL, 0xfbfb00fbfbfb00fbULL, 0xb0b000b0b0b000b0ULL,
+	0x7474007474740074ULL, 0x2b2b002b2b2b002bULL, 0xf0f000f0f0f000f0ULL,
+	0x8484008484840084ULL, 0xdfdf00dfdfdf00dfULL, 0xcbcb00cbcbcb00cbULL,
+	0x3434003434340034ULL, 0x7676007676760076ULL, 0x6d6d006d6d6d006dULL,
+	0xa9a900a9a9a900a9ULL, 0xd1d100d1d1d100d1ULL, 0x0404000404040004ULL,
+	0x1414001414140014ULL, 0x3a3a003a3a3a003aULL, 0xdede00dedede00deULL,
+	0x1111001111110011ULL, 0x3232003232320032ULL, 0x9c9c009c9c9c009cULL,
+	0x5353005353530053ULL, 0xf2f200f2f2f200f2ULL, 0xfefe00fefefe00feULL,
+	0xcfcf00cfcfcf00cfULL, 0xc3c300c3c3c300c3ULL, 0x7a7a007a7a7a007aULL,
+	0x2424002424240024ULL, 0xe8e800e8e8e800e8ULL, 0x6060006060600060ULL,
+	0x6969006969690069ULL, 0xaaaa00aaaaaa00aaULL, 0xa0a000a0a0a000a0ULL,
+	0xa1a100a1a1a100a1ULL, 0x6262006262620062ULL, 0x5454005454540054ULL,
+	0x1e1e001e1e1e001eULL, 0xe0e000e0e0e000e0ULL, 0x6464006464640064ULL,
+	0x1010001010100010ULL, 0x0000000000000000ULL, 0xa3a300a3a3a300a3ULL,
+	0x7575007575750075ULL, 0x8a8a008a8a8a008aULL, 0xe6e600e6e6e600e6ULL,
+	0x0909000909090009ULL, 0xdddd00dddddd00ddULL, 0x8787008787870087ULL,
+	0x8383008383830083ULL, 0xcdcd00cdcdcd00cdULL, 0x9090009090900090ULL,
+	0x7373007373730073ULL, 0xf6f600f6f6f600f6ULL, 0x9d9d009d9d9d009dULL,
+	0xbfbf00bfbfbf00bfULL, 0x5252005252520052ULL, 0xd8d800d8d8d800d8ULL,
+	0xc8c800c8c8c800c8ULL, 0xc6c600c6c6c600c6ULL, 0x8181008181810081ULL,
+	0x6f6f006f6f6f006fULL, 0x1313001313130013ULL, 0x6363006363630063ULL,
+	0xe9e900e9e9e900e9ULL, 0xa7a700a7a7a700a7ULL, 0x9f9f009f9f9f009fULL,
+	0xbcbc00bcbcbc00bcULL, 0x2929002929290029ULL, 0xf9f900f9f9f900f9ULL,
+	0x2f2f002f2f2f002fULL, 0xb4b400b4b4b400b4ULL, 0x7878007878780078ULL,
+	0x0606000606060006ULL, 0xe7e700e7e7e700e7ULL, 0x7171007171710071ULL,
+	0xd4d400d4d4d400d4ULL, 0xabab00ababab00abULL, 0x8888008888880088ULL,
+	0x8d8d008d8d8d008dULL, 0x7272007272720072ULL, 0xb9b900b9b9b900b9ULL,
+	0xf8f800f8f8f800f8ULL, 0xacac00acacac00acULL, 0x3636003636360036ULL,
+	0x2a2a002a2a2a002aULL, 0x3c3c003c3c3c003cULL, 0xf1f100f1f1f100f1ULL,
+	0x4040004040400040ULL, 0xd3d300d3d3d300d3ULL, 0xbbbb00bbbbbb00bbULL,
+	0x4343004343430043ULL, 0x1515001515150015ULL, 0xadad00adadad00adULL,
+	0x7777007777770077ULL, 0x8080008080800080ULL, 0x8282008282820082ULL,
+	0xecec00ececec00ecULL, 0x2727002727270027ULL, 0xe5e500e5e5e500e5ULL,
+	0x8585008585850085ULL, 0x3535003535350035ULL, 0x0c0c000c0c0c000cULL,
+	0x4141004141410041ULL, 0xefef00efefef00efULL, 0x9393009393930093ULL,
+	0x1919001919190019ULL, 0x2121002121210021ULL, 0x0e0e000e0e0e000eULL,
+	0x4e4e004e4e4e004eULL, 0x6565006565650065ULL, 0xbdbd00bdbdbd00bdULL,
+	0xb8b800b8b8b800b8ULL, 0x8f8f008f8f8f008fULL, 0xebeb00ebebeb00ebULL,
+	0xcece00cecece00ceULL, 0x3030003030300030ULL, 0x5f5f005f5f5f005fULL,
+	0xc5c500c5c5c500c5ULL, 0x1a1a001a1a1a001aULL, 0xe1e100e1e1e100e1ULL,
+	0xcaca00cacaca00caULL, 0x4747004747470047ULL, 0x3d3d003d3d3d003dULL,
+	0x0101000101010001ULL, 0xd6d600d6d6d600d6ULL, 0x5656005656560056ULL,
+	0x4d4d004d4d4d004dULL, 0x0d0d000d0d0d000dULL, 0x6666006666660066ULL,
+	0xcccc00cccccc00ccULL, 0x2d2d002d2d2d002dULL, 0x1212001212120012ULL,
+	0x2020002020200020ULL, 0xb1b100b1b1b100b1ULL, 0x9999009999990099ULL,
+	0x4c4c004c4c4c004cULL, 0xc2c200c2c2c200c2ULL, 0x7e7e007e7e7e007eULL,
+	0x0505000505050005ULL, 0xb7b700b7b7b700b7ULL, 0x3131003131310031ULL,
+	0x1717001717170017ULL, 0xd7d700d7d7d700d7ULL, 0x5858005858580058ULL,
+	0x6161006161610061ULL, 0x1b1b001b1b1b001bULL, 0x1c1c001c1c1c001cULL,
+	0x0f0f000f0f0f000fULL, 0x1616001616160016ULL, 0x1818001818180018ULL,
+	0x2222002222220022ULL, 0x4444004444440044ULL, 0xb2b200b2b2b200b2ULL,
+	0xb5b500b5b5b500b5ULL, 0x9191009191910091ULL, 0x0808000808080008ULL,
+	0xa8a800a8a8a800a8ULL, 0xfcfc00fcfcfc00fcULL, 0x5050005050500050ULL,
+	0xd0d000d0d0d000d0ULL, 0x7d7d007d7d7d007dULL, 0x8989008989890089ULL,
+	0x9797009797970097ULL, 0x5b5b005b5b5b005bULL, 0x9595009595950095ULL,
+	0xffff00ffffff00ffULL, 0xd2d200d2d2d200d2ULL, 0xc4c400c4c4c400c4ULL,
+	0x4848004848480048ULL, 0xf7f700f7f7f700f7ULL, 0xdbdb00dbdbdb00dbULL,
+	0x0303000303030003ULL, 0xdada00dadada00daULL, 0x3f3f003f3f3f003fULL,
+	0x9494009494940094ULL, 0x5c5c005c5c5c005cULL, 0x0202000202020002ULL,
+	0x4a4a004a4a4a004aULL, 0x3333003333330033ULL, 0x6767006767670067ULL,
+	0xf3f300f3f3f300f3ULL, 0x7f7f007f7f7f007fULL, 0xe2e200e2e2e200e2ULL,
+	0x9b9b009b9b9b009bULL, 0x2626002626260026ULL, 0x3737003737370037ULL,
+	0x3b3b003b3b3b003bULL, 0x9696009696960096ULL, 0x4b4b004b4b4b004bULL,
+	0xbebe00bebebe00beULL, 0x2e2e002e2e2e002eULL, 0x7979007979790079ULL,
+	0x8c8c008c8c8c008cULL, 0x6e6e006e6e6e006eULL, 0x8e8e008e8e8e008eULL,
+	0xf5f500f5f5f500f5ULL, 0xb6b600b6b6b600b6ULL, 0xfdfd00fdfdfd00fdULL,
+	0x5959005959590059ULL, 0x9898009898980098ULL, 0x6a6a006a6a6a006aULL,
+	0x4646004646460046ULL, 0xbaba00bababa00baULL, 0x2525002525250025ULL,
+	0x4242004242420042ULL, 0xa2a200a2a2a200a2ULL, 0xfafa00fafafa00faULL,
+	0x0707000707070007ULL, 0x5555005555550055ULL, 0xeeee00eeeeee00eeULL,
+	0x0a0a000a0a0a000aULL, 0x4949004949490049ULL, 0x6868006868680068ULL,
+	0x3838003838380038ULL, 0xa4a400a4a4a400a4ULL, 0x2828002828280028ULL,
+	0x7b7b007b7b7b007bULL, 0xc9c900c9c9c900c9ULL, 0xc1c100c1c1c100c1ULL,
+	0xe3e300e3e3e300e3ULL, 0xf4f400f4f4f400f4ULL, 0xc7c700c7c7c700c7ULL,
+	0x9e9e009e9e9e009eULL,
 };
 
 const u64 camellia_sp11101110[256] = {
-	0x7070700070707000, 0x8282820082828200, 0x2c2c2c002c2c2c00,
-	0xececec00ececec00, 0xb3b3b300b3b3b300, 0x2727270027272700,
-	0xc0c0c000c0c0c000, 0xe5e5e500e5e5e500, 0xe4e4e400e4e4e400,
-	0x8585850085858500, 0x5757570057575700, 0x3535350035353500,
-	0xeaeaea00eaeaea00, 0x0c0c0c000c0c0c00, 0xaeaeae00aeaeae00,
-	0x4141410041414100, 0x2323230023232300, 0xefefef00efefef00,
-	0x6b6b6b006b6b6b00, 0x9393930093939300, 0x4545450045454500,
-	0x1919190019191900, 0xa5a5a500a5a5a500, 0x2121210021212100,
-	0xededed00ededed00, 0x0e0e0e000e0e0e00, 0x4f4f4f004f4f4f00,
-	0x4e4e4e004e4e4e00, 0x1d1d1d001d1d1d00, 0x6565650065656500,
-	0x9292920092929200, 0xbdbdbd00bdbdbd00, 0x8686860086868600,
-	0xb8b8b800b8b8b800, 0xafafaf00afafaf00, 0x8f8f8f008f8f8f00,
-	0x7c7c7c007c7c7c00, 0xebebeb00ebebeb00, 0x1f1f1f001f1f1f00,
-	0xcecece00cecece00, 0x3e3e3e003e3e3e00, 0x3030300030303000,
-	0xdcdcdc00dcdcdc00, 0x5f5f5f005f5f5f00, 0x5e5e5e005e5e5e00,
-	0xc5c5c500c5c5c500, 0x0b0b0b000b0b0b00, 0x1a1a1a001a1a1a00,
-	0xa6a6a600a6a6a600, 0xe1e1e100e1e1e100, 0x3939390039393900,
-	0xcacaca00cacaca00, 0xd5d5d500d5d5d500, 0x4747470047474700,
-	0x5d5d5d005d5d5d00, 0x3d3d3d003d3d3d00, 0xd9d9d900d9d9d900,
-	0x0101010001010100, 0x5a5a5a005a5a5a00, 0xd6d6d600d6d6d600,
-	0x5151510051515100, 0x5656560056565600, 0x6c6c6c006c6c6c00,
-	0x4d4d4d004d4d4d00, 0x8b8b8b008b8b8b00, 0x0d0d0d000d0d0d00,
-	0x9a9a9a009a9a9a00, 0x6666660066666600, 0xfbfbfb00fbfbfb00,
-	0xcccccc00cccccc00, 0xb0b0b000b0b0b000, 0x2d2d2d002d2d2d00,
-	0x7474740074747400, 0x1212120012121200, 0x2b2b2b002b2b2b00,
-	0x2020200020202000, 0xf0f0f000f0f0f000, 0xb1b1b100b1b1b100,
-	0x8484840084848400, 0x9999990099999900, 0xdfdfdf00dfdfdf00,
-	0x4c4c4c004c4c4c00, 0xcbcbcb00cbcbcb00, 0xc2c2c200c2c2c200,
-	0x3434340034343400, 0x7e7e7e007e7e7e00, 0x7676760076767600,
-	0x0505050005050500, 0x6d6d6d006d6d6d00, 0xb7b7b700b7b7b700,
-	0xa9a9a900a9a9a900, 0x3131310031313100, 0xd1d1d100d1d1d100,
-	0x1717170017171700, 0x0404040004040400, 0xd7d7d700d7d7d700,
-	0x1414140014141400, 0x5858580058585800, 0x3a3a3a003a3a3a00,
-	0x6161610061616100, 0xdedede00dedede00, 0x1b1b1b001b1b1b00,
-	0x1111110011111100, 0x1c1c1c001c1c1c00, 0x3232320032323200,
-	0x0f0f0f000f0f0f00, 0x9c9c9c009c9c9c00, 0x1616160016161600,
-	0x5353530053535300, 0x1818180018181800, 0xf2f2f200f2f2f200,
-	0x2222220022222200, 0xfefefe00fefefe00, 0x4444440044444400,
-	0xcfcfcf00cfcfcf00, 0xb2b2b200b2b2b200, 0xc3c3c300c3c3c300,
-	0xb5b5b500b5b5b500, 0x7a7a7a007a7a7a00, 0x9191910091919100,
-	0x2424240024242400, 0x0808080008080800, 0xe8e8e800e8e8e800,
-	0xa8a8a800a8a8a800, 0x6060600060606000, 0xfcfcfc00fcfcfc00,
-	0x6969690069696900, 0x5050500050505000, 0xaaaaaa00aaaaaa00,
-	0xd0d0d000d0d0d000, 0xa0a0a000a0a0a000, 0x7d7d7d007d7d7d00,
-	0xa1a1a100a1a1a100, 0x8989890089898900, 0x6262620062626200,
-	0x9797970097979700, 0x5454540054545400, 0x5b5b5b005b5b5b00,
-	0x1e1e1e001e1e1e00, 0x9595950095959500, 0xe0e0e000e0e0e000,
-	0xffffff00ffffff00, 0x6464640064646400, 0xd2d2d200d2d2d200,
-	0x1010100010101000, 0xc4c4c400c4c4c400, 0x0000000000000000,
-	0x4848480048484800, 0xa3a3a300a3a3a300, 0xf7f7f700f7f7f700,
-	0x7575750075757500, 0xdbdbdb00dbdbdb00, 0x8a8a8a008a8a8a00,
-	0x0303030003030300, 0xe6e6e600e6e6e600, 0xdadada00dadada00,
-	0x0909090009090900, 0x3f3f3f003f3f3f00, 0xdddddd00dddddd00,
-	0x9494940094949400, 0x8787870087878700, 0x5c5c5c005c5c5c00,
-	0x8383830083838300, 0x0202020002020200, 0xcdcdcd00cdcdcd00,
-	0x4a4a4a004a4a4a00, 0x9090900090909000, 0x3333330033333300,
-	0x7373730073737300, 0x6767670067676700, 0xf6f6f600f6f6f600,
-	0xf3f3f300f3f3f300, 0x9d9d9d009d9d9d00, 0x7f7f7f007f7f7f00,
-	0xbfbfbf00bfbfbf00, 0xe2e2e200e2e2e200, 0x5252520052525200,
-	0x9b9b9b009b9b9b00, 0xd8d8d800d8d8d800, 0x2626260026262600,
-	0xc8c8c800c8c8c800, 0x3737370037373700, 0xc6c6c600c6c6c600,
-	0x3b3b3b003b3b3b00, 0x8181810081818100, 0x9696960096969600,
-	0x6f6f6f006f6f6f00, 0x4b4b4b004b4b4b00, 0x1313130013131300,
-	0xbebebe00bebebe00, 0x6363630063636300, 0x2e2e2e002e2e2e00,
-	0xe9e9e900e9e9e900, 0x7979790079797900, 0xa7a7a700a7a7a700,
-	0x8c8c8c008c8c8c00, 0x9f9f9f009f9f9f00, 0x6e6e6e006e6e6e00,
-	0xbcbcbc00bcbcbc00, 0x8e8e8e008e8e8e00, 0x2929290029292900,
-	0xf5f5f500f5f5f500, 0xf9f9f900f9f9f900, 0xb6b6b600b6b6b600,
-	0x2f2f2f002f2f2f00, 0xfdfdfd00fdfdfd00, 0xb4b4b400b4b4b400,
-	0x5959590059595900, 0x7878780078787800, 0x9898980098989800,
-	0x0606060006060600, 0x6a6a6a006a6a6a00, 0xe7e7e700e7e7e700,
-	0x4646460046464600, 0x7171710071717100, 0xbababa00bababa00,
-	0xd4d4d400d4d4d400, 0x2525250025252500, 0xababab00ababab00,
-	0x4242420042424200, 0x8888880088888800, 0xa2a2a200a2a2a200,
-	0x8d8d8d008d8d8d00, 0xfafafa00fafafa00, 0x7272720072727200,
-	0x0707070007070700, 0xb9b9b900b9b9b900, 0x5555550055555500,
-	0xf8f8f800f8f8f800, 0xeeeeee00eeeeee00, 0xacacac00acacac00,
-	0x0a0a0a000a0a0a00, 0x3636360036363600, 0x4949490049494900,
-	0x2a2a2a002a2a2a00, 0x6868680068686800, 0x3c3c3c003c3c3c00,
-	0x3838380038383800, 0xf1f1f100f1f1f100, 0xa4a4a400a4a4a400,
-	0x4040400040404000, 0x2828280028282800, 0xd3d3d300d3d3d300,
-	0x7b7b7b007b7b7b00, 0xbbbbbb00bbbbbb00, 0xc9c9c900c9c9c900,
-	0x4343430043434300, 0xc1c1c100c1c1c100, 0x1515150015151500,
-	0xe3e3e300e3e3e300, 0xadadad00adadad00, 0xf4f4f400f4f4f400,
-	0x7777770077777700, 0xc7c7c700c7c7c700, 0x8080800080808000,
-	0x9e9e9e009e9e9e00,
+	0x7070700070707000ULL, 0x8282820082828200ULL, 0x2c2c2c002c2c2c00ULL,
+	0xececec00ececec00ULL, 0xb3b3b300b3b3b300ULL, 0x2727270027272700ULL,
+	0xc0c0c000c0c0c000ULL, 0xe5e5e500e5e5e500ULL, 0xe4e4e400e4e4e400ULL,
+	0x8585850085858500ULL, 0x5757570057575700ULL, 0x3535350035353500ULL,
+	0xeaeaea00eaeaea00ULL, 0x0c0c0c000c0c0c00ULL, 0xaeaeae00aeaeae00ULL,
+	0x4141410041414100ULL, 0x2323230023232300ULL, 0xefefef00efefef00ULL,
+	0x6b6b6b006b6b6b00ULL, 0x9393930093939300ULL, 0x4545450045454500ULL,
+	0x1919190019191900ULL, 0xa5a5a500a5a5a500ULL, 0x2121210021212100ULL,
+	0xededed00ededed00ULL, 0x0e0e0e000e0e0e00ULL, 0x4f4f4f004f4f4f00ULL,
+	0x4e4e4e004e4e4e00ULL, 0x1d1d1d001d1d1d00ULL, 0x6565650065656500ULL,
+	0x9292920092929200ULL, 0xbdbdbd00bdbdbd00ULL, 0x8686860086868600ULL,
+	0xb8b8b800b8b8b800ULL, 0xafafaf00afafaf00ULL, 0x8f8f8f008f8f8f00ULL,
+	0x7c7c7c007c7c7c00ULL, 0xebebeb00ebebeb00ULL, 0x1f1f1f001f1f1f00ULL,
+	0xcecece00cecece00ULL, 0x3e3e3e003e3e3e00ULL, 0x3030300030303000ULL,
+	0xdcdcdc00dcdcdc00ULL, 0x5f5f5f005f5f5f00ULL, 0x5e5e5e005e5e5e00ULL,
+	0xc5c5c500c5c5c500ULL, 0x0b0b0b000b0b0b00ULL, 0x1a1a1a001a1a1a00ULL,
+	0xa6a6a600a6a6a600ULL, 0xe1e1e100e1e1e100ULL, 0x3939390039393900ULL,
+	0xcacaca00cacaca00ULL, 0xd5d5d500d5d5d500ULL, 0x4747470047474700ULL,
+	0x5d5d5d005d5d5d00ULL, 0x3d3d3d003d3d3d00ULL, 0xd9d9d900d9d9d900ULL,
+	0x0101010001010100ULL, 0x5a5a5a005a5a5a00ULL, 0xd6d6d600d6d6d600ULL,
+	0x5151510051515100ULL, 0x5656560056565600ULL, 0x6c6c6c006c6c6c00ULL,
+	0x4d4d4d004d4d4d00ULL, 0x8b8b8b008b8b8b00ULL, 0x0d0d0d000d0d0d00ULL,
+	0x9a9a9a009a9a9a00ULL, 0x6666660066666600ULL, 0xfbfbfb00fbfbfb00ULL,
+	0xcccccc00cccccc00ULL, 0xb0b0b000b0b0b000ULL, 0x2d2d2d002d2d2d00ULL,
+	0x7474740074747400ULL, 0x1212120012121200ULL, 0x2b2b2b002b2b2b00ULL,
+	0x2020200020202000ULL, 0xf0f0f000f0f0f000ULL, 0xb1b1b100b1b1b100ULL,
+	0x8484840084848400ULL, 0x9999990099999900ULL, 0xdfdfdf00dfdfdf00ULL,
+	0x4c4c4c004c4c4c00ULL, 0xcbcbcb00cbcbcb00ULL, 0xc2c2c200c2c2c200ULL,
+	0x3434340034343400ULL, 0x7e7e7e007e7e7e00ULL, 0x7676760076767600ULL,
+	0x0505050005050500ULL, 0x6d6d6d006d6d6d00ULL, 0xb7b7b700b7b7b700ULL,
+	0xa9a9a900a9a9a900ULL, 0x3131310031313100ULL, 0xd1d1d100d1d1d100ULL,
+	0x1717170017171700ULL, 0x0404040004040400ULL, 0xd7d7d700d7d7d700ULL,
+	0x1414140014141400ULL, 0x5858580058585800ULL, 0x3a3a3a003a3a3a00ULL,
+	0x6161610061616100ULL, 0xdedede00dedede00ULL, 0x1b1b1b001b1b1b00ULL,
+	0x1111110011111100ULL, 0x1c1c1c001c1c1c00ULL, 0x3232320032323200ULL,
+	0x0f0f0f000f0f0f00ULL, 0x9c9c9c009c9c9c00ULL, 0x1616160016161600ULL,
+	0x5353530053535300ULL, 0x1818180018181800ULL, 0xf2f2f200f2f2f200ULL,
+	0x2222220022222200ULL, 0xfefefe00fefefe00ULL, 0x4444440044444400ULL,
+	0xcfcfcf00cfcfcf00ULL, 0xb2b2b200b2b2b200ULL, 0xc3c3c300c3c3c300ULL,
+	0xb5b5b500b5b5b500ULL, 0x7a7a7a007a7a7a00ULL, 0x9191910091919100ULL,
+	0x2424240024242400ULL, 0x0808080008080800ULL, 0xe8e8e800e8e8e800ULL,
+	0xa8a8a800a8a8a800ULL, 0x6060600060606000ULL, 0xfcfcfc00fcfcfc00ULL,
+	0x6969690069696900ULL, 0x5050500050505000ULL, 0xaaaaaa00aaaaaa00ULL,
+	0xd0d0d000d0d0d000ULL, 0xa0a0a000a0a0a000ULL, 0x7d7d7d007d7d7d00ULL,
+	0xa1a1a100a1a1a100ULL, 0x8989890089898900ULL, 0x6262620062626200ULL,
+	0x9797970097979700ULL, 0x5454540054545400ULL, 0x5b5b5b005b5b5b00ULL,
+	0x1e1e1e001e1e1e00ULL, 0x9595950095959500ULL, 0xe0e0e000e0e0e000ULL,
+	0xffffff00ffffff00ULL, 0x6464640064646400ULL, 0xd2d2d200d2d2d200ULL,
+	0x1010100010101000ULL, 0xc4c4c400c4c4c400ULL, 0x0000000000000000ULL,
+	0x4848480048484800ULL, 0xa3a3a300a3a3a300ULL, 0xf7f7f700f7f7f700ULL,
+	0x7575750075757500ULL, 0xdbdbdb00dbdbdb00ULL, 0x8a8a8a008a8a8a00ULL,
+	0x0303030003030300ULL, 0xe6e6e600e6e6e600ULL, 0xdadada00dadada00ULL,
+	0x0909090009090900ULL, 0x3f3f3f003f3f3f00ULL, 0xdddddd00dddddd00ULL,
+	0x9494940094949400ULL, 0x8787870087878700ULL, 0x5c5c5c005c5c5c00ULL,
+	0x8383830083838300ULL, 0x0202020002020200ULL, 0xcdcdcd00cdcdcd00ULL,
+	0x4a4a4a004a4a4a00ULL, 0x9090900090909000ULL, 0x3333330033333300ULL,
+	0x7373730073737300ULL, 0x6767670067676700ULL, 0xf6f6f600f6f6f600ULL,
+	0xf3f3f300f3f3f300ULL, 0x9d9d9d009d9d9d00ULL, 0x7f7f7f007f7f7f00ULL,
+	0xbfbfbf00bfbfbf00ULL, 0xe2e2e200e2e2e200ULL, 0x5252520052525200ULL,
+	0x9b9b9b009b9b9b00ULL, 0xd8d8d800d8d8d800ULL, 0x2626260026262600ULL,
+	0xc8c8c800c8c8c800ULL, 0x3737370037373700ULL, 0xc6c6c600c6c6c600ULL,
+	0x3b3b3b003b3b3b00ULL, 0x8181810081818100ULL, 0x9696960096969600ULL,
+	0x6f6f6f006f6f6f00ULL, 0x4b4b4b004b4b4b00ULL, 0x1313130013131300ULL,
+	0xbebebe00bebebe00ULL, 0x6363630063636300ULL, 0x2e2e2e002e2e2e00ULL,
+	0xe9e9e900e9e9e900ULL, 0x7979790079797900ULL, 0xa7a7a700a7a7a700ULL,
+	0x8c8c8c008c8c8c00ULL, 0x9f9f9f009f9f9f00ULL, 0x6e6e6e006e6e6e00ULL,
+	0xbcbcbc00bcbcbc00ULL, 0x8e8e8e008e8e8e00ULL, 0x2929290029292900ULL,
+	0xf5f5f500f5f5f500ULL, 0xf9f9f900f9f9f900ULL, 0xb6b6b600b6b6b600ULL,
+	0x2f2f2f002f2f2f00ULL, 0xfdfdfd00fdfdfd00ULL, 0xb4b4b400b4b4b400ULL,
+	0x5959590059595900ULL, 0x7878780078787800ULL, 0x9898980098989800ULL,
+	0x0606060006060600ULL, 0x6a6a6a006a6a6a00ULL, 0xe7e7e700e7e7e700ULL,
+	0x4646460046464600ULL, 0x7171710071717100ULL, 0xbababa00bababa00ULL,
+	0xd4d4d400d4d4d400ULL, 0x2525250025252500ULL, 0xababab00ababab00ULL,
+	0x4242420042424200ULL, 0x8888880088888800ULL, 0xa2a2a200a2a2a200ULL,
+	0x8d8d8d008d8d8d00ULL, 0xfafafa00fafafa00ULL, 0x7272720072727200ULL,
+	0x0707070007070700ULL, 0xb9b9b900b9b9b900ULL, 0x5555550055555500ULL,
+	0xf8f8f800f8f8f800ULL, 0xeeeeee00eeeeee00ULL, 0xacacac00acacac00ULL,
+	0x0a0a0a000a0a0a00ULL, 0x3636360036363600ULL, 0x4949490049494900ULL,
+	0x2a2a2a002a2a2a00ULL, 0x6868680068686800ULL, 0x3c3c3c003c3c3c00ULL,
+	0x3838380038383800ULL, 0xf1f1f100f1f1f100ULL, 0xa4a4a400a4a4a400ULL,
+	0x4040400040404000ULL, 0x2828280028282800ULL, 0xd3d3d300d3d3d300ULL,
+	0x7b7b7b007b7b7b00ULL, 0xbbbbbb00bbbbbb00ULL, 0xc9c9c900c9c9c900ULL,
+	0x4343430043434300ULL, 0xc1c1c100c1c1c100ULL, 0x1515150015151500ULL,
+	0xe3e3e300e3e3e300ULL, 0xadadad00adadad00ULL, 0xf4f4f400f4f4f400ULL,
+	0x7777770077777700ULL, 0xc7c7c700c7c7c700ULL, 0x8080800080808000ULL,
+	0x9e9e9e009e9e9e00ULL,
 };
 
 /* key constants */
-- 
cgit v1.2.3-70-g09d2


From d4c9dbc61fe0ca042b835c6f234af12fa5f18310 Mon Sep 17 00:00:00 2001
From: Jan Beulich <JBeulich@suse.com>
Date: Fri, 7 Sep 2012 07:54:52 +0100
Subject: x86/mm: Fix range check in tlbflush debugfs interface

Since the shift count settable there is used for shifting values
of type "unsigned long", its value must not match or exceed
BITS_PER_LONG (otherwise the shift operations are undefined).

Similarly, the value must not be negative (but -1 must be
permitted, as that's the value used to distinguish the case of
the fine grained flushing being disabled).

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Cc: Alex Shi <alex.shi@intel.com>
Link: http://lkml.kernel.org/r/5049B65C020000780009990C@nat28.tlf.novell.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/mm/tlb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 613cd83e8c0..a085c560b4a 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -320,7 +320,7 @@ static ssize_t tlbflush_write_file(struct file *file,
 	if (kstrtos8(buf, 0, &shift))
 		return -EINVAL;
 
-	if (shift > 64)
+	if (shift < -1 || shift >= BITS_PER_LONG)
 		return -EINVAL;
 
 	tlb_flushall_shift = shift;
-- 
cgit v1.2.3-70-g09d2


From 4f97704555672f9ab48ca623561e96a9430bec9a Mon Sep 17 00:00:00 2001
From: "Ren, Yongjie" <yongjie.ren@intel.com>
Date: Fri, 7 Sep 2012 07:36:59 +0000
Subject: KVM: x86: Check INVPCID feature bit in EBX of leaf 7

Checks and operations on the INVPCID feature bit should use EBX
of CPUID leaf 7 instead of ECX.

Signed-off-by: Junjie Mao <junjie.mao@intel.com>
Signed-off-by: Yongjie Ren <yongjien.ren@intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/vmx.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index c00f03de1b7..002b4a566e2 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -6575,7 +6575,7 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
 	/* Exposing INVPCID only when PCID is exposed */
 	best = kvm_find_cpuid_entry(vcpu, 0x7, 0);
 	if (vmx_invpcid_supported() &&
-	    best && (best->ecx & bit(X86_FEATURE_INVPCID)) &&
+	    best && (best->ebx & bit(X86_FEATURE_INVPCID)) &&
 	    guest_cpuid_has_pcid(vcpu)) {
 		exec_control |= SECONDARY_EXEC_ENABLE_INVPCID;
 		vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
@@ -6585,7 +6585,7 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
 		vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
 			     exec_control);
 		if (best)
-			best->ecx &= ~bit(X86_FEATURE_INVPCID);
+			best->ebx &= ~bit(X86_FEATURE_INVPCID);
 	}
 }
 
-- 
cgit v1.2.3-70-g09d2


From 05194cfc547528a01d655262c49da3b1311c4b11 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Sun, 9 Sep 2012 11:12:04 -0700
Subject: x86, cpufeature: Add feature bit for SMAP

Add CPUID feature bit for Supervisor Mode Access Prevention (SMAP).

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Link: http://lkml.kernel.org/n/tip-ethzcr5nipikl6hd5q8ssepq@git.kernel.org
---
 arch/x86/include/asm/cpufeature.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 6b7ee5ff682..633b6176cf6 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -209,6 +209,7 @@
 #define X86_FEATURE_RTM		(9*32+11) /* Restricted Transactional Memory */
 #define X86_FEATURE_RDSEED	(9*32+18) /* The RDSEED instruction */
 #define X86_FEATURE_ADX		(9*32+19) /* The ADCX and ADOX instructions */
+#define X86_FEATURE_SMAP	(9*32+20) /* Supervisor Mode Access Prevention */
 
 #if defined(__KERNEL__) && !defined(__ASSEMBLY__)
 
-- 
cgit v1.2.3-70-g09d2


From 3dc9a633f8a65b39c5897874138027328bfb0a94 Mon Sep 17 00:00:00 2001
From: Matthew Garrett <mjg@redhat.com>
Date: Tue, 4 Sep 2012 08:28:02 +0000
Subject: acpi-cpufreq: Add support for modern AMD CPUs

The programming model for P-states on modern AMD CPUs is very similar to
that of Intel and VIA. It makes sense to consolidate this support into one
driver rather than duplicating functionality between two of them. This
patch adds support for AMDs with hardware P-state control to acpi-cpufreq.

Signed-off-by: Matthew Garrett <mjg@redhat.com>
Signed-off-by: Andre Przywara <andre.przywara@amd.com>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 arch/x86/include/asm/msr-index.h |  2 ++
 drivers/cpufreq/Kconfig.x86      |  3 ++-
 drivers/cpufreq/acpi-cpufreq.c   | 43 ++++++++++++++++++++++++++++++++++------
 3 files changed, 41 insertions(+), 7 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 957ec87385a..1e1f3eb5863 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -248,6 +248,8 @@
 
 #define MSR_IA32_PERF_STATUS		0x00000198
 #define MSR_IA32_PERF_CTL		0x00000199
+#define MSR_AMD_PERF_STATUS		0xc0010063
+#define MSR_AMD_PERF_CTL		0xc0010062
 
 #define MSR_IA32_MPERF			0x000000e7
 #define MSR_IA32_APERF			0x000000e8
diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86
index 78ff7ee4895..8d12e378a7e 100644
--- a/drivers/cpufreq/Kconfig.x86
+++ b/drivers/cpufreq/Kconfig.x86
@@ -23,7 +23,8 @@ config X86_ACPI_CPUFREQ
 	help
 	  This driver adds a CPUFreq driver which utilizes the ACPI
 	  Processor Performance States.
-	  This driver also supports Intel Enhanced Speedstep.
+	  This driver also supports Intel Enhanced Speedstep and newer
+	  AMD CPUs.
 
 	  To compile this driver as a module, choose M here: the
 	  module will be called acpi-cpufreq.
diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
index 56c6c6b4eb4..067a61f06bb 100644
--- a/drivers/cpufreq/acpi-cpufreq.c
+++ b/drivers/cpufreq/acpi-cpufreq.c
@@ -54,10 +54,12 @@ MODULE_LICENSE("GPL");
 enum {
 	UNDEFINED_CAPABLE = 0,
 	SYSTEM_INTEL_MSR_CAPABLE,
+	SYSTEM_AMD_MSR_CAPABLE,
 	SYSTEM_IO_CAPABLE,
 };
 
 #define INTEL_MSR_RANGE		(0xffff)
+#define AMD_MSR_RANGE		(0x7)
 
 struct acpi_cpufreq_data {
 	struct acpi_processor_performance *acpi_data;
@@ -82,6 +84,13 @@ static int check_est_cpu(unsigned int cpuid)
 	return cpu_has(cpu, X86_FEATURE_EST);
 }
 
+static int check_amd_hwpstate_cpu(unsigned int cpuid)
+{
+	struct cpuinfo_x86 *cpu = &cpu_data(cpuid);
+
+	return cpu_has(cpu, X86_FEATURE_HW_PSTATE);
+}
+
 static unsigned extract_io(u32 value, struct acpi_cpufreq_data *data)
 {
 	struct acpi_processor_performance *perf;
@@ -101,7 +110,11 @@ static unsigned extract_msr(u32 msr, struct acpi_cpufreq_data *data)
 	int i;
 	struct acpi_processor_performance *perf;
 
-	msr &= INTEL_MSR_RANGE;
+	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
+		msr &= AMD_MSR_RANGE;
+	else
+		msr &= INTEL_MSR_RANGE;
+
 	perf = data->acpi_data;
 
 	for (i = 0; data->freq_table[i].frequency != CPUFREQ_TABLE_END; i++) {
@@ -115,6 +128,7 @@ static unsigned extract_freq(u32 val, struct acpi_cpufreq_data *data)
 {
 	switch (data->cpu_feature) {
 	case SYSTEM_INTEL_MSR_CAPABLE:
+	case SYSTEM_AMD_MSR_CAPABLE:
 		return extract_msr(val, data);
 	case SYSTEM_IO_CAPABLE:
 		return extract_io(val, data);
@@ -150,6 +164,7 @@ static void do_drv_read(void *_cmd)
 
 	switch (cmd->type) {
 	case SYSTEM_INTEL_MSR_CAPABLE:
+	case SYSTEM_AMD_MSR_CAPABLE:
 		rdmsr(cmd->addr.msr.reg, cmd->val, h);
 		break;
 	case SYSTEM_IO_CAPABLE:
@@ -174,6 +189,9 @@ static void do_drv_write(void *_cmd)
 		lo = (lo & ~INTEL_MSR_RANGE) | (cmd->val & INTEL_MSR_RANGE);
 		wrmsr(cmd->addr.msr.reg, lo, hi);
 		break;
+	case SYSTEM_AMD_MSR_CAPABLE:
+		wrmsr(cmd->addr.msr.reg, cmd->val, 0);
+		break;
 	case SYSTEM_IO_CAPABLE:
 		acpi_os_write_port((acpi_io_address)cmd->addr.io.port,
 				cmd->val,
@@ -217,6 +235,10 @@ static u32 get_cur_val(const struct cpumask *mask)
 		cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
 		cmd.addr.msr.reg = MSR_IA32_PERF_STATUS;
 		break;
+	case SYSTEM_AMD_MSR_CAPABLE:
+		cmd.type = SYSTEM_AMD_MSR_CAPABLE;
+		cmd.addr.msr.reg = MSR_AMD_PERF_STATUS;
+		break;
 	case SYSTEM_IO_CAPABLE:
 		cmd.type = SYSTEM_IO_CAPABLE;
 		perf = per_cpu(acfreq_data, cpumask_first(mask))->acpi_data;
@@ -326,6 +348,11 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
 		cmd.addr.msr.reg = MSR_IA32_PERF_CTL;
 		cmd.val = (u32) perf->states[next_perf_state].control;
 		break;
+	case SYSTEM_AMD_MSR_CAPABLE:
+		cmd.type = SYSTEM_AMD_MSR_CAPABLE;
+		cmd.addr.msr.reg = MSR_AMD_PERF_CTL;
+		cmd.val = (u32) perf->states[next_perf_state].control;
+		break;
 	case SYSTEM_IO_CAPABLE:
 		cmd.type = SYSTEM_IO_CAPABLE;
 		cmd.addr.io.port = perf->control_register.address;
@@ -580,12 +607,16 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
 		break;
 	case ACPI_ADR_SPACE_FIXED_HARDWARE:
 		pr_debug("HARDWARE addr space\n");
-		if (!check_est_cpu(cpu)) {
-			result = -ENODEV;
-			goto err_unreg;
+		if (check_est_cpu(cpu)) {
+			data->cpu_feature = SYSTEM_INTEL_MSR_CAPABLE;
+			break;
 		}
-		data->cpu_feature = SYSTEM_INTEL_MSR_CAPABLE;
-		break;
+		if (check_amd_hwpstate_cpu(cpu)) {
+			data->cpu_feature = SYSTEM_AMD_MSR_CAPABLE;
+			break;
+		}
+		result = -ENODEV;
+		goto err_unreg;
 	default:
 		pr_debug("Unknown addr space %d\n",
 			(u32) (perf->control_register.space_id));
-- 
cgit v1.2.3-70-g09d2


From f594065faf4f9067c2283a34619fc0714e79a98d Mon Sep 17 00:00:00 2001
From: Matthew Garrett <mjg@redhat.com>
Date: Tue, 4 Sep 2012 08:28:06 +0000
Subject: ACPI: Add fixups for AMD P-state figures

Some AMD systems may round the frequencies in ACPI tables to 100MHz
boundaries. We can obtain the real frequencies from MSRs, so add a quirk
to fix these frequencies up on AMD systems.

Signed-off-by: Matthew Garrett <mjg@redhat.com>
Signed-off-by: Andre Przywara <andre.przywara@amd.com>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 arch/x86/include/asm/msr-index.h |  1 +
 drivers/acpi/processor_perflib.c | 30 ++++++++++++++++++++++++++++++
 2 files changed, 31 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 1e1f3eb5863..fbee9714d9a 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -248,6 +248,7 @@
 
 #define MSR_IA32_PERF_STATUS		0x00000198
 #define MSR_IA32_PERF_CTL		0x00000199
+#define MSR_AMD_PSTATE_DEF_BASE		0xc0010064
 #define MSR_AMD_PERF_STATUS		0xc0010063
 #define MSR_AMD_PERF_CTL		0xc0010062
 
diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c
index a093dc163a4..836bfe06904 100644
--- a/drivers/acpi/processor_perflib.c
+++ b/drivers/acpi/processor_perflib.c
@@ -324,6 +324,34 @@ static int acpi_processor_get_performance_control(struct acpi_processor *pr)
 	return result;
 }
 
+#ifdef CONFIG_X86
+/*
+ * Some AMDs have 50MHz frequency multiples, but only provide 100MHz rounding
+ * in their ACPI data. Calculate the real values and fix up the _PSS data.
+ */
+static void amd_fixup_frequency(struct acpi_processor_px *px, int i)
+{
+	u32 hi, lo, fid, did;
+	int index = px->control & 0x00000007;
+
+	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
+		return;
+
+	if ((boot_cpu_data.x86 == 0x10 && boot_cpu_data.x86_model < 10)
+	    || boot_cpu_data.x86 == 0x11) {
+		rdmsr(MSR_AMD_PSTATE_DEF_BASE + index, lo, hi);
+		fid = lo & 0x3f;
+		did = (lo >> 6) & 7;
+		if (boot_cpu_data.x86 == 0x10)
+			px->core_frequency = (100 * (fid + 0x10)) >> did;
+		else
+			px->core_frequency = (100 * (fid + 8)) >> did;
+	}
+}
+#else
+static void amd_fixup_frequency(struct acpi_processor_px *px, int i) {};
+#endif
+
 static int acpi_processor_get_performance_states(struct acpi_processor *pr)
 {
 	int result = 0;
@@ -379,6 +407,8 @@ static int acpi_processor_get_performance_states(struct acpi_processor *pr)
 			goto end;
 		}
 
+		amd_fixup_frequency(px, i);
+
 		ACPI_DEBUG_PRINT((ACPI_DB_INFO,
 				  "State [%d]: core_frequency[%d] power[%d] transition_latency[%d] bus_master_latency[%d] control[0x%x] status[0x%x]\n",
 				  i,
-- 
cgit v1.2.3-70-g09d2


From 92b5265d38f6a4d33e9d43974f176f18547687d6 Mon Sep 17 00:00:00 2001
From: "Liu, Jinsong" <jinsong.liu@intel.com>
Date: Mon, 10 Sep 2012 06:55:39 +0800
Subject: KVM: Depend on HIGH_RES_TIMERS

KVM lapic timer and tsc deadline timer based on hrtimer,
setting a leftmost node to rb tree and then do hrtimer reprogram.
If hrtimer not configured as high resolution, hrtimer_enqueue_reprogram
do nothing and then make kvm lapic timer and tsc deadline timer fail.

Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/Kconfig | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 45c044f0fff..586f0005980 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -20,6 +20,7 @@ if VIRTUALIZATION
 config KVM
 	tristate "Kernel-based Virtual Machine (KVM) support"
 	depends on HAVE_KVM
+	depends on HIGH_RES_TIMERS
 	# for device assignment:
 	depends on PCI
 	# for TASKSTATS/TASK_DELAY_ACCT:
-- 
cgit v1.2.3-70-g09d2


From 7de5bdc96c372ab875408c86e0099958dba89f56 Mon Sep 17 00:00:00 2001
From: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Date: Fri, 7 Sep 2012 14:15:03 +0800
Subject: KVM: MMU: remove unnecessary check

Checking the return of kvm_mmu_get_page is unnecessary since it is
guaranteed by memory cache

Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/mmu.c | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 399c177212b..aa0b469ee07 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2616,11 +2616,6 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
 			sp = kvm_mmu_get_page(vcpu, pseudo_gfn, iterator.addr,
 					      iterator.level - 1,
 					      1, ACC_ALL, iterator.sptep);
-			if (!sp) {
-				pgprintk("nonpaging_map: ENOMEM\n");
-				kvm_release_pfn_clean(pfn);
-				return -ENOMEM;
-			}
 
 			mmu_spte_set(iterator.sptep,
 				     __pa(sp->spt)
-- 
cgit v1.2.3-70-g09d2


From 4484141a94f4a5afea6ebc0b2abba0aa1b0ae9d1 Mon Sep 17 00:00:00 2001
From: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Date: Fri, 7 Sep 2012 14:14:20 +0800
Subject: KVM: fix error paths for failed gfn_to_page() calls

This bug was triggered:
[ 4220.198458] BUG: unable to handle kernel paging request at fffffffffffffffe
[ 4220.203907] IP: [<ffffffff81104d85>] put_page+0xf/0x34
......
[ 4220.237326] Call Trace:
[ 4220.237361]  [<ffffffffa03830d0>] kvm_arch_destroy_vm+0xf9/0x101 [kvm]
[ 4220.237382]  [<ffffffffa036fe53>] kvm_put_kvm+0xcc/0x127 [kvm]
[ 4220.237401]  [<ffffffffa03702bc>] kvm_vcpu_release+0x18/0x1c [kvm]
[ 4220.237407]  [<ffffffff81145425>] __fput+0x111/0x1ed
[ 4220.237411]  [<ffffffff8114550f>] ____fput+0xe/0x10
[ 4220.237418]  [<ffffffff81063511>] task_work_run+0x5d/0x88
[ 4220.237424]  [<ffffffff8104c3f7>] do_exit+0x2bf/0x7ca

The test case:

	printf(fmt, ##args);		\
	exit(-1);} while (0)

static int create_vm(void)
{
	int sys_fd, vm_fd;

	sys_fd = open("/dev/kvm", O_RDWR);
	if (sys_fd < 0)
		die("open /dev/kvm fail.\n");

	vm_fd = ioctl(sys_fd, KVM_CREATE_VM, 0);
	if (vm_fd < 0)
		die("KVM_CREATE_VM fail.\n");

	return vm_fd;
}

static int create_vcpu(int vm_fd)
{
	int vcpu_fd;

	vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, 0);
	if (vcpu_fd < 0)
		die("KVM_CREATE_VCPU ioctl.\n");
	printf("Create vcpu.\n");
	return vcpu_fd;
}

static void *vcpu_thread(void *arg)
{
	int vm_fd = (int)(long)arg;

	create_vcpu(vm_fd);
	return NULL;
}

int main(int argc, char *argv[])
{
	pthread_t thread;
	int vm_fd;

	(void)argc;
	(void)argv;

	vm_fd = create_vm();
	pthread_create(&thread, NULL, vcpu_thread, (void *)(long)vm_fd);
	printf("Exit.\n");
	return 0;
}

It caused by release kvm->arch.ept_identity_map_addr which is the
error page.

The parent thread can send KILL signal to the vcpu thread when it was
exiting which stops faulting pages and potentially allocating memory.
So gfn_to_pfn/gfn_to_page may fail at this time

Fixed by checking the page before it is used

Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/vmx.c | 19 ++++++++++++++++---
 arch/x86/kvm/x86.c | 13 ++++++++++---
 2 files changed, 26 insertions(+), 6 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 002b4a566e2..b1eb202ee76 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3619,6 +3619,7 @@ static void seg_setup(int seg)
 
 static int alloc_apic_access_page(struct kvm *kvm)
 {
+	struct page *page;
 	struct kvm_userspace_memory_region kvm_userspace_mem;
 	int r = 0;
 
@@ -3633,7 +3634,13 @@ static int alloc_apic_access_page(struct kvm *kvm)
 	if (r)
 		goto out;
 
-	kvm->arch.apic_access_page = gfn_to_page(kvm, 0xfee00);
+	page = gfn_to_page(kvm, 0xfee00);
+	if (is_error_page(page)) {
+		r = -EFAULT;
+		goto out;
+	}
+
+	kvm->arch.apic_access_page = page;
 out:
 	mutex_unlock(&kvm->slots_lock);
 	return r;
@@ -3641,6 +3648,7 @@ out:
 
 static int alloc_identity_pagetable(struct kvm *kvm)
 {
+	struct page *page;
 	struct kvm_userspace_memory_region kvm_userspace_mem;
 	int r = 0;
 
@@ -3656,8 +3664,13 @@ static int alloc_identity_pagetable(struct kvm *kvm)
 	if (r)
 		goto out;
 
-	kvm->arch.ept_identity_pagetable = gfn_to_page(kvm,
-			kvm->arch.ept_identity_map_addr >> PAGE_SHIFT);
+	page = gfn_to_page(kvm, kvm->arch.ept_identity_map_addr >> PAGE_SHIFT);
+	if (is_error_page(page)) {
+		r = -EFAULT;
+		goto out;
+	}
+
+	kvm->arch.ept_identity_pagetable = page;
 out:
 	mutex_unlock(&kvm->slots_lock);
 	return r;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 148ed666e31..2966c847d48 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5113,17 +5113,20 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu)
 			!kvm_event_needs_reinjection(vcpu);
 }
 
-static void vapic_enter(struct kvm_vcpu *vcpu)
+static int vapic_enter(struct kvm_vcpu *vcpu)
 {
 	struct kvm_lapic *apic = vcpu->arch.apic;
 	struct page *page;
 
 	if (!apic || !apic->vapic_addr)
-		return;
+		return 0;
 
 	page = gfn_to_page(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT);
+	if (is_error_page(page))
+		return -EFAULT;
 
 	vcpu->arch.apic->vapic_page = page;
+	return 0;
 }
 
 static void vapic_exit(struct kvm_vcpu *vcpu)
@@ -5430,7 +5433,11 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
 	}
 
 	vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
-	vapic_enter(vcpu);
+	r = vapic_enter(vcpu);
+	if (r) {
+		srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
+		return r;
+	}
 
 	r = 1;
 	while (r > 0) {
-- 
cgit v1.2.3-70-g09d2


From 280050cc81ccb2e06e4061228ee34c0cc86b1560 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 10 Sep 2012 22:48:33 +0200
Subject: x86 bpf_jit: support MOD operation

commit b6069a9570 (filter: add MOD operation) added generic
support for modulus operation in BPF.

This patch brings JIT support for x86_64

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: George Bakos <gbakos@alpinista.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/x86/net/bpf_jit_comp.c | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 33643a8bcbb..106c5782912 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -280,6 +280,31 @@ void bpf_jit_compile(struct sk_filter *fp)
 				}
 				EMIT4(0x31, 0xd2, 0xf7, 0xf3); /* xor %edx,%edx; div %ebx */
 				break;
+			case BPF_S_ALU_MOD_X: /* A %= X; */
+				seen |= SEEN_XREG;
+				EMIT2(0x85, 0xdb);	/* test %ebx,%ebx */
+				if (pc_ret0 > 0) {
+					/* addrs[pc_ret0 - 1] is start address of target
+					 * (addrs[i] - 6) is the address following this jmp
+					 * ("xor %edx,%edx; div %ebx;mov %edx,%eax" being 6 bytes long)
+					 */
+					EMIT_COND_JMP(X86_JE, addrs[pc_ret0 - 1] -
+								(addrs[i] - 6));
+				} else {
+					EMIT_COND_JMP(X86_JNE, 2 + 5);
+					CLEAR_A();
+					EMIT1_off32(0xe9, cleanup_addr - (addrs[i] - 6)); /* jmp .+off32 */
+				}
+				EMIT2(0x31, 0xd2);	/* xor %edx,%edx */
+				EMIT2(0xf7, 0xf3);	/* div %ebx */
+				EMIT2(0x89, 0xd0);	/* mov %edx,%eax */
+				break;
+			case BPF_S_ALU_MOD_K: /* A %= K; */
+				EMIT2(0x31, 0xd2);	/* xor %edx,%edx */
+				EMIT1(0xb9);EMIT(K, 4);	/* mov imm32,%ecx */
+				EMIT2(0xf7, 0xf1);	/* div %ecx */
+				EMIT2(0x89, 0xd0);	/* mov %edx,%eax */
+				break;
 			case BPF_S_ALU_DIV_K: /* A = reciprocal_divide(A, K); */
 				EMIT3(0x48, 0x69, 0xc0); /* imul imm32,%rax,%rax */
 				EMIT(K, 4);
-- 
cgit v1.2.3-70-g09d2


From 2fc136eecd0c647a6b13fcd00d0c41a1a28f35a5 Mon Sep 17 00:00:00 2001
From: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Date: Wed, 12 Sep 2012 12:44:30 +0100
Subject: xen/m2p: do not reuse kmap_op->dev_bus_addr

If the caller passes a valid kmap_op to m2p_add_override, we use
kmap_op->dev_bus_addr to store the original mfn, but dev_bus_addr is
part of the interface with Xen and if we are batching the hypercalls it
might not have been written by the hypervisor yet. That means that later
on Xen will write to it and we'll think that the original mfn is
actually what Xen has written to it.

Rather than "stealing" struct members from kmap_op, keep using
page->index to store the original mfn and add another parameter to
m2p_remove_override to get the corresponding kmap_op instead.
It is now responsibility of the caller to keep track of which kmap_op
corresponds to a particular page in the m2p_override (gntdev, the only
user of this interface that passes a valid kmap_op, is already doing that).

CC: stable@kernel.org
Reported-and-Tested-By: Sander Eikelenboom <linux@eikelenboom.it>
Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/include/asm/xen/page.h     |  3 ++-
 arch/x86/xen/p2m.c                  | 27 +++++++++++----------------
 drivers/block/xen-blkback/blkback.c |  2 +-
 drivers/xen/gntdev.c                |  5 +++--
 drivers/xen/grant-table.c           |  6 ++++--
 include/xen/grant_table.h           |  3 ++-
 6 files changed, 23 insertions(+), 23 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index 93971e841dd..472b9b78301 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -51,7 +51,8 @@ extern unsigned long set_phys_range_identity(unsigned long pfn_s,
 
 extern int m2p_add_override(unsigned long mfn, struct page *page,
 			    struct gnttab_map_grant_ref *kmap_op);
-extern int m2p_remove_override(struct page *page, bool clear_pte);
+extern int m2p_remove_override(struct page *page,
+				struct gnttab_map_grant_ref *kmap_op);
 extern struct page *m2p_find_override(unsigned long mfn);
 extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn);
 
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 76ba0e97e53..72213da605f 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -828,9 +828,6 @@ int m2p_add_override(unsigned long mfn, struct page *page,
 
 			xen_mc_issue(PARAVIRT_LAZY_MMU);
 		}
-		/* let's use dev_bus_addr to record the old mfn instead */
-		kmap_op->dev_bus_addr = page->index;
-		page->index = (unsigned long) kmap_op;
 	}
 	spin_lock_irqsave(&m2p_override_lock, flags);
 	list_add(&page->lru,  &m2p_overrides[mfn_hash(mfn)]);
@@ -857,7 +854,8 @@ int m2p_add_override(unsigned long mfn, struct page *page,
 	return 0;
 }
 EXPORT_SYMBOL_GPL(m2p_add_override);
-int m2p_remove_override(struct page *page, bool clear_pte)
+int m2p_remove_override(struct page *page,
+		struct gnttab_map_grant_ref *kmap_op)
 {
 	unsigned long flags;
 	unsigned long mfn;
@@ -887,10 +885,8 @@ int m2p_remove_override(struct page *page, bool clear_pte)
 	WARN_ON(!PagePrivate(page));
 	ClearPagePrivate(page);
 
-	if (clear_pte) {
-		struct gnttab_map_grant_ref *map_op =
-			(struct gnttab_map_grant_ref *) page->index;
-		set_phys_to_machine(pfn, map_op->dev_bus_addr);
+	set_phys_to_machine(pfn, page->index);
+	if (kmap_op != NULL) {
 		if (!PageHighMem(page)) {
 			struct multicall_space mcs;
 			struct gnttab_unmap_grant_ref *unmap_op;
@@ -902,13 +898,13 @@ int m2p_remove_override(struct page *page, bool clear_pte)
 			 * issued. In this case handle is going to -1 because
 			 * it hasn't been modified yet.
 			 */
-			if (map_op->handle == -1)
+			if (kmap_op->handle == -1)
 				xen_mc_flush();
 			/*
-			 * Now if map_op->handle is negative it means that the
+			 * Now if kmap_op->handle is negative it means that the
 			 * hypercall actually returned an error.
 			 */
-			if (map_op->handle == GNTST_general_error) {
+			if (kmap_op->handle == GNTST_general_error) {
 				printk(KERN_WARNING "m2p_remove_override: "
 						"pfn %lx mfn %lx, failed to modify kernel mappings",
 						pfn, mfn);
@@ -918,8 +914,8 @@ int m2p_remove_override(struct page *page, bool clear_pte)
 			mcs = xen_mc_entry(
 					sizeof(struct gnttab_unmap_grant_ref));
 			unmap_op = mcs.args;
-			unmap_op->host_addr = map_op->host_addr;
-			unmap_op->handle = map_op->handle;
+			unmap_op->host_addr = kmap_op->host_addr;
+			unmap_op->handle = kmap_op->handle;
 			unmap_op->dev_bus_addr = 0;
 
 			MULTI_grant_table_op(mcs.mc,
@@ -930,10 +926,9 @@ int m2p_remove_override(struct page *page, bool clear_pte)
 			set_pte_at(&init_mm, address, ptep,
 					pfn_pte(pfn, PAGE_KERNEL));
 			__flush_tlb_single(address);
-			map_op->host_addr = 0;
+			kmap_op->host_addr = 0;
 		}
-	} else
-		set_phys_to_machine(pfn, page->index);
+	}
 
 	/* p2m(m2p(mfn)) == FOREIGN_FRAME(mfn): the mfn is already present
 	 * somewhere in this domain, even before being added to the
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 73f196ca713..c6decb901e5 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -337,7 +337,7 @@ static void xen_blkbk_unmap(struct pending_req *req)
 		invcount++;
 	}
 
-	ret = gnttab_unmap_refs(unmap, pages, invcount, false);
+	ret = gnttab_unmap_refs(unmap, NULL, pages, invcount);
 	BUG_ON(ret);
 }
 
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index 1ffd03bf8e1..7f124160848 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -314,8 +314,9 @@ static int __unmap_grant_pages(struct grant_map *map, int offset, int pages)
 		}
 	}
 
-	err = gnttab_unmap_refs(map->unmap_ops + offset, map->pages + offset,
-				pages, true);
+	err = gnttab_unmap_refs(map->unmap_ops + offset,
+			use_ptemod ? map->kmap_ops + offset : NULL, map->pages + offset,
+			pages);
 	if (err)
 		return err;
 
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index 0bfc1ef1125..006726688ba 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -870,7 +870,8 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
 EXPORT_SYMBOL_GPL(gnttab_map_refs);
 
 int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
-		      struct page **pages, unsigned int count, bool clear_pte)
+		      struct gnttab_map_grant_ref *kmap_ops,
+		      struct page **pages, unsigned int count)
 {
 	int i, ret;
 	bool lazy = false;
@@ -888,7 +889,8 @@ int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
 	}
 
 	for (i = 0; i < count; i++) {
-		ret = m2p_remove_override(pages[i], clear_pte);
+		ret = m2p_remove_override(pages[i], kmap_ops ?
+				       &kmap_ops[i] : NULL);
 		if (ret)
 			return ret;
 	}
diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h
index 11e27c3af3c..f19fff8650e 100644
--- a/include/xen/grant_table.h
+++ b/include/xen/grant_table.h
@@ -187,6 +187,7 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
 		    struct gnttab_map_grant_ref *kmap_ops,
 		    struct page **pages, unsigned int count);
 int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
-		      struct page **pages, unsigned int count, bool clear_pte);
+		      struct gnttab_map_grant_ref *kunmap_ops,
+		      struct page **pages, unsigned int count);
 
 #endif /* __ASM_GNTTAB_H__ */
-- 
cgit v1.2.3-70-g09d2


From ecba9a52acdf20530d561b7634b80c35c308943a Mon Sep 17 00:00:00 2001
From: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
Date: Wed, 5 Sep 2012 19:30:01 +0900
Subject: KVM: x86: lapic: Clean up find_highest_vector() and count_vectors()

find_highest_vector() and count_vectors():
 - Instead of using magic values, define and use proper macros.

find_highest_vector():
 - Remove likely() which is there only for historical reasons and not
   doing correct branch predictions anymore.  Using such heuristics
   to optimize this function is not worth it now.  Let CPUs predict
   things instead.

 - Stop checking word[0] separately.  This was only needed for doing
   likely() optimization.

 - Use for loop, not while, to iterate over the register array to make
   the code clearer.

Note that we actually confirmed that the likely() did wrong predictions
by inserting debug code.

Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/lapic.c | 30 ++++++++++++++++++------------
 1 file changed, 18 insertions(+), 12 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 07ad628dadb..6f9fd633c88 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -66,6 +66,7 @@
 #define APIC_DEST_NOSHORT		0x0
 #define APIC_DEST_MASK			0x800
 #define MAX_APIC_VECTOR			256
+#define APIC_VECTORS_PER_REG		32
 
 #define VEC_POS(v) ((v) & (32 - 1))
 #define REG_POS(v) (((v) >> 5) << 4)
@@ -208,25 +209,30 @@ static const unsigned int apic_lvt_mask[APIC_LVT_NUM] = {
 
 static int find_highest_vector(void *bitmap)
 {
-	u32 *word = bitmap;
-	int word_offset = MAX_APIC_VECTOR >> 5;
+	int vec;
+	u32 *reg;
 
-	while ((word_offset != 0) && (word[(--word_offset) << 2] == 0))
-		continue;
+	for (vec = MAX_APIC_VECTOR - APIC_VECTORS_PER_REG;
+	     vec >= 0; vec -= APIC_VECTORS_PER_REG) {
+		reg = bitmap + REG_POS(vec);
+		if (*reg)
+			return fls(*reg) - 1 + vec;
+	}
 
-	if (likely(!word_offset && !word[0]))
-		return -1;
-	else
-		return fls(word[word_offset << 2]) - 1 + (word_offset << 5);
+	return -1;
 }
 
 static u8 count_vectors(void *bitmap)
 {
-	u32 *word = bitmap;
-	int word_offset;
+	int vec;
+	u32 *reg;
 	u8 count = 0;
-	for (word_offset = 0; word_offset < MAX_APIC_VECTOR >> 5; ++word_offset)
-		count += hweight32(word[word_offset << 2]);
+
+	for (vec = 0; vec < MAX_APIC_VECTOR; vec += APIC_VECTORS_PER_REG) {
+		reg = bitmap + REG_POS(vec);
+		count += hweight32(*reg);
+	}
+
 	return count;
 }
 
-- 
cgit v1.2.3-70-g09d2


From 35534b201c9f115c68962c095b5a9aad204d025f Mon Sep 17 00:00:00 2001
From: Stephane Eranian <eranian@google.com>
Date: Wed, 29 Aug 2012 15:01:22 +0200
Subject: perf/x86: Export Sandy Bridge uncore clockticks event in sysfs

This patch exports the clockticks event and its encoding to user level.
The clockticks event was exported for Nehalem/Westmere but not for Sandy
Bridge (client). Given that it uses a special encoding, it needs to be
exported to user tools, so users can do:

  # perf stat -a -C 0 -e uncore_cbox_0/clockticks/ sleep 1

Signed-off-by: Stephane Eranian <eranian@google.com>
Acked-by: Yan, Zheng <zheng.z.yan@intel.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20120829130122.GA32336@quad
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event_intel_uncore.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index 0a5571080e7..38e4894165b 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -661,6 +661,11 @@ static void snb_uncore_msr_init_box(struct intel_uncore_box *box)
 	}
 }
 
+static struct uncore_event_desc snb_uncore_events[] = {
+	INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0x00"),
+	{ /* end: all zeroes */ },
+};
+
 static struct attribute *snb_uncore_formats_attr[] = {
 	&format_attr_event.attr,
 	&format_attr_umask.attr,
@@ -704,6 +709,7 @@ static struct intel_uncore_type snb_uncore_cbox = {
 	.constraints	= snb_uncore_cbox_constraints,
 	.ops		= &snb_uncore_msr_ops,
 	.format_group	= &snb_uncore_format_group,
+	.event_descs	= snb_uncore_events,
 };
 
 static struct intel_uncore_type *snb_msr_uncores[] = {
-- 
cgit v1.2.3-70-g09d2


From bad9ac2d7f878a31cf1ae8c1ee3768077d222bcb Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Wed, 25 Jul 2012 19:12:45 +0200
Subject: perf/x86/ibs: Check syscall attribute flags

Current implementation simply ignores attribute flags. Thus, there is
no notification to userland of unsupported features. Check syscall's
attribute flags to let userland know if a feature is supported by the
kernel. This is also needed to distinguish between future kernels what
might support a feature.

Cc: <stable@vger.kernel.org> v3.5..
Signed-off-by: Robert Richter <robert.richter@amd.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20120910093018.GO8285@erda.amd.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event_amd_ibs.c | 12 ++++++++++++
 include/linux/perf_event.h               |  2 ++
 2 files changed, 14 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
index 7bfb5bec863..eebd5ffe1bb 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c
+++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
@@ -209,6 +209,15 @@ static int perf_ibs_precise_event(struct perf_event *event, u64 *config)
 	return -EOPNOTSUPP;
 }
 
+static const struct perf_event_attr ibs_notsupp = {
+	.exclude_user	= 1,
+	.exclude_kernel	= 1,
+	.exclude_hv	= 1,
+	.exclude_idle	= 1,
+	.exclude_host	= 1,
+	.exclude_guest	= 1,
+};
+
 static int perf_ibs_init(struct perf_event *event)
 {
 	struct hw_perf_event *hwc = &event->hw;
@@ -229,6 +238,9 @@ static int perf_ibs_init(struct perf_event *event)
 	if (event->pmu != &perf_ibs->pmu)
 		return -ENOENT;
 
+	if (perf_flags(&event->attr) & perf_flags(&ibs_notsupp))
+		return -EINVAL;
+
 	if (config & ~perf_ibs->config_mask)
 		return -EINVAL;
 
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 33ed9d605f9..bdb41612bfe 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -274,6 +274,8 @@ struct perf_event_attr {
 	__u64	branch_sample_type; /* enum branch_sample_type */
 };
 
+#define perf_flags(attr)	(*(&(attr)->read_format + 1))
+
 /*
  * Ioctls that can be done on a perf event fd:
  */
-- 
cgit v1.2.3-70-g09d2


From 6eebdda35e6b18d0dddb2a44e34211bd94f0cad6 Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Fri, 24 Aug 2012 23:58:47 +0400
Subject: x86: Drop unnecessary kernel_eflags variable on 64-bit

On 64 bit x86 we save the current eflags in cpu_init for use in
ret_from_fork. Strictly speaking reserved bits in EFLAGS should
be read as written but in practise it is unlikely that EFLAGS
could ever be extended in this way and the kernel alread clears
any undefined flags early on.

The equivalent 32 bit code simply hard codes 0x0202 as the new
EFLAGS.

This change makes 64 bit use the same mechanism to setup the
initial EFLAGS on fork. Note that 64 bit resets EFLAGS before
calling schedule_tail() as opposed to 32 bit which calls
schedule_tail() first. Therefore the correct value for EFLAGS
has opposite IF bit.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Acked-by: Andi Kleen <ak@linux.intel.com>
Acked-by: "H. Peter Anvin" <hpa@zytor.com>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Andi Kleen <ak@linux.intel.com>
Link: http://lkml.kernel.org/r/20120824195847.GA31628@moon
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/processor.h | 1 -
 arch/x86/kernel/cpu/common.c     | 4 ----
 arch/x86/kernel/entry_64.S       | 2 +-
 3 files changed, 1 insertion(+), 6 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index d048cad9bca..9738b39e4eb 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -423,7 +423,6 @@ DECLARE_INIT_PER_CPU(irq_stack_union);
 
 DECLARE_PER_CPU(char *, irq_stack_ptr);
 DECLARE_PER_CPU(unsigned int, irq_count);
-extern unsigned long kernel_eflags;
 extern asmlinkage void ignore_sysret(void);
 #else	/* X86_64 */
 #ifdef CONFIG_CC_STACKPROTECTOR
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index a5fbc3c5fcc..9961e2e2370 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1116,8 +1116,6 @@ void syscall_init(void)
 	       X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL);
 }
 
-unsigned long kernel_eflags;
-
 /*
  * Copies of the original ist values from the tss are only accessed during
  * debugging, no special alignment required.
@@ -1299,8 +1297,6 @@ void __cpuinit cpu_init(void)
 	fpu_init();
 	xsave_init();
 
-	raw_local_save_flags(kernel_eflags);
-
 	if (is_uv_system())
 		uv_cpu_init();
 }
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 69babd8c834..b1dac12dc5e 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -440,7 +440,7 @@ ENTRY(ret_from_fork)
 
 	LOCK ; btr $TIF_FORK,TI_flags(%r8)
 
-	pushq_cfi kernel_eflags(%rip)
+	pushq_cfi $0x0002
 	popfq_cfi				# reset kernel eflags
 
 	call schedule_tail			# rdi: 'prev' task parameter
-- 
cgit v1.2.3-70-g09d2


From 73e8f3d7e2cb23614d5115703d76d8e54764b641 Mon Sep 17 00:00:00 2001
From: T Makphaibulchoke <tmac@hp.com>
Date: Tue, 28 Aug 2012 21:21:43 -0600
Subject: x86/mm/init.c: Fix devmem_is_allowed() off by one

Fixing an off-by-one error in devmem_is_allowed(), which allows
accesses to physical addresses 0x100000-0x100fff, an extra page
past 1MB.

Signed-off-by: T Makphaibulchoke <tmac@hp.com>
Acked-by: H. Peter Anvin <hpa@zytor.com>
Cc: yinghai@kernel.org
Cc: tiwai@suse.de
Cc: dhowells@redhat.com
Link: http://lkml.kernel.org/r/1346210503-14276-1-git-send-email-tmac@hp.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/mm/init.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index e0e6990723e..ab1f6a93b52 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -319,7 +319,7 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
  */
 int devmem_is_allowed(unsigned long pagenr)
 {
-	if (pagenr <= 256)
+	if (pagenr < 256)
 		return 1;
 	if (iomem_is_exclusive(pagenr << PAGE_SHIFT))
 		return 0;
-- 
cgit v1.2.3-70-g09d2


From 1edfbb4153bd29bcf8d2236676238d5237972be1 Mon Sep 17 00:00:00 2001
From: Jan Beulich <JBeulich@suse.com>
Date: Mon, 10 Sep 2012 12:04:16 +0100
Subject: x86/64: Adjust types of temporaries used by ffs()/fls()/fls64()

The 64-bit special cases of the former two (the thrird one is
64-bit only anyway) don't need to use "long" temporaries, as the
result will always fit in a 32-bit variable, and the functions
return plain "int". This avoids a few REX prefixes, i.e.
minimally reduces code size.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/504DE550020000780009A258@nat28.tlf.novell.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/bitops.h | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index 72f5009deb5..ebaee695394 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -417,10 +417,9 @@ static inline int ffs(int x)
 	 * We cannot do this on 32 bits because at the very least some
 	 * 486 CPUs did not behave this way.
 	 */
-	long tmp = -1;
 	asm("bsfl %1,%0"
 	    : "=r" (r)
-	    : "rm" (x), "0" (tmp));
+	    : "rm" (x), "0" (-1));
 #elif defined(CONFIG_X86_CMOV)
 	asm("bsfl %1,%0\n\t"
 	    "cmovzl %2,%0"
@@ -459,10 +458,9 @@ static inline int fls(int x)
 	 * We cannot do this on 32 bits because at the very least some
 	 * 486 CPUs did not behave this way.
 	 */
-	long tmp = -1;
 	asm("bsrl %1,%0"
 	    : "=r" (r)
-	    : "rm" (x), "0" (tmp));
+	    : "rm" (x), "0" (-1));
 #elif defined(CONFIG_X86_CMOV)
 	asm("bsrl %1,%0\n\t"
 	    "cmovzl %2,%0"
@@ -490,13 +488,13 @@ static inline int fls(int x)
 #ifdef CONFIG_X86_64
 static __always_inline int fls64(__u64 x)
 {
-	long bitpos = -1;
+	int bitpos = -1;
 	/*
 	 * AMD64 says BSRQ won't clobber the dest reg if x==0; Intel64 says the
 	 * dest reg is undefined if x==0, but their CPU architect says its
 	 * value is written to set it to the same as before.
 	 */
-	asm("bsrq %1,%0"
+	asm("bsrq %1,%q0"
 	    : "+r" (bitpos)
 	    : "rm" (x));
 	return bitpos + 1;
-- 
cgit v1.2.3-70-g09d2


From 5870661c091e827973674cc3469b50c959008c2b Mon Sep 17 00:00:00 2001
From: Jan Beulich <JBeulich@suse.com>
Date: Mon, 10 Sep 2012 12:24:43 +0100
Subject: x86: Prefer TZCNT over BFS

Following a relatively recent compiler change, make use of the
fact that for non-zero input BSF and TZCNT produce the same
result, and that CPUs not knowing of TZCNT will treat the
instruction as BSF (i.e. ignore what looks like a REP prefix to
them). The assumption here is that TZCNT would never have worse
performance than BSF.

For the moment, only do this when the respective generic-CPU
option is selected (as there are no specific-CPU options
covering the CPUs supporting TZCNT), and don't do that when size
optimization was requested.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/504DEA1B020000780009A277@nat28.tlf.novell.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/bitops.h | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index ebaee695394..b2af6645ea7 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -347,6 +347,19 @@ static int test_bit(int nr, const volatile unsigned long *addr);
 	 ? constant_test_bit((nr), (addr))	\
 	 : variable_test_bit((nr), (addr)))
 
+#if (defined(CONFIG_X86_GENERIC) || defined(CONFIG_GENERIC_CPU)) \
+    && !defined(CONFIG_CC_OPTIMIZE_FOR_SIZE)
+/*
+ * Since BSF and TZCNT have sufficiently similar semantics for the purposes
+ * for which we use them here, BMI-capable hardware will decode the prefixed
+ * variant as 'tzcnt ...' and may execute that faster than 'bsf ...', while
+ * older hardware will ignore the REP prefix and decode it as 'bsf ...'.
+ */
+# define BSF_PREFIX "rep;"
+#else
+# define BSF_PREFIX
+#endif
+
 /**
  * __ffs - find first set bit in word
  * @word: The word to search
@@ -355,7 +368,7 @@ static int test_bit(int nr, const volatile unsigned long *addr);
  */
 static inline unsigned long __ffs(unsigned long word)
 {
-	asm("bsf %1,%0"
+	asm(BSF_PREFIX "bsf %1,%0"
 		: "=r" (word)
 		: "rm" (word));
 	return word;
@@ -369,12 +382,14 @@ static inline unsigned long __ffs(unsigned long word)
  */
 static inline unsigned long ffz(unsigned long word)
 {
-	asm("bsf %1,%0"
+	asm(BSF_PREFIX "bsf %1,%0"
 		: "=r" (word)
 		: "r" (~word));
 	return word;
 }
 
+#undef BSF_PREFIX
+
 /*
  * __fls: find last set bit in word
  * @word: The word to search
-- 
cgit v1.2.3-70-g09d2


From 3120e25efdc0834c88e1c0f8394e2087444f8c19 Mon Sep 17 00:00:00 2001
From: Jan Beulich <JBeulich@suse.com>
Date: Mon, 10 Sep 2012 12:41:45 +0100
Subject: x86/Kconfig: Clean up Kconfig defaults

The main goal here is to have the resulting .config no carry any
options that aren't enabled and can't be (i.e such where the
default is "no" and can't be changed), so that if any such
option later gets a user visible prompt, the user will actually
be prompted on a "make ...oldconfig" rather than keeping the
previously invisible option disabled.

There's a little bit of other trivial cleanup mixed in here.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/504DEE19020000780009A285@nat28.tlf.novell.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/Kconfig     | 50 ++++++++++++++++++++++++++++++--------------------
 arch/x86/Kconfig.cpu |  5 +++--
 2 files changed, 33 insertions(+), 22 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 8ec3a1aa4ab..3fb871908ab 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -7,11 +7,13 @@ config 64BIT
 	  Say no to build a 32-bit kernel - formerly known as i386
 
 config X86_32
-	def_bool !64BIT
+	def_bool y
+	depends on !64BIT
 	select CLKSRC_I8253
 
 config X86_64
-	def_bool 64BIT
+	def_bool y
+	depends on 64BIT
 	select X86_DEV_DMA_OPS
 
 ### Arch settings
@@ -99,7 +101,8 @@ config X86
 	select GENERIC_STRNLEN_USER
 
 config INSTRUCTION_DECODER
-	def_bool (KPROBES || PERF_EVENTS || UPROBES)
+	def_bool y
+	depends on KPROBES || PERF_EVENTS || UPROBES
 
 config OUTPUT_FORMAT
 	string
@@ -127,13 +130,15 @@ config SBUS
 	bool
 
 config NEED_DMA_MAP_STATE
-       def_bool (X86_64 || INTEL_IOMMU || DMA_API_DEBUG)
+	def_bool y
+	depends on X86_64 || INTEL_IOMMU || DMA_API_DEBUG
 
 config NEED_SG_DMA_LENGTH
 	def_bool y
 
 config GENERIC_ISA_DMA
-	def_bool ISA_DMA_API
+	def_bool y
+	depends on ISA_DMA_API
 
 config GENERIC_BUG
 	def_bool y
@@ -150,13 +155,16 @@ config GENERIC_GPIO
 	bool
 
 config ARCH_MAY_HAVE_PC_FDC
-	def_bool ISA_DMA_API
+	def_bool y
+	depends on ISA_DMA_API
 
 config RWSEM_GENERIC_SPINLOCK
-	def_bool !X86_XADD
+	def_bool y
+	depends on !X86_XADD
 
 config RWSEM_XCHGADD_ALGORITHM
-	def_bool X86_XADD
+	def_bool y
+	depends on X86_XADD
 
 config GENERIC_CALIBRATE_DELAY
 	def_bool y
@@ -752,7 +760,8 @@ config SWIOTLB
 	  3 GB of memory. If unsure, say Y.
 
 config IOMMU_HELPER
-	def_bool (CALGARY_IOMMU || GART_IOMMU || SWIOTLB || AMD_IOMMU)
+	def_bool y
+	depends on CALGARY_IOMMU || GART_IOMMU || SWIOTLB || AMD_IOMMU
 
 config MAXSMP
 	bool "Enable Maximum number of SMP Processors and NUMA Nodes"
@@ -1159,10 +1168,12 @@ config X86_PAE
 	  consumes more pagetable space per process.
 
 config ARCH_PHYS_ADDR_T_64BIT
-	def_bool X86_64 || X86_PAE
+	def_bool y
+	depends on X86_64 || X86_PAE
 
 config ARCH_DMA_ADDR_T_64BIT
-	def_bool X86_64 || HIGHMEM64G
+	def_bool y
+	depends on X86_64 || HIGHMEM64G
 
 config DIRECT_GBPAGES
 	bool "Enable 1GB pages for kernel pagetables" if EXPERT
@@ -1285,8 +1296,8 @@ config ARCH_SELECT_MEMORY_MODEL
 	depends on ARCH_SPARSEMEM_ENABLE
 
 config ARCH_MEMORY_PROBE
-	def_bool X86_64
-	depends on MEMORY_HOTPLUG
+	def_bool y
+	depends on X86_64 && MEMORY_HOTPLUG
 
 config ARCH_PROC_KCORE_TEXT
 	def_bool y
@@ -1975,7 +1986,6 @@ config PCI_MMCONFIG
 
 config PCI_CNB20LE_QUIRK
 	bool "Read CNB20LE Host Bridge Windows" if EXPERT
-	default n
 	depends on PCI && EXPERIMENTAL
 	help
 	  Read the PCI windows out of the CNB20LE host bridge. This allows
@@ -2186,18 +2196,18 @@ config COMPAT
 	depends on IA32_EMULATION || X86_X32
 	select ARCH_WANT_OLD_COMPAT_IPC
 
+if COMPAT
 config COMPAT_FOR_U64_ALIGNMENT
-	def_bool COMPAT
-	depends on X86_64
+	def_bool y
 
 config SYSVIPC_COMPAT
 	def_bool y
-	depends on COMPAT && SYSVIPC
+	depends on SYSVIPC
 
 config KEYS_COMPAT
-	bool
-	depends on COMPAT && KEYS
-	default y
+	def_bool y
+	depends on KEYS
+endif
 
 endmenu
 
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 706e12e9984..f3b86d0df44 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -306,7 +306,8 @@ config X86_INTERNODE_CACHE_SHIFT
 	default X86_L1_CACHE_SHIFT
 
 config X86_CMPXCHG
-	def_bool X86_64 || (X86_32 && !M386)
+	def_bool y
+	depends on X86_64 || (X86_32 && !M386)
 
 config X86_L1_CACHE_SHIFT
 	int
@@ -317,7 +318,7 @@ config X86_L1_CACHE_SHIFT
 
 config X86_XADD
 	def_bool y
-	depends on X86_64 || !M386
+	depends on !M386
 
 config X86_PPRO_FENCE
 	bool "PentiumPro memory ordering errata workaround"
-- 
cgit v1.2.3-70-g09d2


From a5e37863ab31d78faddff15675c89979792bc0bd Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Date: Wed, 5 Sep 2012 23:31:00 +0900
Subject: ftrace/x86: Adjust x86 regs.ip as like as x86-64

Adjust x86 regs.ip to ip + MCOUNT_INSN_SIZE as like as
on x86-64. This helps us to consolidate codes which use
regs->ip on both of x86/x86-64.

Link: http://lkml.kernel.org/r/20120905143100.10329.60109.stgit@localhost.localdomain

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 arch/x86/kernel/entry_32.S | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 061ac17ee97..f438a44bf8f 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -1148,7 +1148,6 @@ ENTRY(ftrace_regs_caller)
 	 * ip location, and move flags into the return ip location.
 	 */
 	pushl 4(%esp)	/* save return ip into ip slot */
-	subl $MCOUNT_INSN_SIZE, (%esp)	/* Adjust ip */
 
 	pushl $0	/* Load 0 into orig_ax */
 	pushl %gs
@@ -1169,6 +1168,7 @@ ENTRY(ftrace_regs_caller)
 	movl $__KERNEL_CS,13*4(%esp)
 
 	movl 12*4(%esp), %eax	/* Load ip (1st parameter) */
+	subl $MCOUNT_INSN_SIZE, %eax	/* Adjust ip */
 	movl 0x4(%ebp), %edx	/* Load parent ip (2nd parameter) */
 	leal function_trace_op, %ecx /* Save ftrace_pos in 3rd parameter */
 	pushl %esp		/* Save pt_regs as 4th parameter */
@@ -1180,7 +1180,6 @@ GLOBAL(ftrace_regs_call)
 	movl 14*4(%esp), %eax	/* Move flags back into cs */
 	movl %eax, 13*4(%esp)	/* Needed to keep addl from modifying flags */
 	movl 12*4(%esp), %eax	/* Get return ip from regs->ip */
-	addl $MCOUNT_INSN_SIZE, %eax
 	movl %eax, 14*4(%esp)	/* Put return ip back for ret */
 
 	popl %ebx
-- 
cgit v1.2.3-70-g09d2


From 4b036d54bf849a75d0103b33d92a53f89ecb9315 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Date: Wed, 5 Sep 2012 23:31:12 +0900
Subject: kprobes/x86: Fix kprobes to collectly handle IP on ftrace

Current kprobe_ftrace_handler expects regs->ip == ip, but it is
incorrect (originally on x86-64). Actually, ftrace handler sets
regs->ip = ip + MCOUNT_INSN_SIZE.
kprobe_ftrace_handler must take care for that.

Link: http://lkml.kernel.org/r/20120905143112.10329.72069.stgit@localhost.localdomain

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 arch/x86/kernel/kprobes.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 47ae1023a93..f49f60cca40 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -1072,7 +1072,8 @@ void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
 	if (kprobe_running()) {
 		kprobes_inc_nmissed_count(p);
 	} else {
-		regs->ip += sizeof(kprobe_opcode_t);
+		/* Kprobe handler expects regs->ip = ip + 1 as breakpoint hit */
+		regs->ip = ip + sizeof(kprobe_opcode_t);
 
 		__this_cpu_write(current_kprobe, p);
 		kcb->kprobe_status = KPROBE_HIT_ACTIVE;
@@ -1080,13 +1081,15 @@ void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
 			p->pre_handler(p, regs);
 
 		if (unlikely(p->post_handler)) {
-			/* Emulate singlestep as if there is a 5byte nop */
+			/*
+			 * Emulate singlestep (and also recover regs->ip)
+			 * as if there is a 5byte nop
+			 */
 			regs->ip = ip + MCOUNT_INSN_SIZE;
 			kcb->kprobe_status = KPROBE_HIT_SSDONE;
 			p->post_handler(p, regs, 0);
 		}
 		__this_cpu_write(current_kprobe, NULL);
-		regs->ip = ip;	/* Recover for next callback */
 	}
 end:
 	local_irq_restore(flags);
-- 
cgit v1.2.3-70-g09d2


From 47d5a5f88b9d25d6464c9b60c28f391e84e3ed65 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Wed, 5 Sep 2012 23:31:18 +0900
Subject: ftrace/x86-64: Allow to change RIP in handlers

Allow ftrace handlers to change RIP register (regs->ip)
in handlers. This will allow handlers to call another
function instead of original function.

Link: http://lkml.kernel.org/r/20120905143118.10329.5078.stgit@localhost.localdomain

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 arch/x86/kernel/entry_64.S | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index ed767b747fe..e9cc2b32bdf 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -165,6 +165,10 @@ GLOBAL(ftrace_regs_call)
 	movq EFLAGS(%rsp), %rax
 	movq %rax, SS(%rsp)
 
+	/* Handlers can change the RIP */
+	movq RIP(%rsp), %rax
+	movq %rax, SS+8(%rsp)
+
 	/* restore the rest of pt_regs */
 	movq R15(%rsp), %r15
 	movq R14(%rsp), %r14
-- 
cgit v1.2.3-70-g09d2


From c6aaf4d0bb86e2154ea31a33804cec300611255f Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Date: Wed, 5 Sep 2012 23:31:25 +0900
Subject: kprobes/x86: Fix to support jprobes on ftrace-based kprobe

Fix kprobes/x86 to support jprobes on ftrace-based kprobes.
Because of -mfentry support of ftrace, ftrace is now put
on the beginning of function where jprobes are put.

Originally ftrace-based kprobes doesn't support jprobe
because it will change regs->ip and ftrace doesn't support
changing IP and ftrace itself doesn't conflict jprobe.
However, ftrace -mfentry support moves mcount call on the
top of functions where jprobes are put. This means that
jprobe always conflicts with ftrace-based kprobe and fails.

This patch allows ftrace-based kprobes to support jprobes
by allowing to modify regs->ip and kprobes breakpoint
handler also allows to skip singlestepping because there
is a ftrace call (not an original instruction).

Link: http://lkml.kernel.org/r/20120905143125.10329.90836.stgit@localhost.localdomain

Reported-by: Fengguang Wu <fengguang.wu@intel.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 arch/x86/kernel/kprobes.c | 42 +++++++++++++++++++++++++++++-------------
 kernel/kprobes.c          |  3 ---
 2 files changed, 29 insertions(+), 16 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index f49f60cca40..b7c2a85d192 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -541,6 +541,8 @@ reenter_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb
 	return 1;
 }
 
+static void __kprobes skip_singlestep(struct kprobe *p, struct pt_regs *regs,
+				      struct kprobe_ctlblk *kcb);
 /*
  * Interrupts are disabled on entry as trap3 is an interrupt gate and they
  * remain disabled throughout this function.
@@ -599,6 +601,12 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
 	} else if (kprobe_running()) {
 		p = __this_cpu_read(current_kprobe);
 		if (p->break_handler && p->break_handler(p, regs)) {
+#ifdef KPROBES_CAN_USE_FTRACE
+			if (kprobe_ftrace(p)) {
+				skip_singlestep(p, regs, kcb);
+				return 1;
+			}
+#endif
 			setup_singlestep(p, regs, kcb, 0);
 			return 1;
 		}
@@ -1053,6 +1061,21 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
 }
 
 #ifdef KPROBES_CAN_USE_FTRACE
+static void __kprobes skip_singlestep(struct kprobe *p, struct pt_regs *regs,
+				      struct kprobe_ctlblk *kcb)
+{
+	/*
+	 * Emulate singlestep (and also recover regs->ip)
+	 * as if there is a 5byte nop
+	 */
+	regs->ip = (unsigned long)p->addr + MCOUNT_INSN_SIZE;
+	if (unlikely(p->post_handler)) {
+		kcb->kprobe_status = KPROBE_HIT_SSDONE;
+		p->post_handler(p, regs, 0);
+	}
+	__this_cpu_write(current_kprobe, NULL);
+}
+
 /* Ftrace callback handler for kprobes */
 void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
 				     struct ftrace_ops *ops, struct pt_regs *regs)
@@ -1077,19 +1100,12 @@ void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
 
 		__this_cpu_write(current_kprobe, p);
 		kcb->kprobe_status = KPROBE_HIT_ACTIVE;
-		if (p->pre_handler)
-			p->pre_handler(p, regs);
-
-		if (unlikely(p->post_handler)) {
-			/*
-			 * Emulate singlestep (and also recover regs->ip)
-			 * as if there is a 5byte nop
-			 */
-			regs->ip = ip + MCOUNT_INSN_SIZE;
-			kcb->kprobe_status = KPROBE_HIT_SSDONE;
-			p->post_handler(p, regs, 0);
-		}
-		__this_cpu_write(current_kprobe, NULL);
+		if (!p->pre_handler || !p->pre_handler(p, regs))
+			skip_singlestep(p, regs, kcb);
+		/*
+		 * If pre_handler returns !0, it sets regs->ip and
+		 * resets current kprobe.
+		 */
 	}
 end:
 	local_irq_restore(flags);
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 35b4315d84f..098f396aa40 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1418,9 +1418,6 @@ static __kprobes int check_kprobe_address_safe(struct kprobe *p,
 		/* Given address is not on the instruction boundary */
 		if ((unsigned long)p->addr != ftrace_addr)
 			return -EILSEQ;
-		/* break_handler (jprobe) can not work with ftrace */
-		if (p->break_handler)
-			return -EINVAL;
 		p->flags |= KPROBE_FLAG_FTRACE;
 #else	/* !KPROBES_CAN_USE_FTRACE */
 		return -EINVAL;
-- 
cgit v1.2.3-70-g09d2


From bdc1e47217315be14ba04881b0a4c8ecb3ff320c Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Mon, 20 Aug 2012 12:47:34 +0200
Subject: uprobes/x86: Implement x86 specific arch_uprobe_*_step

The arch specific implementation behaves like user_enable_single_step()
except that it does not disable single stepping if it was already
enabled by ptrace. This allows the debugger to single step over an
uprobe. The state of block stepping is not restored. It makes only sense
together with TF and if that was enabled then the debugger is notified.

Note: this is still not correct. For example, TIF_SINGLESTEP check
is not right, the application itself can set X86_EFLAGS_TF. And otoh
we leak TIF_SINGLESTEP (set by enable) if the probed insn is "popf".
See the next patches, we need the changes in arch/x86/kernel/step.c
first.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
---
 arch/x86/include/asm/uprobes.h |  2 ++
 arch/x86/kernel/uprobes.c      | 33 +++++++++++++++++++++++++++++++++
 2 files changed, 35 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h
index f3971bbcd1d..cee58624cb3 100644
--- a/arch/x86/include/asm/uprobes.h
+++ b/arch/x86/include/asm/uprobes.h
@@ -46,6 +46,8 @@ struct arch_uprobe_task {
 #ifdef CONFIG_X86_64
 	unsigned long			saved_scratch_register;
 #endif
+#define UPROBE_CLEAR_TF			(1 << 0)
+	unsigned int			restore_flags;
 };
 
 extern int  arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long addr);
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 36fd42091fa..309a0e02b12 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -41,6 +41,9 @@
 /* Adjust the return address of a call insn */
 #define UPROBE_FIX_CALL	0x2
 
+/* Instruction will modify TF, don't change it */
+#define UPROBE_FIX_SETF	0x4
+
 #define UPROBE_FIX_RIP_AX	0x8000
 #define UPROBE_FIX_RIP_CX	0x4000
 
@@ -239,6 +242,10 @@ static void prepare_fixups(struct arch_uprobe *auprobe, struct insn *insn)
 	insn_get_opcode(insn);	/* should be a nop */
 
 	switch (OPCODE1(insn)) {
+	case 0x9d:
+		/* popf */
+		auprobe->fixups |= UPROBE_FIX_SETF;
+		break;
 	case 0xc3:		/* ret/lret */
 	case 0xcb:
 	case 0xc2:
@@ -673,3 +680,29 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
 	}
 	return false;
 }
+
+void arch_uprobe_enable_step(struct arch_uprobe *auprobe)
+{
+	struct uprobe_task	*utask		= current->utask;
+	struct arch_uprobe_task	*autask		= &utask->autask;
+
+	autask->restore_flags = 0;
+	if (!test_tsk_thread_flag(current, TIF_SINGLESTEP) &&
+			!(auprobe->fixups & UPROBE_FIX_SETF))
+		autask->restore_flags |= UPROBE_CLEAR_TF;
+	/*
+	 * The state of TIF_BLOCKSTEP is not saved. With the TF flag set we
+	 * would to examine the opcode and the flags to make it right. Without
+	 * TF block stepping makes no sense.
+	 */
+	user_enable_single_step(current);
+}
+
+void arch_uprobe_disable_step(struct arch_uprobe *auprobe)
+{
+	struct uprobe_task *utask		= current->utask;
+	struct arch_uprobe_task	*autask		= &utask->autask;
+
+	if (autask->restore_flags & UPROBE_CLEAR_TF)
+		user_disable_single_step(current);
+}
-- 
cgit v1.2.3-70-g09d2


From 848e8f5f0ad3169560c516fff6471be65f76e69f Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Fri, 3 Aug 2012 17:31:46 +0200
Subject: ptrace/x86: Introduce set_task_blockstep() helper

No functional changes, preparation for the next fix and for uprobes
single-step fixes.

Move the code playing with TIF_BLOCKSTEP/DEBUGCTLMSR_BTF into the
new helper, set_task_blockstep().

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
---
 arch/x86/kernel/step.c | 41 +++++++++++++++++++++--------------------
 1 file changed, 21 insertions(+), 20 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c
index c346d116148..7a514986ca0 100644
--- a/arch/x86/kernel/step.c
+++ b/arch/x86/kernel/step.c
@@ -157,6 +157,21 @@ static int enable_single_step(struct task_struct *child)
 	return 1;
 }
 
+static void set_task_blockstep(struct task_struct *task, bool on)
+{
+	unsigned long debugctl;
+
+	debugctl = get_debugctlmsr();
+	if (on) {
+		debugctl |= DEBUGCTLMSR_BTF;
+		set_tsk_thread_flag(task, TIF_BLOCKSTEP);
+	} else {
+		debugctl &= ~DEBUGCTLMSR_BTF;
+		clear_tsk_thread_flag(task, TIF_BLOCKSTEP);
+	}
+	update_debugctlmsr(debugctl);
+}
+
 /*
  * Enable single or block step.
  */
@@ -169,19 +184,10 @@ static void enable_step(struct task_struct *child, bool block)
 	 * So no one should try to use debugger block stepping in a program
 	 * that uses user-mode single stepping itself.
 	 */
-	if (enable_single_step(child) && block) {
-		unsigned long debugctl = get_debugctlmsr();
-
-		debugctl |= DEBUGCTLMSR_BTF;
-		update_debugctlmsr(debugctl);
-		set_tsk_thread_flag(child, TIF_BLOCKSTEP);
-	} else if (test_tsk_thread_flag(child, TIF_BLOCKSTEP)) {
-		unsigned long debugctl = get_debugctlmsr();
-
-		debugctl &= ~DEBUGCTLMSR_BTF;
-		update_debugctlmsr(debugctl);
-		clear_tsk_thread_flag(child, TIF_BLOCKSTEP);
-	}
+	if (enable_single_step(child) && block)
+		set_task_blockstep(child, true);
+	else if (test_tsk_thread_flag(child, TIF_BLOCKSTEP))
+		set_task_blockstep(child, false);
 }
 
 void user_enable_single_step(struct task_struct *child)
@@ -199,13 +205,8 @@ void user_disable_single_step(struct task_struct *child)
 	/*
 	 * Make sure block stepping (BTF) is disabled.
 	 */
-	if (test_tsk_thread_flag(child, TIF_BLOCKSTEP)) {
-		unsigned long debugctl = get_debugctlmsr();
-
-		debugctl &= ~DEBUGCTLMSR_BTF;
-		update_debugctlmsr(debugctl);
-		clear_tsk_thread_flag(child, TIF_BLOCKSTEP);
-	}
+	if (test_tsk_thread_flag(child, TIF_BLOCKSTEP))
+		set_task_blockstep(child, false);
 
 	/* Always clear TIF_SINGLESTEP... */
 	clear_tsk_thread_flag(child, TIF_SINGLESTEP);
-- 
cgit v1.2.3-70-g09d2


From 95cf00fa5d5e2a200a2c044c84bde8389a237e02 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Sat, 11 Aug 2012 18:06:42 +0200
Subject: ptrace/x86: Partly fix set_task_blockstep()->update_debugctlmsr()
 logic

Afaics the usage of update_debugctlmsr() and TIF_BLOCKSTEP in
step.c was always very wrong.

1. update_debugctlmsr() was simply unneeded. The child sleeps
   TASK_TRACED, __switch_to_xtra(next_p => child) should notice
   TIF_BLOCKSTEP and set/clear DEBUGCTLMSR_BTF after resume if
   needed.

2. It is wrong. The state of DEBUGCTLMSR_BTF bit in CPU register
   should always match the state of current's TIF_BLOCKSTEP bit.

3. Even get_debugctlmsr() + update_debugctlmsr() itself does not
   look right. Irq can change other bits in MSR_IA32_DEBUGCTLMSR
   register or the caller can be preempted in between.

4. It is not safe to play with TIF_BLOCKSTEP if task != current.
   DEBUGCTLMSR_BTF and TIF_BLOCKSTEP should always match each
   other if the task is running. The tracee is stopped but it
   can be SIGKILL'ed right before set/clear_tsk_thread_flag().

However, now that uprobes uses user_enable_single_step(current)
we can't simply remove update_debugctlmsr(). So this patch adds
the additional "task == current" check and disables irqs to avoid
the race with interrupts/preemption.

Unfortunately this patch doesn't solve the last problem, we need
another fix. Probably we should teach ptrace_stop() to set/clear
single/block stepping after resume.

And afaics there is yet another problem: perf can play with
MSR_IA32_DEBUGCTLMSR from nmi, this obviously means that even
__switch_to_xtra() has problems.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
 arch/x86/kernel/step.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c
index 7a514986ca0..f89cdc6ccd5 100644
--- a/arch/x86/kernel/step.c
+++ b/arch/x86/kernel/step.c
@@ -161,6 +161,16 @@ static void set_task_blockstep(struct task_struct *task, bool on)
 {
 	unsigned long debugctl;
 
+	/*
+	 * Ensure irq/preemption can't change debugctl in between.
+	 * Note also that both TIF_BLOCKSTEP and debugctl should
+	 * be changed atomically wrt preemption.
+	 * FIXME: this means that set/clear TIF_BLOCKSTEP is simply
+	 * wrong if task != current, SIGKILL can wakeup the stopped
+	 * tracee and set/clear can play with the running task, this
+	 * can confuse the next __switch_to_xtra().
+	 */
+	local_irq_disable();
 	debugctl = get_debugctlmsr();
 	if (on) {
 		debugctl |= DEBUGCTLMSR_BTF;
@@ -169,7 +179,9 @@ static void set_task_blockstep(struct task_struct *task, bool on)
 		debugctl &= ~DEBUGCTLMSR_BTF;
 		clear_tsk_thread_flag(task, TIF_BLOCKSTEP);
 	}
-	update_debugctlmsr(debugctl);
+	if (task == current)
+		update_debugctlmsr(debugctl);
+	local_irq_enable();
 }
 
 /*
-- 
cgit v1.2.3-70-g09d2


From 9bd1190a11c9d2c59d35cb999b8d170ad52aab5f Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Mon, 3 Sep 2012 15:24:17 +0200
Subject: uprobes/x86: Do not (ab)use TIF_SINGLESTEP/user_*_single_step() for
 single-stepping

user_enable/disable_single_step() was designed for ptrace, it assumes
a single user and does unnecessary and wrong things for uprobes. For
example:

	- arch_uprobe_enable_step() can't trust TIF_SINGLESTEP, an
	  application itself can set X86_EFLAGS_TF which must be
	  preserved after arch_uprobe_disable_step().

	- we do not want to set TIF_SINGLESTEP/TIF_FORCED_TF in
	  arch_uprobe_enable_step(), this only makes sense for ptrace.

	- otoh we leak TIF_SINGLESTEP if arch_uprobe_disable_step()
	  doesn't do user_disable_single_step(), the application will
	  be killed after the next syscall.

	- arch_uprobe_enable_step() does access_process_vm() we do
	  not need/want.

Change arch_uprobe_enable/disable_step() to set/clear X86_EFLAGS_TF
directly, this is much simpler and more correct. However, we need to
clear TIF_BLOCKSTEP/DEBUGCTLMSR_BTF before executing the probed insn,
add set_task_blockstep(false).

Note: with or without this patch, there is another (hopefully minor)
problem. A probed "pushf" insn can see the wrong X86_EFLAGS_TF set by
uprobes. Perhaps we should change _disable to update the stack, or
teach arch_uprobe_skip_sstep() to emulate this insn.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
---
 arch/x86/include/asm/processor.h |  2 ++
 arch/x86/kernel/step.c           |  2 +-
 arch/x86/kernel/uprobes.c        | 32 ++++++++++++++++++--------------
 3 files changed, 21 insertions(+), 15 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index d048cad9bca..433d2e5c98a 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -759,6 +759,8 @@ static inline void update_debugctlmsr(unsigned long debugctlmsr)
 	wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr);
 }
 
+extern void set_task_blockstep(struct task_struct *task, bool on);
+
 /*
  * from system description table in BIOS. Mostly for MCA use, but
  * others may find it useful:
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c
index f89cdc6ccd5..cd3b2438a98 100644
--- a/arch/x86/kernel/step.c
+++ b/arch/x86/kernel/step.c
@@ -157,7 +157,7 @@ static int enable_single_step(struct task_struct *child)
 	return 1;
 }
 
-static void set_task_blockstep(struct task_struct *task, bool on)
+void set_task_blockstep(struct task_struct *task, bool on)
 {
 	unsigned long debugctl;
 
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 309a0e02b12..3b4aae68efe 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -683,26 +683,30 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
 
 void arch_uprobe_enable_step(struct arch_uprobe *auprobe)
 {
-	struct uprobe_task	*utask		= current->utask;
-	struct arch_uprobe_task	*autask		= &utask->autask;
+	struct task_struct *task = current;
+	struct arch_uprobe_task	*autask	= &task->utask->autask;
+	struct pt_regs *regs = task_pt_regs(task);
 
 	autask->restore_flags = 0;
-	if (!test_tsk_thread_flag(current, TIF_SINGLESTEP) &&
-			!(auprobe->fixups & UPROBE_FIX_SETF))
+	if (!(regs->flags & X86_EFLAGS_TF) &&
+	    !(auprobe->fixups & UPROBE_FIX_SETF))
 		autask->restore_flags |= UPROBE_CLEAR_TF;
-	/*
-	 * The state of TIF_BLOCKSTEP is not saved. With the TF flag set we
-	 * would to examine the opcode and the flags to make it right. Without
-	 * TF block stepping makes no sense.
-	 */
-	user_enable_single_step(current);
+
+	regs->flags |= X86_EFLAGS_TF;
+	if (test_tsk_thread_flag(task, TIF_BLOCKSTEP))
+		set_task_blockstep(task, false);
 }
 
 void arch_uprobe_disable_step(struct arch_uprobe *auprobe)
 {
-	struct uprobe_task *utask		= current->utask;
-	struct arch_uprobe_task	*autask		= &utask->autask;
-
+	struct task_struct *task = current;
+	struct arch_uprobe_task	*autask	= &task->utask->autask;
+	struct pt_regs *regs = task_pt_regs(task);
+	/*
+	 * The state of TIF_BLOCKSTEP was not saved so we can get an extra
+	 * SIGTRAP if we do not clear TF. We need to examine the opcode to
+	 * make it right.
+	 */
 	if (autask->restore_flags & UPROBE_CLEAR_TF)
-		user_disable_single_step(current);
+		regs->flags &= ~X86_EFLAGS_TF;
 }
-- 
cgit v1.2.3-70-g09d2


From 3a4664aa8362d9fa9110828f55afa9f9fcd7e484 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Mon, 3 Sep 2012 16:05:10 +0200
Subject: uprobes/x86: Xol should send SIGTRAP if X86_EFLAGS_TF was set

arch_uprobe_disable_step() correctly preserves X86_EFLAGS_TF and
returns to user-mode. But this means the application gets SIGTRAP
only after the next insn.

This means that UPROBE_CLEAR_TF logic is not really right. _enable
should only record the state of X86_EFLAGS_TF, and _disable should
check it separately from UPROBE_FIX_SETF.

Remove arch_uprobe_task->restore_flags, add ->saved_tf instead, and
change enable/disable accordingly. This assumes that the probed insn
was not trapped, see the next patch.

arch_uprobe_skip_sstep() logic has the same problem, change it to
check X86_EFLAGS_TF and send SIGTRAP as well. We will cleanup this
all after we fold enable/disable_step into pre/post_hol hooks.

Note: send_sig(SIGTRAP) is not actually right, we need send_sigtrap().
But this needs more changes, handle_swbp() does the same and this is
equally wrong.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
---
 arch/x86/include/asm/uprobes.h |  3 +--
 arch/x86/kernel/uprobes.c      | 19 +++++++++++++------
 2 files changed, 14 insertions(+), 8 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h
index cee58624cb3..d561ff5a3d4 100644
--- a/arch/x86/include/asm/uprobes.h
+++ b/arch/x86/include/asm/uprobes.h
@@ -46,8 +46,7 @@ struct arch_uprobe_task {
 #ifdef CONFIG_X86_64
 	unsigned long			saved_scratch_register;
 #endif
-#define UPROBE_CLEAR_TF			(1 << 0)
-	unsigned int			restore_flags;
+	unsigned int			saved_tf;
 };
 
 extern int  arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long addr);
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 3b4aae68efe..7e993d1f199 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -653,7 +653,7 @@ void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
  * Skip these instructions as per the currently known x86 ISA.
  * 0x66* { 0x90 | 0x0f 0x1f | 0x0f 0x19 | 0x87 0xc0 }
  */
-bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
+static bool __skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
 	int i;
 
@@ -681,16 +681,21 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
 	return false;
 }
 
+bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+	bool ret = __skip_sstep(auprobe, regs);
+	if (ret && (regs->flags & X86_EFLAGS_TF))
+		send_sig(SIGTRAP, current, 0);
+	return ret;
+}
+
 void arch_uprobe_enable_step(struct arch_uprobe *auprobe)
 {
 	struct task_struct *task = current;
 	struct arch_uprobe_task	*autask	= &task->utask->autask;
 	struct pt_regs *regs = task_pt_regs(task);
 
-	autask->restore_flags = 0;
-	if (!(regs->flags & X86_EFLAGS_TF) &&
-	    !(auprobe->fixups & UPROBE_FIX_SETF))
-		autask->restore_flags |= UPROBE_CLEAR_TF;
+	autask->saved_tf = !!(regs->flags & X86_EFLAGS_TF);
 
 	regs->flags |= X86_EFLAGS_TF;
 	if (test_tsk_thread_flag(task, TIF_BLOCKSTEP))
@@ -707,6 +712,8 @@ void arch_uprobe_disable_step(struct arch_uprobe *auprobe)
 	 * SIGTRAP if we do not clear TF. We need to examine the opcode to
 	 * make it right.
 	 */
-	if (autask->restore_flags & UPROBE_CLEAR_TF)
+	if (autask->saved_tf)
+		send_sig(SIGTRAP, task, 0);
+	else if (!(auprobe->fixups & UPROBE_FIX_SETF))
 		regs->flags &= ~X86_EFLAGS_TF;
 }
-- 
cgit v1.2.3-70-g09d2


From d6a00b35e411519d774d978cdf80e4406d01b36b Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Sat, 8 Sep 2012 18:38:15 +0200
Subject: uprobes/x86: Fix arch_uprobe_disable_step() && UTASK_SSTEP_TRAPPED
 interaction

arch_uprobe_disable_step() should also take UTASK_SSTEP_TRAPPED into
account. In this case the probed insn was not executed, we need to
clear X86_EFLAGS_TF if it was set by us and that is all.

Again, this code will look more clean when we move it into
arch_uprobe_post_xol() and arch_uprobe_abort_xol().

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
---
 arch/x86/kernel/uprobes.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 7e993d1f199..9538f00827a 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -706,14 +706,20 @@ void arch_uprobe_disable_step(struct arch_uprobe *auprobe)
 {
 	struct task_struct *task = current;
 	struct arch_uprobe_task	*autask	= &task->utask->autask;
+	bool trapped = (task->utask->state == UTASK_SSTEP_TRAPPED);
 	struct pt_regs *regs = task_pt_regs(task);
 	/*
 	 * The state of TIF_BLOCKSTEP was not saved so we can get an extra
 	 * SIGTRAP if we do not clear TF. We need to examine the opcode to
 	 * make it right.
 	 */
-	if (autask->saved_tf)
-		send_sig(SIGTRAP, task, 0);
-	else if (!(auprobe->fixups & UPROBE_FIX_SETF))
-		regs->flags &= ~X86_EFLAGS_TF;
+	if (unlikely(trapped)) {
+		if (!autask->saved_tf)
+			regs->flags &= ~X86_EFLAGS_TF;
+	} else {
+		if (autask->saved_tf)
+			send_sig(SIGTRAP, task, 0);
+		else if (!(auprobe->fixups & UPROBE_FIX_SETF))
+			regs->flags &= ~X86_EFLAGS_TF;
+	}
 }
-- 
cgit v1.2.3-70-g09d2


From baedbf02b1912225d60dd7403acb4b4e003088b5 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Mon, 3 Sep 2012 17:02:16 +0200
Subject: uprobes: Make arch_uprobe_task->saved_trap_nr "unsigned int"

Make arch_uprobe_task->saved_trap_nr "unsigned int" and move it down
after ->saved_scratch_register, this changes sizeof() from 24 to 16.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
---
 arch/x86/include/asm/uprobes.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h
index d561ff5a3d4..8ff8be7835a 100644
--- a/arch/x86/include/asm/uprobes.h
+++ b/arch/x86/include/asm/uprobes.h
@@ -42,10 +42,10 @@ struct arch_uprobe {
 };
 
 struct arch_uprobe_task {
-	unsigned long			saved_trap_nr;
 #ifdef CONFIG_X86_64
 	unsigned long			saved_scratch_register;
 #endif
+	unsigned int			saved_trap_nr;
 	unsigned int			saved_tf;
 };
 
-- 
cgit v1.2.3-70-g09d2


From 38cb5ef4473c6f510fae3a00bdac3acd550e3796 Mon Sep 17 00:00:00 2001
From: Matthew Garrett <mjg@redhat.com>
Date: Thu, 26 Jul 2012 18:00:27 -0400
Subject: X86: Improve GOP detection in the EFI boot stub

We currently use the PCI IO protocol as a proxy for a functional GOP. This
is less than ideal, since some platforms will put the GOP on output devices
rather than the GPU itself. Move to using the conout protocol. This is not
guaranteed per-spec, but is part of the consplitter implementation that
causes this problem in the first place and so should be reliable.

Signed-off-by: Matthew Garrett <mjg@redhat.com>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
---
 arch/x86/boot/compressed/eboot.c | 29 ++++++++++++++++-------------
 arch/x86/boot/compressed/eboot.h |  4 ++++
 2 files changed, 20 insertions(+), 13 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index b3e0227df2c..d5e4044505d 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -276,8 +276,9 @@ static efi_status_t setup_gop(struct screen_info *si, efi_guid_t *proto,
 	nr_gops = size / sizeof(void *);
 	for (i = 0; i < nr_gops; i++) {
 		struct efi_graphics_output_mode_info *info;
-		efi_guid_t pciio_proto = EFI_PCI_IO_PROTOCOL_GUID;
-		void *pciio;
+		efi_guid_t conout_proto = EFI_CONSOLE_OUT_DEVICE_GUID;
+		bool conout_found = false;
+		void *dummy;
 		void *h = gop_handle[i];
 
 		status = efi_call_phys3(sys_table->boottime->handle_protocol,
@@ -285,19 +286,21 @@ static efi_status_t setup_gop(struct screen_info *si, efi_guid_t *proto,
 		if (status != EFI_SUCCESS)
 			continue;
 
-		efi_call_phys3(sys_table->boottime->handle_protocol,
-			       h, &pciio_proto, &pciio);
+		status = efi_call_phys3(sys_table->boottime->handle_protocol,
+					h, &conout_proto, &dummy);
+
+		if (status == EFI_SUCCESS)
+			conout_found = true;
 
 		status = efi_call_phys4(gop->query_mode, gop,
 					gop->mode->mode, &size, &info);
-		if (status == EFI_SUCCESS && (!first_gop || pciio)) {
+		if (status == EFI_SUCCESS && (!first_gop || conout_found)) {
 			/*
-			 * Apple provide GOPs that are not backed by
-			 * real hardware (they're used to handle
-			 * multiple displays). The workaround is to
-			 * search for a GOP implementing the PCIIO
-			 * protocol, and if one isn't found, to just
-			 * fallback to the first GOP.
+			 * Systems that use the UEFI Console Splitter may
+			 * provide multiple GOP devices, not all of which are
+			 * backed by real hardware. The workaround is to search
+			 * for a GOP implementing the ConOut protocol, and if
+			 * one isn't found, to just fall back to the first GOP.
 			 */
 			width = info->horizontal_resolution;
 			height = info->vertical_resolution;
@@ -308,10 +311,10 @@ static efi_status_t setup_gop(struct screen_info *si, efi_guid_t *proto,
 			pixels_per_scan_line = info->pixels_per_scan_line;
 
 			/*
-			 * Once we've found a GOP supporting PCIIO,
+			 * Once we've found a GOP supporting ConOut,
 			 * don't bother looking any further.
 			 */
-			if (pciio)
+			if (conout_found)
 				break;
 
 			first_gop = gop;
diff --git a/arch/x86/boot/compressed/eboot.h b/arch/x86/boot/compressed/eboot.h
index 3b6e15627c5..e5b0a8f91c5 100644
--- a/arch/x86/boot/compressed/eboot.h
+++ b/arch/x86/boot/compressed/eboot.h
@@ -14,6 +14,10 @@
 #define EFI_PAGE_SIZE		(1UL << EFI_PAGE_SHIFT)
 #define EFI_READ_CHUNK_SIZE	(1024 * 1024)
 
+#define EFI_CONSOLE_OUT_DEVICE_GUID    \
+	EFI_GUID(0xd3b36f2c, 0xd551, 0x11d4, 0x9a, 0x46, 0x0, 0x90, 0x27, \
+		  0x3f, 0xc1, 0x4d)
+
 #define PIXEL_RGB_RESERVED_8BIT_PER_COLOR		0
 #define PIXEL_BGR_RESERVED_8BIT_PER_COLOR		1
 #define PIXEL_BIT_MASK					2
-- 
cgit v1.2.3-70-g09d2


From 9dead5bbb825d7c25c0400e61de83075046322d0 Mon Sep 17 00:00:00 2001
From: Matthew Garrett <mjg@redhat.com>
Date: Thu, 26 Jul 2012 18:00:00 -0400
Subject: efi: Build EFI stub with EFI-appropriate options

We can't assume the presence of the red zone while we're still in a boot
services environment, so we should build with -fno-red-zone to avoid
problems. Change the size of wchar at the same time to make string handling
simpler.

Signed-off-by: Matthew Garrett <mjg@redhat.com>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
---
 arch/x86/boot/compressed/Makefile | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index e398bb5d63b..8a84501acb1 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -28,6 +28,9 @@ VMLINUX_OBJS = $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \
 	$(obj)/string.o $(obj)/cmdline.o $(obj)/early_serial_console.o \
 	$(obj)/piggy.o
 
+$(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone
+$(obj)/efi_stub_$(BITS).o: KBUILD_CLFAGS += -fshort-wchar -mno-red-zone
+
 ifeq ($(CONFIG_EFI_STUB), y)
 	VMLINUX_OBJS += $(obj)/eboot.o $(obj)/efi_stub_$(BITS).o
 endif
-- 
cgit v1.2.3-70-g09d2


From d6cf86d8f23253225fe2a763d627ecf7dfee9dae Mon Sep 17 00:00:00 2001
From: Seiji Aguchi <seiji.aguchi@hds.com>
Date: Tue, 24 Jul 2012 13:27:23 +0000
Subject: efi: initialize efi.runtime_version to make
 query_variable_info/update_capsule workable

A value of efi.runtime_version is checked before calling
update_capsule()/query_variable_info() as follows.
But it isn't initialized anywhere.

<snip>
static efi_status_t virt_efi_query_variable_info(u32 attr,
                                                 u64 *storage_space,
                                                 u64 *remaining_space,
                                                 u64 *max_variable_size)
{
        if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION)
                return EFI_UNSUPPORTED;
<snip>

This patch initializes a value of efi.runtime_version at boot time.

Signed-off-by: Seiji Aguchi <seiji.aguchi@hds.com>
Acked-by: Matthew Garrett <mjg@redhat.com>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
---
 arch/x86/platform/efi/efi.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/x86')

diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 92660edaa1e..f55a4ce6dc4 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -890,6 +890,7 @@ void __init efi_enter_virtual_mode(void)
 	 *
 	 * Call EFI services through wrapper functions.
 	 */
+	efi.runtime_version = efi_systab.fw_revision;
 	efi.get_time = virt_efi_get_time;
 	efi.set_time = virt_efi_set_time;
 	efi.get_wakeup_time = virt_efi_get_wakeup_time;
-- 
cgit v1.2.3-70-g09d2


From f462ed939de67c20528bc08f11d2fc4f2d59c0d5 Mon Sep 17 00:00:00 2001
From: Matthew Garrett <mjg@redhat.com>
Date: Fri, 27 Jul 2012 12:58:53 -0400
Subject: efifb: Skip DMI checks if the bootloader knows what it's doing

The majority of the DMI checks in efifb are for cases where the bootloader
has provided invalid information. However, on some machines the overrides
may do more harm than good due to configuration differences between machines
with the same machine identifier. It turns out that it's possible for the
bootloader to get the correct information on GOP-based systems, but we
can't guarantee that the kernel's being booted with one that's been updated
to do so. Add support for a capabilities flag that can be set by the
bootloader, and skip the DMI checks in that case. Additionally, set this
flag in the UEFI stub code.

Signed-off-by: Matthew Garrett <mjg@redhat.com>
Acked-by: Peter Jones <pjones@redhat.com>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
---
 arch/x86/boot/compressed/eboot.c | 2 ++
 drivers/video/efifb.c            | 4 +++-
 include/linux/screen_info.h      | 2 ++
 3 files changed, 7 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index d5e4044505d..bbd83b9cb4d 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -379,6 +379,8 @@ static efi_status_t setup_gop(struct screen_info *si, efi_guid_t *proto,
 		si->rsvd_pos = 0;
 	}
 
+	si->capabilities |= VIDEO_CAPABILITY_SKIP_QUIRKS;
+
 free_handle:
 	efi_call_phys1(sys_table->boottime->free_pool, gop_handle);
 	return status;
diff --git a/drivers/video/efifb.c b/drivers/video/efifb.c
index b4a632ada40..932abaa58a8 100644
--- a/drivers/video/efifb.c
+++ b/drivers/video/efifb.c
@@ -553,7 +553,9 @@ static int __init efifb_init(void)
 	int ret;
 	char *option = NULL;
 
-	dmi_check_system(dmi_system_table);
+	if (screen_info.orig_video_isVGA != VIDEO_TYPE_EFI ||
+	    !(screen_info.capabilities & VIDEO_CAPABILITY_SKIP_QUIRKS))
+		dmi_check_system(dmi_system_table);
 
 	if (screen_info.orig_video_isVGA != VIDEO_TYPE_EFI)
 		return -ENODEV;
diff --git a/include/linux/screen_info.h b/include/linux/screen_info.h
index 899fbb487c9..fb3c5a8fef3 100644
--- a/include/linux/screen_info.h
+++ b/include/linux/screen_info.h
@@ -68,6 +68,8 @@ struct screen_info {
 
 #define VIDEO_FLAGS_NOCURSOR	(1 << 0) /* The video mode has no cursor set */
 
+#define VIDEO_CAPABILITY_SKIP_QUIRKS	(1 << 0)
+
 #ifdef __KERNEL__
 extern struct screen_info screen_info;
 
-- 
cgit v1.2.3-70-g09d2


From e9b10953edbccd3744e039ffc060ab2692f17856 Mon Sep 17 00:00:00 2001
From: Matthew Garrett <mjg@redhat.com>
Date: Fri, 27 Jul 2012 17:20:49 -0400
Subject: x86, EFI: Calculate the EFI framebuffer size instead of trusting the
 firmware

Seth Forshee reported that his system was reporting that the EFI framebuffer
stretched from 0x90010000-0xb0010000 despite the GPU's BAR only covering
0x90000000-0x9ffffff. It's safer to calculate this value from the pixel
stride and screen height (values we already depend on) rather than face
potential problems with resource allocation later on.

Signed-off-by: Matthew Garrett <mjg@redhat.com>
Tested-by: Seth Forshee <seth.forshee@canonical.com>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
---
 arch/x86/boot/compressed/eboot.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index bbd83b9cb4d..c760e073963 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -331,7 +331,6 @@ static efi_status_t setup_gop(struct screen_info *si, efi_guid_t *proto,
 	si->lfb_width = width;
 	si->lfb_height = height;
 	si->lfb_base = fb_base;
-	si->lfb_size = fb_size;
 	si->pages = 1;
 
 	if (pixel_format == PIXEL_RGB_RESERVED_8BIT_PER_COLOR) {
@@ -379,6 +378,8 @@ static efi_status_t setup_gop(struct screen_info *si, efi_guid_t *proto,
 		si->rsvd_pos = 0;
 	}
 
+	si->lfb_size = si->lfb_linelength * si->lfb_height;
+
 	si->capabilities |= VIDEO_CAPABILITY_SKIP_QUIRKS;
 
 free_handle:
-- 
cgit v1.2.3-70-g09d2


From 83287ea420ced7242a704488aab0fcdcf2ced9ab Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Sun, 16 Sep 2012 15:10:57 +0300
Subject: KVM: VMX: Make lto-friendly

LTO (link-time optimization) doesn't like local labels to be referred to
from a different function, since the two functions may be built in separate
compilation units.  Use an external variable instead.

Signed-off-by: Avi Kivity <avi@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/vmx.c | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index d62b4139a29..5faf12ace54 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -127,6 +127,8 @@ module_param(ple_gap, int, S_IRUGO);
 static int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW;
 module_param(ple_window, int, S_IRUGO);
 
+extern const ulong vmx_return;
+
 #define NR_AUTOLOAD_MSRS 8
 #define VMCS02_POOL_SIZE 1
 
@@ -3724,8 +3726,7 @@ static void vmx_set_constant_host_state(void)
 	native_store_idt(&dt);
 	vmcs_writel(HOST_IDTR_BASE, dt.address);   /* 22.2.4 */
 
-	asm("mov $.Lkvm_vmx_return, %0" : "=r"(tmpl));
-	vmcs_writel(HOST_RIP, tmpl); /* 22.2.5 */
+	vmcs_writel(HOST_RIP, vmx_return); /* 22.2.5 */
 
 	rdmsr(MSR_IA32_SYSENTER_CS, low32, high32);
 	vmcs_write32(HOST_IA32_SYSENTER_CS, low32);
@@ -6276,11 +6277,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 		"mov %c[rcx](%0), %%"R"cx \n\t" /* kills %0 (ecx) */
 
 		/* Enter guest mode */
-		"jne .Llaunched \n\t"
+		"jne 1f \n\t"
 		__ex(ASM_VMX_VMLAUNCH) "\n\t"
-		"jmp .Lkvm_vmx_return \n\t"
-		".Llaunched: " __ex(ASM_VMX_VMRESUME) "\n\t"
-		".Lkvm_vmx_return: "
+		"jmp 2f \n\t"
+		"1: " __ex(ASM_VMX_VMRESUME) "\n\t"
+		"2: "
 		/* Save guest registers, load host registers, keep flags */
 		"mov %0, %c[wordsize](%%"R"sp) \n\t"
 		"pop %0 \n\t"
@@ -6306,6 +6307,10 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 
 		"pop  %%"R"bp; pop  %%"R"dx \n\t"
 		"setbe %c[fail](%0) \n\t"
+		".pushsection .rodata \n\t"
+		".global vmx_return \n\t"
+		"vmx_return: " _ASM_PTR " 2b \n\t"
+		".popsection"
 	      : : "c"(vmx), "d"((unsigned long)HOST_RSP),
 		[launched]"i"(offsetof(struct vcpu_vmx, __launched)),
 		[fail]"i"(offsetof(struct vcpu_vmx, fail)),
-- 
cgit v1.2.3-70-g09d2


From b188c81f2e1a188ddda6a3d353e5b546c30a9b90 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Sun, 16 Sep 2012 15:10:58 +0300
Subject: KVM: VMX: Make use of asm.h

Use macros for bitness-insensitive register names, instead of
rolling our own.

Signed-off-by: Avi Kivity <avi@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/vmx.c | 69 ++++++++++++++++++++++++------------------------------
 1 file changed, 30 insertions(+), 39 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 5faf12ace54..30bcb953afe 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -6184,14 +6184,6 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
 					msrs[i].host);
 }
 
-#ifdef CONFIG_X86_64
-#define R "r"
-#define Q "q"
-#else
-#define R "e"
-#define Q "l"
-#endif
-
 static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -6240,30 +6232,30 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 	vmx->__launched = vmx->loaded_vmcs->launched;
 	asm(
 		/* Store host registers */
-		"push %%"R"dx; push %%"R"bp;"
-		"push %%"R"cx \n\t" /* placeholder for guest rcx */
-		"push %%"R"cx \n\t"
-		"cmp %%"R"sp, %c[host_rsp](%0) \n\t"
+		"push %%" _ASM_DX "; push %%" _ASM_BP ";"
+		"push %%" _ASM_CX " \n\t" /* placeholder for guest rcx */
+		"push %%" _ASM_CX " \n\t"
+		"cmp %%" _ASM_SP ", %c[host_rsp](%0) \n\t"
 		"je 1f \n\t"
-		"mov %%"R"sp, %c[host_rsp](%0) \n\t"
+		"mov %%" _ASM_SP ", %c[host_rsp](%0) \n\t"
 		__ex(ASM_VMX_VMWRITE_RSP_RDX) "\n\t"
 		"1: \n\t"
 		/* Reload cr2 if changed */
-		"mov %c[cr2](%0), %%"R"ax \n\t"
-		"mov %%cr2, %%"R"dx \n\t"
-		"cmp %%"R"ax, %%"R"dx \n\t"
+		"mov %c[cr2](%0), %%" _ASM_AX " \n\t"
+		"mov %%cr2, %%" _ASM_DX " \n\t"
+		"cmp %%" _ASM_AX ", %%" _ASM_DX " \n\t"
 		"je 2f \n\t"
-		"mov %%"R"ax, %%cr2 \n\t"
+		"mov %%" _ASM_AX", %%cr2 \n\t"
 		"2: \n\t"
 		/* Check if vmlaunch of vmresume is needed */
 		"cmpl $0, %c[launched](%0) \n\t"
 		/* Load guest registers.  Don't clobber flags. */
-		"mov %c[rax](%0), %%"R"ax \n\t"
-		"mov %c[rbx](%0), %%"R"bx \n\t"
-		"mov %c[rdx](%0), %%"R"dx \n\t"
-		"mov %c[rsi](%0), %%"R"si \n\t"
-		"mov %c[rdi](%0), %%"R"di \n\t"
-		"mov %c[rbp](%0), %%"R"bp \n\t"
+		"mov %c[rax](%0), %%" _ASM_AX " \n\t"
+		"mov %c[rbx](%0), %%" _ASM_BX " \n\t"
+		"mov %c[rdx](%0), %%" _ASM_DX " \n\t"
+		"mov %c[rsi](%0), %%" _ASM_SI " \n\t"
+		"mov %c[rdi](%0), %%" _ASM_DI " \n\t"
+		"mov %c[rbp](%0), %%" _ASM_BP " \n\t"
 #ifdef CONFIG_X86_64
 		"mov %c[r8](%0),  %%r8  \n\t"
 		"mov %c[r9](%0),  %%r9  \n\t"
@@ -6274,7 +6266,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 		"mov %c[r14](%0), %%r14 \n\t"
 		"mov %c[r15](%0), %%r15 \n\t"
 #endif
-		"mov %c[rcx](%0), %%"R"cx \n\t" /* kills %0 (ecx) */
+		"mov %c[rcx](%0), %%" _ASM_CX " \n\t" /* kills %0 (ecx) */
 
 		/* Enter guest mode */
 		"jne 1f \n\t"
@@ -6283,15 +6275,15 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 		"1: " __ex(ASM_VMX_VMRESUME) "\n\t"
 		"2: "
 		/* Save guest registers, load host registers, keep flags */
-		"mov %0, %c[wordsize](%%"R"sp) \n\t"
+		"mov %0, %c[wordsize](%%" _ASM_SP ") \n\t"
 		"pop %0 \n\t"
-		"mov %%"R"ax, %c[rax](%0) \n\t"
-		"mov %%"R"bx, %c[rbx](%0) \n\t"
-		"pop"Q" %c[rcx](%0) \n\t"
-		"mov %%"R"dx, %c[rdx](%0) \n\t"
-		"mov %%"R"si, %c[rsi](%0) \n\t"
-		"mov %%"R"di, %c[rdi](%0) \n\t"
-		"mov %%"R"bp, %c[rbp](%0) \n\t"
+		"mov %%" _ASM_AX ", %c[rax](%0) \n\t"
+		"mov %%" _ASM_BX ", %c[rbx](%0) \n\t"
+		__ASM_SIZE(pop) " %c[rcx](%0) \n\t"
+		"mov %%" _ASM_DX ", %c[rdx](%0) \n\t"
+		"mov %%" _ASM_SI ", %c[rsi](%0) \n\t"
+		"mov %%" _ASM_DI ", %c[rdi](%0) \n\t"
+		"mov %%" _ASM_BP ", %c[rbp](%0) \n\t"
 #ifdef CONFIG_X86_64
 		"mov %%r8,  %c[r8](%0) \n\t"
 		"mov %%r9,  %c[r9](%0) \n\t"
@@ -6302,10 +6294,10 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 		"mov %%r14, %c[r14](%0) \n\t"
 		"mov %%r15, %c[r15](%0) \n\t"
 #endif
-		"mov %%cr2, %%"R"ax   \n\t"
-		"mov %%"R"ax, %c[cr2](%0) \n\t"
+		"mov %%cr2, %%" _ASM_AX "   \n\t"
+		"mov %%" _ASM_AX ", %c[cr2](%0) \n\t"
 
-		"pop  %%"R"bp; pop  %%"R"dx \n\t"
+		"pop  %%" _ASM_BP "; pop  %%" _ASM_DX " \n\t"
 		"setbe %c[fail](%0) \n\t"
 		".pushsection .rodata \n\t"
 		".global vmx_return \n\t"
@@ -6335,9 +6327,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 		[cr2]"i"(offsetof(struct vcpu_vmx, vcpu.arch.cr2)),
 		[wordsize]"i"(sizeof(ulong))
 	      : "cc", "memory"
-		, R"ax", R"bx", R"di", R"si"
 #ifdef CONFIG_X86_64
+		, "rax", "rbx", "rdi", "rsi"
 		, "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
+#else
+		, "eax", "ebx", "edi", "esi"
 #endif
 	      );
 
@@ -6389,9 +6383,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 	vmx_complete_interrupts(vmx);
 }
 
-#undef R
-#undef Q
-
 static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
-- 
cgit v1.2.3-70-g09d2


From 7454766f7bead388251aedee35a478356a7f4e72 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Sun, 16 Sep 2012 15:10:59 +0300
Subject: KVM: SVM: Make use of asm.h

Use macros for bitness-insensitive register names, instead of
rolling our own.

Signed-off-by: Avi Kivity <avi@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/svm.c | 46 ++++++++++++++++++++--------------------------
 1 file changed, 20 insertions(+), 26 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 611c72875fb..818fceb3091 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -3782,12 +3782,6 @@ static void svm_cancel_injection(struct kvm_vcpu *vcpu)
 	svm_complete_interrupts(svm);
 }
 
-#ifdef CONFIG_X86_64
-#define R "r"
-#else
-#define R "e"
-#endif
-
 static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
@@ -3814,13 +3808,13 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 	local_irq_enable();
 
 	asm volatile (
-		"push %%"R"bp; \n\t"
-		"mov %c[rbx](%[svm]), %%"R"bx \n\t"
-		"mov %c[rcx](%[svm]), %%"R"cx \n\t"
-		"mov %c[rdx](%[svm]), %%"R"dx \n\t"
-		"mov %c[rsi](%[svm]), %%"R"si \n\t"
-		"mov %c[rdi](%[svm]), %%"R"di \n\t"
-		"mov %c[rbp](%[svm]), %%"R"bp \n\t"
+		"push %%" _ASM_BP "; \n\t"
+		"mov %c[rbx](%[svm]), %%" _ASM_BX " \n\t"
+		"mov %c[rcx](%[svm]), %%" _ASM_CX " \n\t"
+		"mov %c[rdx](%[svm]), %%" _ASM_DX " \n\t"
+		"mov %c[rsi](%[svm]), %%" _ASM_SI " \n\t"
+		"mov %c[rdi](%[svm]), %%" _ASM_DI " \n\t"
+		"mov %c[rbp](%[svm]), %%" _ASM_BP " \n\t"
 #ifdef CONFIG_X86_64
 		"mov %c[r8](%[svm]),  %%r8  \n\t"
 		"mov %c[r9](%[svm]),  %%r9  \n\t"
@@ -3833,20 +3827,20 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 #endif
 
 		/* Enter guest mode */
-		"push %%"R"ax \n\t"
-		"mov %c[vmcb](%[svm]), %%"R"ax \n\t"
+		"push %%" _ASM_AX " \n\t"
+		"mov %c[vmcb](%[svm]), %%" _ASM_AX " \n\t"
 		__ex(SVM_VMLOAD) "\n\t"
 		__ex(SVM_VMRUN) "\n\t"
 		__ex(SVM_VMSAVE) "\n\t"
-		"pop %%"R"ax \n\t"
+		"pop %%" _ASM_AX " \n\t"
 
 		/* Save guest registers, load host registers */
-		"mov %%"R"bx, %c[rbx](%[svm]) \n\t"
-		"mov %%"R"cx, %c[rcx](%[svm]) \n\t"
-		"mov %%"R"dx, %c[rdx](%[svm]) \n\t"
-		"mov %%"R"si, %c[rsi](%[svm]) \n\t"
-		"mov %%"R"di, %c[rdi](%[svm]) \n\t"
-		"mov %%"R"bp, %c[rbp](%[svm]) \n\t"
+		"mov %%" _ASM_BX ", %c[rbx](%[svm]) \n\t"
+		"mov %%" _ASM_CX ", %c[rcx](%[svm]) \n\t"
+		"mov %%" _ASM_DX ", %c[rdx](%[svm]) \n\t"
+		"mov %%" _ASM_SI ", %c[rsi](%[svm]) \n\t"
+		"mov %%" _ASM_DI ", %c[rdi](%[svm]) \n\t"
+		"mov %%" _ASM_BP ", %c[rbp](%[svm]) \n\t"
 #ifdef CONFIG_X86_64
 		"mov %%r8,  %c[r8](%[svm]) \n\t"
 		"mov %%r9,  %c[r9](%[svm]) \n\t"
@@ -3857,7 +3851,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 		"mov %%r14, %c[r14](%[svm]) \n\t"
 		"mov %%r15, %c[r15](%[svm]) \n\t"
 #endif
-		"pop %%"R"bp"
+		"pop %%" _ASM_BP
 		:
 		: [svm]"a"(svm),
 		  [vmcb]"i"(offsetof(struct vcpu_svm, vmcb_pa)),
@@ -3878,9 +3872,11 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 		  [r15]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R15]))
 #endif
 		: "cc", "memory"
-		, R"bx", R"cx", R"dx", R"si", R"di"
 #ifdef CONFIG_X86_64
+		, "rbx", "rcx", "rdx", "rsi", "rdi"
 		, "r8", "r9", "r10", "r11" , "r12", "r13", "r14", "r15"
+#else
+		, "ebx", "ecx", "edx", "esi", "edi"
 #endif
 		);
 
@@ -3940,8 +3936,6 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 	mark_all_clean(svm->vmcb);
 }
 
-#undef R
-
 static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
-- 
cgit v1.2.3-70-g09d2


From 314d9f63f385096580e9e2a06eaa0745d92fe4ac Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zheng.z.yan@intel.com>
Date: Mon, 10 Sep 2012 15:53:49 +0800
Subject: perf/x86: Add cpumask for uncore pmu

This patch adds a cpumask file to the uncore pmu sysfs directory.  The
cpumask file contains one active cpu for every socket.

Signed-off-by: "Yan, Zheng" <zheng.z.yan@intel.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Ingo Molnar <mingo@kernel.org>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Cc: "Yan, Zheng" <zheng.z.yan@intel.com>
Link: http://lkml.kernel.org/r/1347263631-23175-2-git-send-email-zheng.z.yan@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 arch/x86/kernel/cpu/perf_event_intel_uncore.c | 28 ++++++++++++++++++++++++---
 arch/x86/kernel/cpu/perf_event_intel_uncore.h |  6 ++++--
 2 files changed, 29 insertions(+), 5 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index 0a5571080e7..62ec3e6af7e 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -2341,6 +2341,27 @@ int uncore_pmu_event_init(struct perf_event *event)
 	return ret;
 }
 
+static ssize_t uncore_get_attr_cpumask(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	int n = cpulist_scnprintf(buf, PAGE_SIZE - 2, &uncore_cpu_mask);
+
+	buf[n++] = '\n';
+	buf[n] = '\0';
+	return n;
+}
+
+static DEVICE_ATTR(cpumask, S_IRUGO, uncore_get_attr_cpumask, NULL);
+
+static struct attribute *uncore_pmu_attrs[] = {
+	&dev_attr_cpumask.attr,
+	NULL,
+};
+
+static struct attribute_group uncore_pmu_attr_group = {
+	.attrs = uncore_pmu_attrs,
+};
+
 static int __init uncore_pmu_register(struct intel_uncore_pmu *pmu)
 {
 	int ret;
@@ -2378,8 +2399,8 @@ static void __init uncore_type_exit(struct intel_uncore_type *type)
 		free_percpu(type->pmus[i].box);
 	kfree(type->pmus);
 	type->pmus = NULL;
-	kfree(type->attr_groups[1]);
-	type->attr_groups[1] = NULL;
+	kfree(type->events_group);
+	type->events_group = NULL;
 }
 
 static void __init uncore_types_exit(struct intel_uncore_type **types)
@@ -2431,9 +2452,10 @@ static int __init uncore_type_init(struct intel_uncore_type *type)
 		for (j = 0; j < i; j++)
 			attrs[j] = &type->event_descs[j].attr.attr;
 
-		type->attr_groups[1] = events_group;
+		type->events_group = events_group;
 	}
 
+	type->pmu_group = &uncore_pmu_attr_group;
 	type->pmus = pmus;
 	return 0;
 fail:
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
index 5b81c1856aa..e68a4550e95 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
@@ -369,10 +369,12 @@ struct intel_uncore_type {
 	struct intel_uncore_pmu *pmus;
 	struct intel_uncore_ops *ops;
 	struct uncore_event_desc *event_descs;
-	const struct attribute_group *attr_groups[3];
+	const struct attribute_group *attr_groups[4];
 };
 
-#define format_group attr_groups[0]
+#define pmu_group attr_groups[0]
+#define format_group attr_groups[1]
+#define events_group attr_groups[2]
 
 struct intel_uncore_ops {
 	void (*init_box)(struct intel_uncore_box *);
-- 
cgit v1.2.3-70-g09d2


From 9fc77441e5e1bf80b794cc546d2243ee9f4afb75 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Sun, 16 Sep 2012 11:50:30 +0300
Subject: KVM: make processes waiting on vcpu mutex killable

vcpu mutex can be held for unlimited time so
taking it with mutex_lock on an ioctl is wrong:
one process could be passed a vcpu fd and
call this ioctl on the vcpu used by another process,
it will then be unkillable until the owner exits.

Call mutex_lock_killable instead and return status.
Note: mutex_lock_interruptible would be even nicer,
but I am not sure all users are prepared to handle EINTR
from these ioctls. They might misinterpret it as an error.

Cleanup paths expect a vcpu that can't be used by
any userspace so this will always succeed - catch bugs
by calling BUG_ON.

Catch callers that don't check return state by adding
__must_check.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/x86.c       | 12 +++++++++---
 include/linux/kvm_host.h |  2 +-
 virt/kvm/kvm_main.c      | 10 +++++++---
 3 files changed, 17 insertions(+), 7 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c4d451ed157..19047eafa38 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6016,7 +6016,9 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 	int r;
 
 	vcpu->arch.mtrr_state.have_fixed = 1;
-	vcpu_load(vcpu);
+	r = vcpu_load(vcpu);
+	if (r)
+		return r;
 	r = kvm_arch_vcpu_reset(vcpu);
 	if (r == 0)
 		r = kvm_mmu_setup(vcpu);
@@ -6027,9 +6029,11 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 
 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 {
+	int r;
 	vcpu->arch.apf.msr_val = 0;
 
-	vcpu_load(vcpu);
+	r = vcpu_load(vcpu);
+	BUG_ON(r);
 	kvm_mmu_unload(vcpu);
 	vcpu_put(vcpu);
 
@@ -6275,7 +6279,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 
 static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
 {
-	vcpu_load(vcpu);
+	int r;
+	r = vcpu_load(vcpu);
+	BUG_ON(r);
 	kvm_mmu_unload(vcpu);
 	vcpu_put(vcpu);
 }
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 40791930bc1..80bfc880921 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -408,7 +408,7 @@ static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i)
 int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id);
 void kvm_vcpu_uninit(struct kvm_vcpu *vcpu);
 
-void vcpu_load(struct kvm_vcpu *vcpu);
+int __must_check vcpu_load(struct kvm_vcpu *vcpu);
 void vcpu_put(struct kvm_vcpu *vcpu);
 
 int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 4fe02d90081..cc3f6dc506e 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -131,11 +131,12 @@ bool kvm_is_mmio_pfn(pfn_t pfn)
 /*
  * Switches to specified vcpu, until a matching vcpu_put()
  */
-void vcpu_load(struct kvm_vcpu *vcpu)
+int vcpu_load(struct kvm_vcpu *vcpu)
 {
 	int cpu;
 
-	mutex_lock(&vcpu->mutex);
+	if (mutex_lock_killable(&vcpu->mutex))
+		return -EINTR;
 	if (unlikely(vcpu->pid != current->pids[PIDTYPE_PID].pid)) {
 		/* The thread running this VCPU changed. */
 		struct pid *oldpid = vcpu->pid;
@@ -148,6 +149,7 @@ void vcpu_load(struct kvm_vcpu *vcpu)
 	preempt_notifier_register(&vcpu->preempt_notifier);
 	kvm_arch_vcpu_load(vcpu, cpu);
 	put_cpu();
+	return 0;
 }
 
 void vcpu_put(struct kvm_vcpu *vcpu)
@@ -1891,7 +1893,9 @@ static long kvm_vcpu_ioctl(struct file *filp,
 #endif
 
 
-	vcpu_load(vcpu);
+	r = vcpu_load(vcpu);
+	if (r)
+		return r;
 	switch (ioctl) {
 	case KVM_RUN:
 		r = -EINVAL;
-- 
cgit v1.2.3-70-g09d2


From e57dbaf77f372ac461b5b0b353c65efae9739a00 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <borislav.petkov@amd.com>
Date: Tue, 13 Sep 2011 15:23:21 +0200
Subject: x86, mce: Enable MCA support by default

MCA is the basic support for hardware error logging and reporting, and
it is majorly unwise to run without it so enable machine check software
support by default on x86.

Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
Acked-by: Tony Luck <tony.luck@intel.com>
---
 arch/x86/Kconfig | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/x86')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 8ec3a1aa4ab..3d2d2ef0e16 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -871,6 +871,7 @@ config X86_REROUTE_FOR_BROKEN_BOOT_IRQS
 
 config X86_MCE
 	bool "Machine Check / overheating reporting"
+	default y
 	---help---
 	  Machine Check support allows the processor to notify the
 	  kernel if it detects a problem (e.g. overheating, data corruption).
-- 
cgit v1.2.3-70-g09d2


From 57639bedd2b8268daa791efcfd0367b9031b057d Mon Sep 17 00:00:00 2001
From: Borislav Petkov <borislav.petkov@amd.com>
Date: Tue, 22 May 2012 18:47:38 +0200
Subject: x86, MCE: Remove unused defines

Those were sitting there unused since the dawn of time, drop them.

Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
---
 arch/x86/include/asm/mce.h | 12 +-----------
 1 file changed, 1 insertion(+), 11 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index a3ac52b29cb..ccaf7c581c8 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -116,19 +116,9 @@ struct mce_log {
 /* Software defined banks */
 #define MCE_EXTENDED_BANK	128
 #define MCE_THERMAL_BANK	MCE_EXTENDED_BANK + 0
-
-#define K8_MCE_THRESHOLD_BASE      (MCE_EXTENDED_BANK + 1)      /* MCE_AMD */
-#define K8_MCE_THRESHOLD_BANK_0    (MCE_THRESHOLD_BASE + 0 * 9)
-#define K8_MCE_THRESHOLD_BANK_1    (MCE_THRESHOLD_BASE + 1 * 9)
-#define K8_MCE_THRESHOLD_BANK_2    (MCE_THRESHOLD_BASE + 2 * 9)
-#define K8_MCE_THRESHOLD_BANK_3    (MCE_THRESHOLD_BASE + 3 * 9)
-#define K8_MCE_THRESHOLD_BANK_4    (MCE_THRESHOLD_BASE + 4 * 9)
-#define K8_MCE_THRESHOLD_BANK_5    (MCE_THRESHOLD_BASE + 5 * 9)
-#define K8_MCE_THRESHOLD_DRAM_ECC  (MCE_THRESHOLD_BANK_4 + 0)
-
+#define K8_MCE_THRESHOLD_BASE      (MCE_EXTENDED_BANK + 1)
 
 #ifdef __KERNEL__
-
 extern void mce_register_decode_chain(struct notifier_block *nb);
 extern void mce_unregister_decode_chain(struct notifier_block *nb);
 
-- 
cgit v1.2.3-70-g09d2


From 050902c011712ad4703038fa4489ec4edd87d396 Mon Sep 17 00:00:00 2001
From: Suresh Siddha <suresh.b.siddha@intel.com>
Date: Tue, 24 Jul 2012 16:05:27 -0700
Subject: x86, signal: Cleanup ifdefs and is_ia32, is_x32

Use config_enabled() to cleanup the definitions of is_ia32/is_x32. Move
the function prototypes to the header file to cleanup ifdefs,
and move the x32_setup_rt_frame() code around.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Link: http://lkml.kernel.org/r/1343171129-2747-2-git-send-email-suresh.b.siddha@intel.com
Merged in compilation fix from,
Link: http://lkml.kernel.org/r/1344544736.8326.17.camel@sbsiddha-desk.sc.intel.com
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/include/asm/fpu-internal.h |  26 ++++-
 arch/x86/include/asm/signal.h       |   4 +
 arch/x86/kernel/signal.c            | 196 +++++++++++++++---------------------
 3 files changed, 110 insertions(+), 116 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h
index 75f4c6d6a33..6f595435ff9 100644
--- a/arch/x86/include/asm/fpu-internal.h
+++ b/arch/x86/include/asm/fpu-internal.h
@@ -12,6 +12,7 @@
 
 #include <linux/kernel_stat.h>
 #include <linux/regset.h>
+#include <linux/compat.h>
 #include <linux/slab.h>
 #include <asm/asm.h>
 #include <asm/cpufeature.h>
@@ -32,7 +33,6 @@ extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get,
 extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set,
 				 xstateregs_set;
 
-
 /*
  * xstateregs_active == fpregs_active. Please refer to the comment
  * at the definition of fpregs_active.
@@ -55,6 +55,22 @@ extern void finit_soft_fpu(struct i387_soft_struct *soft);
 static inline void finit_soft_fpu(struct i387_soft_struct *soft) {}
 #endif
 
+static inline int is_ia32_compat_frame(void)
+{
+	return config_enabled(CONFIG_IA32_EMULATION) &&
+	       test_thread_flag(TIF_IA32);
+}
+
+static inline int is_ia32_frame(void)
+{
+	return config_enabled(CONFIG_X86_32) || is_ia32_compat_frame();
+}
+
+static inline int is_x32_frame(void)
+{
+	return config_enabled(CONFIG_X86_X32_ABI) && test_thread_flag(TIF_X32);
+}
+
 #define X87_FSW_ES (1 << 7)	/* Exception Summary */
 
 static __always_inline __pure bool use_xsaveopt(void)
@@ -180,6 +196,11 @@ static inline void fpu_fxsave(struct fpu *fpu)
 #endif
 }
 
+int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
+			compat_sigset_t *set, struct pt_regs *regs);
+int ia32_setup_frame(int sig, struct k_sigaction *ka,
+		     compat_sigset_t *set, struct pt_regs *regs);
+
 #else  /* CONFIG_X86_32 */
 
 /* perform fxrstor iff the processor has extended states, otherwise frstor */
@@ -204,6 +225,9 @@ static inline void fpu_fxsave(struct fpu *fpu)
 		     : [fx] "=m" (fpu->state->fxsave));
 }
 
+#define ia32_setup_frame	__setup_frame
+#define ia32_setup_rt_frame	__setup_rt_frame
+
 #endif	/* CONFIG_X86_64 */
 
 /*
diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h
index 598457cbd0f..323973f4abf 100644
--- a/arch/x86/include/asm/signal.h
+++ b/arch/x86/include/asm/signal.h
@@ -31,6 +31,10 @@ typedef struct {
 	unsigned long sig[_NSIG_WORDS];
 } sigset_t;
 
+#ifndef CONFIG_COMPAT
+typedef sigset_t compat_sigset_t;
+#endif
+
 #else
 /* Here we must cater to libcs that poke about in kernel headers.  */
 
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index b280908a376..bed431a3816 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -209,24 +209,21 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
 	unsigned long sp = regs->sp;
 	int onsigstack = on_sig_stack(sp);
 
-#ifdef CONFIG_X86_64
 	/* redzone */
-	sp -= 128;
-#endif /* CONFIG_X86_64 */
+	if (config_enabled(CONFIG_X86_64))
+		sp -= 128;
 
 	if (!onsigstack) {
 		/* This is the X/Open sanctioned signal stack switching.  */
 		if (ka->sa.sa_flags & SA_ONSTACK) {
 			if (current->sas_ss_size)
 				sp = current->sas_ss_sp + current->sas_ss_size;
-		} else {
-#ifdef CONFIG_X86_32
-			/* This is the legacy signal stack switching. */
-			if ((regs->ss & 0xffff) != __USER_DS &&
-				!(ka->sa.sa_flags & SA_RESTORER) &&
-					ka->sa.sa_restorer)
+		} else if (config_enabled(CONFIG_X86_32) &&
+			   (regs->ss & 0xffff) != __USER_DS &&
+			   !(ka->sa.sa_flags & SA_RESTORER) &&
+			   ka->sa.sa_restorer) {
+				/* This is the legacy signal stack switching. */
 				sp = (unsigned long) ka->sa.sa_restorer;
-#endif /* CONFIG_X86_32 */
 		}
 	}
 
@@ -474,6 +471,74 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 }
 #endif /* CONFIG_X86_32 */
 
+static int x32_setup_rt_frame(int sig, struct k_sigaction *ka,
+			      siginfo_t *info, compat_sigset_t *set,
+			      struct pt_regs *regs)
+{
+#ifdef CONFIG_X86_X32_ABI
+	struct rt_sigframe_x32 __user *frame;
+	void __user *restorer;
+	int err = 0;
+	void __user *fpstate = NULL;
+
+	frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
+
+	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
+		return -EFAULT;
+
+	if (ka->sa.sa_flags & SA_SIGINFO) {
+		if (copy_siginfo_to_user32(&frame->info, info))
+			return -EFAULT;
+	}
+
+	put_user_try {
+		/* Create the ucontext.  */
+		if (cpu_has_xsave)
+			put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
+		else
+			put_user_ex(0, &frame->uc.uc_flags);
+		put_user_ex(0, &frame->uc.uc_link);
+		put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
+		put_user_ex(sas_ss_flags(regs->sp),
+			    &frame->uc.uc_stack.ss_flags);
+		put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
+		put_user_ex(0, &frame->uc.uc__pad0);
+		err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
+					regs, set->sig[0]);
+		err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
+
+		if (ka->sa.sa_flags & SA_RESTORER) {
+			restorer = ka->sa.sa_restorer;
+		} else {
+			/* could use a vstub here */
+			restorer = NULL;
+			err |= -EFAULT;
+		}
+		put_user_ex(restorer, &frame->pretcode);
+	} put_user_catch(err);
+
+	if (err)
+		return -EFAULT;
+
+	/* Set up registers for signal handler */
+	regs->sp = (unsigned long) frame;
+	regs->ip = (unsigned long) ka->sa.sa_handler;
+
+	/* We use the x32 calling convention here... */
+	regs->di = sig;
+	regs->si = (unsigned long) &frame->info;
+	regs->dx = (unsigned long) &frame->uc;
+
+	loadsegment(ds, __USER_DS);
+	loadsegment(es, __USER_DS);
+
+	regs->cs = __USER_CS;
+	regs->ss = __USER_DS;
+#endif	/* CONFIG_X86_X32_ABI */
+
+	return 0;
+}
+
 #ifdef CONFIG_X86_32
 /*
  * Atomically swap in the new signal mask, and wait for a signal.
@@ -612,55 +677,22 @@ static int signr_convert(int sig)
 	return sig;
 }
 
-#ifdef CONFIG_X86_32
-
-#define is_ia32	1
-#define ia32_setup_frame	__setup_frame
-#define ia32_setup_rt_frame	__setup_rt_frame
-
-#else /* !CONFIG_X86_32 */
-
-#ifdef CONFIG_IA32_EMULATION
-#define is_ia32	test_thread_flag(TIF_IA32)
-#else /* !CONFIG_IA32_EMULATION */
-#define is_ia32	0
-#endif /* CONFIG_IA32_EMULATION */
-
-#ifdef CONFIG_X86_X32_ABI
-#define is_x32	test_thread_flag(TIF_X32)
-
-static int x32_setup_rt_frame(int sig, struct k_sigaction *ka,
-			      siginfo_t *info, compat_sigset_t *set,
-			      struct pt_regs *regs);
-#else /* !CONFIG_X86_X32_ABI */
-#define is_x32	0
-#endif /* CONFIG_X86_X32_ABI */
-
-int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
-		sigset_t *set, struct pt_regs *regs);
-int ia32_setup_frame(int sig, struct k_sigaction *ka,
-		sigset_t *set, struct pt_regs *regs);
-
-#endif /* CONFIG_X86_32 */
-
 static int
 setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 		struct pt_regs *regs)
 {
 	int usig = signr_convert(sig);
 	sigset_t *set = sigmask_to_save();
+	compat_sigset_t *cset = (compat_sigset_t *) set;
 
 	/* Set up the stack frame */
-	if (is_ia32) {
+	if (is_ia32_frame()) {
 		if (ka->sa.sa_flags & SA_SIGINFO)
-			return ia32_setup_rt_frame(usig, ka, info, set, regs);
+			return ia32_setup_rt_frame(usig, ka, info, cset, regs);
 		else
-			return ia32_setup_frame(usig, ka, set, regs);
-#ifdef CONFIG_X86_X32_ABI
-	} else if (is_x32) {
-		return x32_setup_rt_frame(usig, ka, info,
-					 (compat_sigset_t *)set, regs);
-#endif
+			return ia32_setup_frame(usig, ka, cset, regs);
+	} else if (is_x32_frame()) {
+		return x32_setup_rt_frame(usig, ka, info, cset, regs);
 	} else {
 		return __setup_rt_frame(sig, ka, info, set, regs);
 	}
@@ -824,72 +856,6 @@ void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
 }
 
 #ifdef CONFIG_X86_X32_ABI
-static int x32_setup_rt_frame(int sig, struct k_sigaction *ka,
-			      siginfo_t *info, compat_sigset_t *set,
-			      struct pt_regs *regs)
-{
-	struct rt_sigframe_x32 __user *frame;
-	void __user *restorer;
-	int err = 0;
-	void __user *fpstate = NULL;
-
-	frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
-
-	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
-		return -EFAULT;
-
-	if (ka->sa.sa_flags & SA_SIGINFO) {
-		if (copy_siginfo_to_user32(&frame->info, info))
-			return -EFAULT;
-	}
-
-	put_user_try {
-		/* Create the ucontext.  */
-		if (cpu_has_xsave)
-			put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
-		else
-			put_user_ex(0, &frame->uc.uc_flags);
-		put_user_ex(0, &frame->uc.uc_link);
-		put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
-		put_user_ex(sas_ss_flags(regs->sp),
-			    &frame->uc.uc_stack.ss_flags);
-		put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
-		put_user_ex(0, &frame->uc.uc__pad0);
-		err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
-					regs, set->sig[0]);
-		err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
-
-		if (ka->sa.sa_flags & SA_RESTORER) {
-			restorer = ka->sa.sa_restorer;
-		} else {
-			/* could use a vstub here */
-			restorer = NULL;
-			err |= -EFAULT;
-		}
-		put_user_ex(restorer, &frame->pretcode);
-	} put_user_catch(err);
-
-	if (err)
-		return -EFAULT;
-
-	/* Set up registers for signal handler */
-	regs->sp = (unsigned long) frame;
-	regs->ip = (unsigned long) ka->sa.sa_handler;
-
-	/* We use the x32 calling convention here... */
-	regs->di = sig;
-	regs->si = (unsigned long) &frame->info;
-	regs->dx = (unsigned long) &frame->uc;
-
-	loadsegment(ds, __USER_DS);
-	loadsegment(es, __USER_DS);
-
-	regs->cs = __USER_CS;
-	regs->ss = __USER_DS;
-
-	return 0;
-}
-
 asmlinkage long sys32_x32_rt_sigreturn(struct pt_regs *regs)
 {
 	struct rt_sigframe_x32 __user *frame;
-- 
cgit v1.2.3-70-g09d2


From 0ca5bd0d886578ad0afeceaa83458c0f35cb3c6b Mon Sep 17 00:00:00 2001
From: Suresh Siddha <suresh.b.siddha@intel.com>
Date: Tue, 24 Jul 2012 16:05:28 -0700
Subject: x86, fpu: Consolidate inline asm routines for saving/restoring fpu
 state

Consolidate x86, x86_64 inline asm routines saving/restoring fpu state
using config_enabled().

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Link: http://lkml.kernel.org/r/1343171129-2747-3-git-send-email-suresh.b.siddha@intel.com
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/include/asm/fpu-internal.h | 184 +++++++++++++++---------------------
 arch/x86/include/asm/xsave.h        |   6 +-
 arch/x86/kernel/xsave.c             |   4 +-
 3 files changed, 81 insertions(+), 113 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h
index 6f595435ff9..016acb30fa4 100644
--- a/arch/x86/include/asm/fpu-internal.h
+++ b/arch/x86/include/asm/fpu-internal.h
@@ -97,34 +97,24 @@ static inline void sanitize_i387_state(struct task_struct *tsk)
 	__sanitize_i387_state(tsk);
 }
 
-#ifdef CONFIG_X86_64
-static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
-{
-	int err;
-
-	/* See comment in fxsave() below. */
-#ifdef CONFIG_AS_FXSAVEQ
-	asm volatile("1:  fxrstorq %[fx]\n\t"
-		     "2:\n"
-		     ".section .fixup,\"ax\"\n"
-		     "3:  movl $-1,%[err]\n"
-		     "    jmp  2b\n"
-		     ".previous\n"
-		     _ASM_EXTABLE(1b, 3b)
-		     : [err] "=r" (err)
-		     : [fx] "m" (*fx), "0" (0));
-#else
-	asm volatile("1:  rex64/fxrstor (%[fx])\n\t"
-		     "2:\n"
-		     ".section .fixup,\"ax\"\n"
-		     "3:  movl $-1,%[err]\n"
-		     "    jmp  2b\n"
-		     ".previous\n"
-		     _ASM_EXTABLE(1b, 3b)
-		     : [err] "=r" (err)
-		     : [fx] "R" (fx), "m" (*fx), "0" (0));
-#endif
-	return err;
+#define check_insn(insn, output, input...)				\
+({									\
+	int err;							\
+	asm volatile("1:" #insn "\n\t"					\
+		     "2:\n"						\
+		     ".section .fixup,\"ax\"\n"				\
+		     "3:  movl $-1,%[err]\n"				\
+		     "    jmp  2b\n"					\
+		     ".previous\n"					\
+		     _ASM_EXTABLE(1b, 3b)				\
+		     : [err] "=r" (err), output				\
+		     : "0"(0), input);					\
+	err;								\
+})
+
+static inline int fsave_user(struct i387_fsave_struct __user *fx)
+{
+	return check_insn(fnsave %[fx]; fwait,  [fx] "=m" (*fx), "m" (*fx));
 }
 
 static inline int fxsave_user(struct i387_fxsave_struct __user *fx)
@@ -140,90 +130,73 @@ static inline int fxsave_user(struct i387_fxsave_struct __user *fx)
 	if (unlikely(err))
 		return -EFAULT;
 
-	/* See comment in fxsave() below. */
-#ifdef CONFIG_AS_FXSAVEQ
-	asm volatile("1:  fxsaveq %[fx]\n\t"
-		     "2:\n"
-		     ".section .fixup,\"ax\"\n"
-		     "3:  movl $-1,%[err]\n"
-		     "    jmp  2b\n"
-		     ".previous\n"
-		     _ASM_EXTABLE(1b, 3b)
-		     : [err] "=r" (err), [fx] "=m" (*fx)
-		     : "0" (0));
-#else
-	asm volatile("1:  rex64/fxsave (%[fx])\n\t"
-		     "2:\n"
-		     ".section .fixup,\"ax\"\n"
-		     "3:  movl $-1,%[err]\n"
-		     "    jmp  2b\n"
-		     ".previous\n"
-		     _ASM_EXTABLE(1b, 3b)
-		     : [err] "=r" (err), "=m" (*fx)
-		     : [fx] "R" (fx), "0" (0));
-#endif
-	if (unlikely(err) &&
-	    __clear_user(fx, sizeof(struct i387_fxsave_struct)))
-		err = -EFAULT;
-	/* No need to clear here because the caller clears USED_MATH */
-	return err;
+	if (config_enabled(CONFIG_X86_32))
+		return check_insn(fxsave %[fx], [fx] "=m" (*fx), "m" (*fx));
+	else if (config_enabled(CONFIG_AS_FXSAVEQ))
+		return check_insn(fxsaveq %[fx], [fx] "=m" (*fx), "m" (*fx));
+
+	/* See comment in fpu_fxsave() below. */
+	return check_insn(rex64/fxsave (%[fx]), "=m" (*fx), [fx] "R" (fx));
 }
 
-static inline void fpu_fxsave(struct fpu *fpu)
+static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
 {
-	/* Using "rex64; fxsave %0" is broken because, if the memory operand
-	   uses any extended registers for addressing, a second REX prefix
-	   will be generated (to the assembler, rex64 followed by semicolon
-	   is a separate instruction), and hence the 64-bitness is lost. */
+	if (config_enabled(CONFIG_X86_32))
+		return check_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
+	else if (config_enabled(CONFIG_AS_FXSAVEQ))
+		return check_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
 
-#ifdef CONFIG_AS_FXSAVEQ
-	/* Using "fxsaveq %0" would be the ideal choice, but is only supported
-	   starting with gas 2.16. */
-	__asm__ __volatile__("fxsaveq %0"
-			     : "=m" (fpu->state->fxsave));
-#else
-	/* Using, as a workaround, the properly prefixed form below isn't
-	   accepted by any binutils version so far released, complaining that
-	   the same type of prefix is used twice if an extended register is
-	   needed for addressing (fix submitted to mainline 2005-11-21).
-	asm volatile("rex64/fxsave %0"
-		     : "=m" (fpu->state->fxsave));
-	   This, however, we can work around by forcing the compiler to select
-	   an addressing mode that doesn't require extended registers. */
-	asm volatile("rex64/fxsave (%[fx])"
-		     : "=m" (fpu->state->fxsave)
-		     : [fx] "R" (&fpu->state->fxsave));
-#endif
+	/* See comment in fpu_fxsave() below. */
+	return check_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx),
+			  "m" (*fx));
 }
 
-int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
-			compat_sigset_t *set, struct pt_regs *regs);
-int ia32_setup_frame(int sig, struct k_sigaction *ka,
-		     compat_sigset_t *set, struct pt_regs *regs);
-
-#else  /* CONFIG_X86_32 */
-
-/* perform fxrstor iff the processor has extended states, otherwise frstor */
-static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
+static inline int frstor_checking(struct i387_fsave_struct *fx)
 {
-	/*
-	 * The "nop" is needed to make the instructions the same
-	 * length.
-	 */
-	alternative_input(
-		"nop ; frstor %1",
-		"fxrstor %1",
-		X86_FEATURE_FXSR,
-		"m" (*fx));
-
-	return 0;
+	return check_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
 }
 
 static inline void fpu_fxsave(struct fpu *fpu)
 {
-	asm volatile("fxsave %[fx]"
-		     : [fx] "=m" (fpu->state->fxsave));
+	if (config_enabled(CONFIG_X86_32))
+		asm volatile( "fxsave %[fx]" : [fx] "=m" (fpu->state->fxsave));
+	else if (config_enabled(CONFIG_AS_FXSAVEQ))
+		asm volatile("fxsaveq %0" : "=m" (fpu->state->fxsave));
+	else {
+		/* Using "rex64; fxsave %0" is broken because, if the memory
+		 * operand uses any extended registers for addressing, a second
+		 * REX prefix will be generated (to the assembler, rex64
+		 * followed by semicolon is a separate instruction), and hence
+		 * the 64-bitness is lost.
+		 *
+		 * Using "fxsaveq %0" would be the ideal choice, but is only
+		 * supported starting with gas 2.16.
+		 *
+		 * Using, as a workaround, the properly prefixed form below
+		 * isn't accepted by any binutils version so far released,
+		 * complaining that the same type of prefix is used twice if
+		 * an extended register is needed for addressing (fix submitted
+		 * to mainline 2005-11-21).
+		 *
+		 *  asm volatile("rex64/fxsave %0" : "=m" (fpu->state->fxsave));
+		 *
+		 * This, however, we can work around by forcing the compiler to
+		 * select an addressing mode that doesn't require extended
+		 * registers.
+		 */
+		asm volatile( "rex64/fxsave (%[fx])"
+			     : "=m" (fpu->state->fxsave)
+			     : [fx] "R" (&fpu->state->fxsave));
+	}
 }
+#ifdef CONFIG_X86_64
+
+int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
+			compat_sigset_t *set, struct pt_regs *regs);
+int ia32_setup_frame(int sig, struct k_sigaction *ka,
+		     compat_sigset_t *set, struct pt_regs *regs);
+
+#else  /* CONFIG_X86_32 */
 
 #define ia32_setup_frame	__setup_frame
 #define ia32_setup_rt_frame	__setup_rt_frame
@@ -272,17 +245,14 @@ static inline int __save_init_fpu(struct task_struct *tsk)
 	return fpu_save_init(&tsk->thread.fpu);
 }
 
-static inline int fpu_fxrstor_checking(struct fpu *fpu)
-{
-	return fxrstor_checking(&fpu->state->fxsave);
-}
-
 static inline int fpu_restore_checking(struct fpu *fpu)
 {
 	if (use_xsave())
-		return fpu_xrstor_checking(fpu);
+		return fpu_xrstor_checking(&fpu->state->xsave);
+	else if (use_fxsr())
+		return fxrstor_checking(&fpu->state->fxsave);
 	else
-		return fpu_fxrstor_checking(fpu);
+		return frstor_checking(&fpu->state->fsave);
 }
 
 static inline int restore_fpu_checking(struct task_struct *tsk)
diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h
index 8a1b6f9b594..aaac810e861 100644
--- a/arch/x86/include/asm/xsave.h
+++ b/arch/x86/include/asm/xsave.h
@@ -42,9 +42,8 @@ extern int check_for_xstate(struct i387_fxsave_struct __user *buf,
 			    void __user *fpstate,
 			    struct _fpx_sw_bytes *sw);
 
-static inline int fpu_xrstor_checking(struct fpu *fpu)
+static inline int fpu_xrstor_checking(struct xsave_struct *fx)
 {
-	struct xsave_struct *fx = &fpu->state->xsave;
 	int err;
 
 	asm volatile("1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n\t"
@@ -84,9 +83,6 @@ static inline int xsave_user(struct xsave_struct __user *buf)
 			     : [err] "=r" (err)
 			     : "D" (buf), "a" (-1), "d" (-1), "0" (0)
 			     : "memory");
-	if (unlikely(err) && __clear_user(buf, xstate_size))
-		err = -EFAULT;
-	/* No need to clear here because the caller clears USED_MATH */
 	return err;
 }
 
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index 9e1a8a7ba6e..7a3d4df9eaf 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -176,8 +176,10 @@ int save_i387_xstate(void __user *buf)
 		else
 			err = fxsave_user(buf);
 
-		if (err)
+		if (unlikely(err)) {
+			__clear_user(buf, xstate_size);
 			return err;
+		}
 		user_fpu_end();
 	} else {
 		sanitize_i387_state(tsk);
-- 
cgit v1.2.3-70-g09d2


From 72a671ced66db6d1c2bfff1c930a101ac8d08204 Mon Sep 17 00:00:00 2001
From: Suresh Siddha <suresh.b.siddha@intel.com>
Date: Tue, 24 Jul 2012 16:05:29 -0700
Subject: x86, fpu: Unify signal handling code paths for x86 and x86_64 kernels

Currently for x86 and x86_32 binaries, fpstate in the user sigframe is copied
to/from the fpstate in the task struct.

And in the case of signal delivery for x86_64 binaries, if the fpstate is live
in the CPU registers, then the live state is copied directly to the user
sigframe. Otherwise  fpstate in the task struct is copied to the user sigframe.
During restore, fpstate in the user sigframe is restored directly to the live
CPU registers.

Historically, different code paths led to different bugs. For example,
x86_64 code path was not preemption safe till recently. Also there is lot
of code duplication for support of new features like xsave etc.

Unify signal handling code paths for x86 and x86_64 kernels.

New strategy is as follows:

Signal delivery: Both for 32/64-bit frames, align the core math frame area to
64bytes as needed by xsave (this where the main fpu/extended state gets copied
to and excludes the legacy compatibility fsave header for the 32-bit [f]xsave
frames). If the state is live, copy the register state directly to the user
frame. If not live, copy the state in the thread struct to the user frame. And
for 32-bit [f]xsave frames, construct the fsave header separately before
the actual [f]xsave area.

Signal return: As the 32-bit frames with [f]xstate has an additional
'fsave' header, copy everything back from the user sigframe to the
fpstate in the task structure and reconstruct the fxstate from the 'fsave'
header (Also user passed pointers may not be correctly aligned for
any attempt to directly restore any partial state). At the next fpstate usage,
everything will be restored to the live CPU registers.
For all the 64-bit frames and the 32-bit fsave frame, restore the state from
the user sigframe directly to the live CPU registers. 64-bit signals always
restored the math frame directly, so we can expect the math frame pointer
to be correctly aligned. For 32-bit fsave frames, there are no alignment
requirements, so we can restore the state directly.

"lat_sig catch" microbenchmark numbers (for x86, x86_64, x86_32 binaries) are
with in the noise range with this change.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Link: http://lkml.kernel.org/r/1343171129-2747-4-git-send-email-suresh.b.siddha@intel.com
[ Merged in compilation fix ]
Link: http://lkml.kernel.org/r/1344544736.8326.17.camel@sbsiddha-desk.sc.intel.com
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/ia32/ia32_signal.c         |   9 +-
 arch/x86/include/asm/fpu-internal.h | 111 +++++----
 arch/x86/include/asm/xsave.h        |   6 +-
 arch/x86/kernel/i387.c              | 246 +-------------------
 arch/x86/kernel/process.c           |  10 -
 arch/x86/kernel/ptrace.c            |   3 -
 arch/x86/kernel/signal.c            |  15 +-
 arch/x86/kernel/xsave.c             | 432 +++++++++++++++++++++---------------
 8 files changed, 348 insertions(+), 484 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index 452d4dd0a95..8c77c64fbd2 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -251,7 +251,7 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
 
 		get_user_ex(tmp, &sc->fpstate);
 		buf = compat_ptr(tmp);
-		err |= restore_i387_xstate_ia32(buf);
+		err |= restore_xstate_sig(buf, 1);
 
 		get_user_ex(*pax, &sc->ax);
 	} get_user_catch(err);
@@ -382,9 +382,12 @@ static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs,
 		sp = (unsigned long) ka->sa.sa_restorer;
 
 	if (used_math()) {
-		sp = sp - sig_xstate_ia32_size;
+		unsigned long fx_aligned, math_size;
+
+		sp = alloc_mathframe(sp, 1, &fx_aligned, &math_size);
 		*fpstate = (struct _fpstate_ia32 __user *) sp;
-		if (save_i387_xstate_ia32(*fpstate) < 0)
+		if (save_xstate_sig(*fpstate, (void __user *)fx_aligned,
+				    math_size) < 0)
 			return (void __user *) -1L;
 	}
 
diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h
index 016acb30fa4..4fbb4195bc6 100644
--- a/arch/x86/include/asm/fpu-internal.h
+++ b/arch/x86/include/asm/fpu-internal.h
@@ -22,11 +22,30 @@
 #include <asm/uaccess.h>
 #include <asm/xsave.h>
 
-extern unsigned int sig_xstate_size;
+#ifdef CONFIG_X86_64
+# include <asm/sigcontext32.h>
+# include <asm/user32.h>
+int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
+			compat_sigset_t *set, struct pt_regs *regs);
+int ia32_setup_frame(int sig, struct k_sigaction *ka,
+		     compat_sigset_t *set, struct pt_regs *regs);
+#else
+# define user_i387_ia32_struct	user_i387_struct
+# define user32_fxsr_struct	user_fxsr_struct
+# define ia32_setup_frame	__setup_frame
+# define ia32_setup_rt_frame	__setup_rt_frame
+#endif
+
+extern unsigned int mxcsr_feature_mask;
 extern void fpu_init(void);
 
 DECLARE_PER_CPU(struct task_struct *, fpu_owner_task);
 
+extern void convert_from_fxsr(struct user_i387_ia32_struct *env,
+			      struct task_struct *tsk);
+extern void convert_to_fxsr(struct task_struct *tsk,
+			    const struct user_i387_ia32_struct *env);
+
 extern user_regset_active_fn fpregs_active, xfpregs_active;
 extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get,
 				xstateregs_get;
@@ -39,19 +58,11 @@ extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set,
  */
 #define xstateregs_active	fpregs_active
 
-extern struct _fpx_sw_bytes fx_sw_reserved;
-#ifdef CONFIG_IA32_EMULATION
-extern unsigned int sig_xstate_ia32_size;
-extern struct _fpx_sw_bytes fx_sw_reserved_ia32;
-struct _fpstate_ia32;
-struct _xstate_ia32;
-extern int save_i387_xstate_ia32(void __user *buf);
-extern int restore_i387_xstate_ia32(void __user *buf);
-#endif
-
 #ifdef CONFIG_MATH_EMULATION
+# define HAVE_HWFP		(boot_cpu_data.hard_math)
 extern void finit_soft_fpu(struct i387_soft_struct *soft);
 #else
+# define HAVE_HWFP		1
 static inline void finit_soft_fpu(struct i387_soft_struct *soft) {}
 #endif
 
@@ -119,17 +130,6 @@ static inline int fsave_user(struct i387_fsave_struct __user *fx)
 
 static inline int fxsave_user(struct i387_fxsave_struct __user *fx)
 {
-	int err;
-
-	/*
-	 * Clear the bytes not touched by the fxsave and reserved
-	 * for the SW usage.
-	 */
-	err = __clear_user(&fx->sw_reserved,
-			   sizeof(struct _fpx_sw_bytes));
-	if (unlikely(err))
-		return -EFAULT;
-
 	if (config_enabled(CONFIG_X86_32))
 		return check_insn(fxsave %[fx], [fx] "=m" (*fx), "m" (*fx));
 	else if (config_enabled(CONFIG_AS_FXSAVEQ))
@@ -189,19 +189,6 @@ static inline void fpu_fxsave(struct fpu *fpu)
 			     : [fx] "R" (&fpu->state->fxsave));
 	}
 }
-#ifdef CONFIG_X86_64
-
-int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
-			compat_sigset_t *set, struct pt_regs *regs);
-int ia32_setup_frame(int sig, struct k_sigaction *ka,
-		     compat_sigset_t *set, struct pt_regs *regs);
-
-#else  /* CONFIG_X86_32 */
-
-#define ia32_setup_frame	__setup_frame
-#define ia32_setup_rt_frame	__setup_rt_frame
-
-#endif	/* CONFIG_X86_64 */
 
 /*
  * These must be called with preempt disabled. Returns
@@ -392,10 +379,28 @@ static inline void switch_fpu_finish(struct task_struct *new, fpu_switch_t fpu)
 /*
  * Signal frame handlers...
  */
-extern int save_i387_xstate(void __user *buf);
-extern int restore_i387_xstate(void __user *buf);
+extern int save_xstate_sig(void __user *buf, void __user *fx, int size);
+extern int __restore_xstate_sig(void __user *buf, void __user *fx, int size);
+
+static inline int xstate_sigframe_size(void)
+{
+	return use_xsave() ? xstate_size + FP_XSTATE_MAGIC2_SIZE : xstate_size;
+}
+
+static inline int restore_xstate_sig(void __user *buf, int ia32_frame)
+{
+	void __user *buf_fx = buf;
+	int size = xstate_sigframe_size();
+
+	if (ia32_frame && use_fxsr()) {
+		buf_fx = buf + sizeof(struct i387_fsave_struct);
+		size += sizeof(struct i387_fsave_struct);
+	}
+
+	return __restore_xstate_sig(buf, buf_fx, size);
+}
 
-static inline void __clear_fpu(struct task_struct *tsk)
+static inline void __drop_fpu(struct task_struct *tsk)
 {
 	if (__thread_has_fpu(tsk)) {
 		/* Ignore delayed exceptions from user space */
@@ -443,11 +448,21 @@ static inline void save_init_fpu(struct task_struct *tsk)
 	preempt_enable();
 }
 
-static inline void clear_fpu(struct task_struct *tsk)
+static inline void stop_fpu_preload(struct task_struct *tsk)
 {
+	tsk->fpu_counter = 0;
+}
+
+static inline void drop_fpu(struct task_struct *tsk)
+{
+	/*
+	 * Forget coprocessor state..
+	 */
+	stop_fpu_preload(tsk);
 	preempt_disable();
-	__clear_fpu(tsk);
+	__drop_fpu(tsk);
 	preempt_enable();
+	clear_used_math();
 }
 
 /*
@@ -511,4 +526,20 @@ static inline void fpu_copy(struct fpu *dst, struct fpu *src)
 
 extern void fpu_finit(struct fpu *fpu);
 
+static inline unsigned long
+alloc_mathframe(unsigned long sp, int ia32_frame, unsigned long *buf_fx,
+		unsigned long *size)
+{
+	unsigned long frame_size = xstate_sigframe_size();
+
+	*buf_fx = sp = round_down(sp - frame_size, 64);
+	if (ia32_frame && use_fxsr()) {
+		frame_size += sizeof(struct i387_fsave_struct);
+		sp -= sizeof(struct i387_fsave_struct);
+	}
+
+	*size = frame_size;
+	return sp;
+}
+
 #endif
diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h
index aaac810e861..c1d989a1519 100644
--- a/arch/x86/include/asm/xsave.h
+++ b/arch/x86/include/asm/xsave.h
@@ -38,9 +38,6 @@ extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];
 extern void xsave_init(void);
 extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask);
 extern int init_fpu(struct task_struct *child);
-extern int check_for_xstate(struct i387_fxsave_struct __user *buf,
-			    void __user *fpstate,
-			    struct _fpx_sw_bytes *sw);
 
 static inline int fpu_xrstor_checking(struct xsave_struct *fx)
 {
@@ -68,8 +65,7 @@ static inline int xsave_user(struct xsave_struct __user *buf)
 	 * Clear the xsave header first, so that reserved fields are
 	 * initialized to zero.
 	 */
-	err = __clear_user(&buf->xsave_hdr,
-			   sizeof(struct xsave_hdr_struct));
+	err = __clear_user(&buf->xsave_hdr, sizeof(buf->xsave_hdr));
 	if (unlikely(err))
 		return -EFAULT;
 
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index f250431fb50..ab6a2e8028a 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -19,20 +19,6 @@
 #include <asm/fpu-internal.h>
 #include <asm/user.h>
 
-#ifdef CONFIG_X86_64
-# include <asm/sigcontext32.h>
-# include <asm/user32.h>
-#else
-# define save_i387_xstate_ia32		save_i387_xstate
-# define restore_i387_xstate_ia32	restore_i387_xstate
-# define _fpstate_ia32		_fpstate
-# define _xstate_ia32		_xstate
-# define sig_xstate_ia32_size   sig_xstate_size
-# define fx_sw_reserved_ia32	fx_sw_reserved
-# define user_i387_ia32_struct	user_i387_struct
-# define user32_fxsr_struct	user_fxsr_struct
-#endif
-
 /*
  * Were we in an interrupt that interrupted kernel mode?
  *
@@ -113,16 +99,9 @@ void unlazy_fpu(struct task_struct *tsk)
 }
 EXPORT_SYMBOL(unlazy_fpu);
 
-#ifdef CONFIG_MATH_EMULATION
-# define HAVE_HWFP		(boot_cpu_data.hard_math)
-#else
-# define HAVE_HWFP		1
-#endif
-
-static unsigned int		mxcsr_feature_mask __read_mostly = 0xffffffffu;
+unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu;
 unsigned int xstate_size;
 EXPORT_SYMBOL_GPL(xstate_size);
-unsigned int sig_xstate_ia32_size = sizeof(struct _fpstate_ia32);
 static struct i387_fxsave_struct fx_scratch __cpuinitdata;
 
 static void __cpuinit mxcsr_feature_mask_init(void)
@@ -454,7 +433,7 @@ static inline u32 twd_fxsr_to_i387(struct i387_fxsave_struct *fxsave)
  * FXSR floating point environment conversions.
  */
 
-static void
+void
 convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk)
 {
 	struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state->fxsave;
@@ -491,8 +470,8 @@ convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk)
 		memcpy(&to[i], &from[i], sizeof(to[0]));
 }
 
-static void convert_to_fxsr(struct task_struct *tsk,
-			    const struct user_i387_ia32_struct *env)
+void convert_to_fxsr(struct task_struct *tsk,
+		     const struct user_i387_ia32_struct *env)
 
 {
 	struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state->fxsave;
@@ -588,223 +567,6 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
 	return ret;
 }
 
-/*
- * Signal frame handlers.
- */
-
-static inline int save_i387_fsave(struct _fpstate_ia32 __user *buf)
-{
-	struct task_struct *tsk = current;
-	struct i387_fsave_struct *fp = &tsk->thread.fpu.state->fsave;
-
-	fp->status = fp->swd;
-	if (__copy_to_user(buf, fp, sizeof(struct i387_fsave_struct)))
-		return -1;
-	return 1;
-}
-
-static int save_i387_fxsave(struct _fpstate_ia32 __user *buf)
-{
-	struct task_struct *tsk = current;
-	struct i387_fxsave_struct *fx = &tsk->thread.fpu.state->fxsave;
-	struct user_i387_ia32_struct env;
-	int err = 0;
-
-	convert_from_fxsr(&env, tsk);
-	if (__copy_to_user(buf, &env, sizeof(env)))
-		return -1;
-
-	err |= __put_user(fx->swd, &buf->status);
-	err |= __put_user(X86_FXSR_MAGIC, &buf->magic);
-	if (err)
-		return -1;
-
-	if (__copy_to_user(&buf->_fxsr_env[0], fx, xstate_size))
-		return -1;
-	return 1;
-}
-
-static int save_i387_xsave(void __user *buf)
-{
-	struct task_struct *tsk = current;
-	struct _fpstate_ia32 __user *fx = buf;
-	int err = 0;
-
-
-	sanitize_i387_state(tsk);
-
-	/*
-	 * For legacy compatible, we always set FP/SSE bits in the bit
-	 * vector while saving the state to the user context.
-	 * This will enable us capturing any changes(during sigreturn) to
-	 * the FP/SSE bits by the legacy applications which don't touch
-	 * xstate_bv in the xsave header.
-	 *
-	 * xsave aware applications can change the xstate_bv in the xsave
-	 * header as well as change any contents in the memory layout.
-	 * xrestore as part of sigreturn will capture all the changes.
-	 */
-	tsk->thread.fpu.state->xsave.xsave_hdr.xstate_bv |= XSTATE_FPSSE;
-
-	if (save_i387_fxsave(fx) < 0)
-		return -1;
-
-	err = __copy_to_user(&fx->sw_reserved, &fx_sw_reserved_ia32,
-			     sizeof(struct _fpx_sw_bytes));
-	err |= __put_user(FP_XSTATE_MAGIC2,
-			  (__u32 __user *) (buf + sig_xstate_ia32_size
-					    - FP_XSTATE_MAGIC2_SIZE));
-	if (err)
-		return -1;
-
-	return 1;
-}
-
-int save_i387_xstate_ia32(void __user *buf)
-{
-	struct _fpstate_ia32 __user *fp = (struct _fpstate_ia32 __user *) buf;
-	struct task_struct *tsk = current;
-
-	if (!used_math())
-		return 0;
-
-	if (!access_ok(VERIFY_WRITE, buf, sig_xstate_ia32_size))
-		return -EACCES;
-	/*
-	 * This will cause a "finit" to be triggered by the next
-	 * attempted FPU operation by the 'current' process.
-	 */
-	clear_used_math();
-
-	if (!HAVE_HWFP) {
-		return fpregs_soft_get(current, NULL,
-				       0, sizeof(struct user_i387_ia32_struct),
-				       NULL, fp) ? -1 : 1;
-	}
-
-	unlazy_fpu(tsk);
-
-	if (cpu_has_xsave)
-		return save_i387_xsave(fp);
-	if (cpu_has_fxsr)
-		return save_i387_fxsave(fp);
-	else
-		return save_i387_fsave(fp);
-}
-
-static inline int restore_i387_fsave(struct _fpstate_ia32 __user *buf)
-{
-	struct task_struct *tsk = current;
-
-	return __copy_from_user(&tsk->thread.fpu.state->fsave, buf,
-				sizeof(struct i387_fsave_struct));
-}
-
-static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf,
-			       unsigned int size)
-{
-	struct task_struct *tsk = current;
-	struct user_i387_ia32_struct env;
-	int err;
-
-	err = __copy_from_user(&tsk->thread.fpu.state->fxsave, &buf->_fxsr_env[0],
-			       size);
-	/* mxcsr reserved bits must be masked to zero for security reasons */
-	tsk->thread.fpu.state->fxsave.mxcsr &= mxcsr_feature_mask;
-	if (err || __copy_from_user(&env, buf, sizeof(env)))
-		return 1;
-	convert_to_fxsr(tsk, &env);
-
-	return 0;
-}
-
-static int restore_i387_xsave(void __user *buf)
-{
-	struct _fpx_sw_bytes fx_sw_user;
-	struct _fpstate_ia32 __user *fx_user =
-			((struct _fpstate_ia32 __user *) buf);
-	struct i387_fxsave_struct __user *fx =
-		(struct i387_fxsave_struct __user *) &fx_user->_fxsr_env[0];
-	struct xsave_hdr_struct *xsave_hdr =
-				&current->thread.fpu.state->xsave.xsave_hdr;
-	u64 mask;
-	int err;
-
-	if (check_for_xstate(fx, buf, &fx_sw_user))
-		goto fx_only;
-
-	mask = fx_sw_user.xstate_bv;
-
-	err = restore_i387_fxsave(buf, fx_sw_user.xstate_size);
-
-	xsave_hdr->xstate_bv &= pcntxt_mask;
-	/*
-	 * These bits must be zero.
-	 */
-	xsave_hdr->reserved1[0] = xsave_hdr->reserved1[1] = 0;
-
-	/*
-	 * Init the state that is not present in the memory layout
-	 * and enabled by the OS.
-	 */
-	mask = ~(pcntxt_mask & ~mask);
-	xsave_hdr->xstate_bv &= mask;
-
-	return err;
-fx_only:
-	/*
-	 * Couldn't find the extended state information in the memory
-	 * layout. Restore the FP/SSE and init the other extended state
-	 * enabled by the OS.
-	 */
-	xsave_hdr->xstate_bv = XSTATE_FPSSE;
-	return restore_i387_fxsave(buf, sizeof(struct i387_fxsave_struct));
-}
-
-int restore_i387_xstate_ia32(void __user *buf)
-{
-	int err;
-	struct task_struct *tsk = current;
-	struct _fpstate_ia32 __user *fp = (struct _fpstate_ia32 __user *) buf;
-
-	if (HAVE_HWFP)
-		clear_fpu(tsk);
-
-	if (!buf) {
-		if (used_math()) {
-			clear_fpu(tsk);
-			clear_used_math();
-		}
-
-		return 0;
-	} else
-		if (!access_ok(VERIFY_READ, buf, sig_xstate_ia32_size))
-			return -EACCES;
-
-	if (!used_math()) {
-		err = init_fpu(tsk);
-		if (err)
-			return err;
-	}
-
-	if (HAVE_HWFP) {
-		if (cpu_has_xsave)
-			err = restore_i387_xsave(buf);
-		else if (cpu_has_fxsr)
-			err = restore_i387_fxsave(fp, sizeof(struct
-							   i387_fxsave_struct));
-		else
-			err = restore_i387_fsave(fp);
-	} else {
-		err = fpregs_soft_set(current, NULL,
-				      0, sizeof(struct user_i387_ia32_struct),
-				      NULL, fp) != 0;
-	}
-	set_used_math();
-
-	return err;
-}
-
 /*
  * FPU state for core dumps.
  * This is only used for a.out dumps now.
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index ef6a8456f71..30069d1a6a4 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -97,16 +97,6 @@ void arch_task_cache_init(void)
 				  SLAB_PANIC | SLAB_NOTRACK, NULL);
 }
 
-static inline void drop_fpu(struct task_struct *tsk)
-{
-	/*
-	 * Forget coprocessor state..
-	 */
-	tsk->fpu_counter = 0;
-	clear_fpu(tsk);
-	clear_used_math();
-}
-
 /*
  * Free current thread data structures etc..
  */
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index c4c6a5c2bf0..861a9d1a463 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -1332,9 +1332,6 @@ static const struct user_regset_view user_x86_64_view = {
 #define genregs32_get		genregs_get
 #define genregs32_set		genregs_set
 
-#define user_i387_ia32_struct	user_i387_struct
-#define user32_fxsr_struct	user_fxsr_struct
-
 #endif	/* CONFIG_X86_64 */
 
 #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index bed431a3816..e10f96a7e04 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -114,7 +114,7 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
 		regs->orig_ax = -1;		/* disable syscall checks */
 
 		get_user_ex(buf, &sc->fpstate);
-		err |= restore_i387_xstate(buf);
+		err |= restore_xstate_sig(buf, config_enabled(CONFIG_X86_32));
 
 		get_user_ex(*pax, &sc->ax);
 	} get_user_catch(err);
@@ -206,7 +206,9 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
 	     void __user **fpstate)
 {
 	/* Default to using normal stack */
+	unsigned long math_size = 0;
 	unsigned long sp = regs->sp;
+	unsigned long buf_fx = 0;
 	int onsigstack = on_sig_stack(sp);
 
 	/* redzone */
@@ -228,10 +230,8 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
 	}
 
 	if (used_math()) {
-		sp -= sig_xstate_size;
-#ifdef CONFIG_X86_64
-		sp = round_down(sp, 64);
-#endif /* CONFIG_X86_64 */
+		sp = alloc_mathframe(sp, config_enabled(CONFIG_X86_32),
+				     &buf_fx, &math_size);
 		*fpstate = (void __user *)sp;
 	}
 
@@ -244,8 +244,9 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
 	if (onsigstack && !likely(on_sig_stack(sp)))
 		return (void __user *)-1L;
 
-	/* save i387 state */
-	if (used_math() && save_i387_xstate(*fpstate) < 0)
+	/* save i387 and extended state */
+	if (used_math() &&
+	    save_xstate_sig(*fpstate, (void __user *)buf_fx, math_size) < 0)
 		return (void __user *)-1L;
 
 	return (void __user *)sp;
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index 7a3d4df9eaf..0923d27f23d 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -10,9 +10,7 @@
 #include <linux/compat.h>
 #include <asm/i387.h>
 #include <asm/fpu-internal.h>
-#ifdef CONFIG_IA32_EMULATION
-#include <asm/sigcontext32.h>
-#endif
+#include <asm/sigframe.h>
 #include <asm/xcr.h>
 
 /*
@@ -25,11 +23,7 @@ u64 pcntxt_mask;
  */
 static struct xsave_struct *init_xstate_buf;
 
-struct _fpx_sw_bytes fx_sw_reserved;
-#ifdef CONFIG_IA32_EMULATION
-struct _fpx_sw_bytes fx_sw_reserved_ia32;
-#endif
-
+static struct _fpx_sw_bytes fx_sw_reserved, fx_sw_reserved_ia32;
 static unsigned int *xstate_offsets, *xstate_sizes, xstate_features;
 
 /*
@@ -44,9 +38,9 @@ static unsigned int *xstate_offsets, *xstate_sizes, xstate_features;
  */
 void __sanitize_i387_state(struct task_struct *tsk)
 {
-	u64 xstate_bv;
-	int feature_bit = 0x2;
 	struct i387_fxsave_struct *fx = &tsk->thread.fpu.state->fxsave;
+	int feature_bit = 0x2;
+	u64 xstate_bv;
 
 	if (!fx)
 		return;
@@ -104,215 +98,314 @@ void __sanitize_i387_state(struct task_struct *tsk)
  * Check for the presence of extended state information in the
  * user fpstate pointer in the sigcontext.
  */
-int check_for_xstate(struct i387_fxsave_struct __user *buf,
-		     void __user *fpstate,
-		     struct _fpx_sw_bytes *fx_sw_user)
+static inline int check_for_xstate(struct i387_fxsave_struct __user *buf,
+				   void __user *fpstate,
+				   struct _fpx_sw_bytes *fx_sw)
 {
 	int min_xstate_size = sizeof(struct i387_fxsave_struct) +
 			      sizeof(struct xsave_hdr_struct);
 	unsigned int magic2;
-	int err;
 
-	err = __copy_from_user(fx_sw_user, &buf->sw_reserved[0],
-			       sizeof(struct _fpx_sw_bytes));
-	if (err)
-		return -EFAULT;
+	if (__copy_from_user(fx_sw, &buf->sw_reserved[0], sizeof(*fx_sw)))
+		return -1;
 
-	/*
-	 * First Magic check failed.
-	 */
-	if (fx_sw_user->magic1 != FP_XSTATE_MAGIC1)
-		return -EINVAL;
+	/* Check for the first magic field and other error scenarios. */
+	if (fx_sw->magic1 != FP_XSTATE_MAGIC1 ||
+	    fx_sw->xstate_size < min_xstate_size ||
+	    fx_sw->xstate_size > xstate_size ||
+	    fx_sw->xstate_size > fx_sw->extended_size)
+		return -1;
 
-	/*
-	 * Check for error scenarios.
-	 */
-	if (fx_sw_user->xstate_size < min_xstate_size ||
-	    fx_sw_user->xstate_size > xstate_size ||
-	    fx_sw_user->xstate_size > fx_sw_user->extended_size)
-		return -EINVAL;
-
-	err = __get_user(magic2, (__u32 __user *) (fpstate +
-						   fx_sw_user->extended_size -
-						   FP_XSTATE_MAGIC2_SIZE));
-	if (err)
-		return err;
 	/*
 	 * Check for the presence of second magic word at the end of memory
 	 * layout. This detects the case where the user just copied the legacy
 	 * fpstate layout with out copying the extended state information
 	 * in the memory layout.
 	 */
-	if (magic2 != FP_XSTATE_MAGIC2)
-		return -EFAULT;
+	if (__get_user(magic2, (__u32 __user *)(fpstate + fx_sw->xstate_size))
+	    || magic2 != FP_XSTATE_MAGIC2)
+		return -1;
 
 	return 0;
 }
 
-#ifdef CONFIG_X86_64
 /*
  * Signal frame handlers.
  */
+static inline int save_fsave_header(struct task_struct *tsk, void __user *buf)
+{
+	if (use_fxsr()) {
+		struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave;
+		struct user_i387_ia32_struct env;
+		struct _fpstate_ia32 __user *fp = buf;
 
-int save_i387_xstate(void __user *buf)
+		convert_from_fxsr(&env, tsk);
+
+		if (__copy_to_user(buf, &env, sizeof(env)) ||
+		    __put_user(xsave->i387.swd, &fp->status) ||
+		    __put_user(X86_FXSR_MAGIC, &fp->magic))
+			return -1;
+	} else {
+		struct i387_fsave_struct __user *fp = buf;
+		u32 swd;
+		if (__get_user(swd, &fp->swd) || __put_user(swd, &fp->status))
+			return -1;
+	}
+
+	return 0;
+}
+
+static inline int save_xstate_epilog(void __user *buf, int ia32_frame)
 {
-	struct task_struct *tsk = current;
-	int err = 0;
+	struct xsave_struct __user *x = buf;
+	struct _fpx_sw_bytes *sw_bytes;
+	u32 xstate_bv;
+	int err;
 
-	if (!access_ok(VERIFY_WRITE, buf, sig_xstate_size))
-		return -EACCES;
+	/* Setup the bytes not touched by the [f]xsave and reserved for SW. */
+	sw_bytes = ia32_frame ? &fx_sw_reserved_ia32 : &fx_sw_reserved;
+	err = __copy_to_user(&x->i387.sw_reserved, sw_bytes, sizeof(*sw_bytes));
 
-	BUG_ON(sig_xstate_size < xstate_size);
+	if (!use_xsave())
+		return err;
 
-	if ((unsigned long)buf % 64)
-		pr_err("%s: bad fpstate %p\n", __func__, buf);
+	err |= __put_user(FP_XSTATE_MAGIC2, (__u32 *)(buf + xstate_size));
 
-	if (!used_math())
-		return 0;
+	/*
+	 * Read the xstate_bv which we copied (directly from the cpu or
+	 * from the state in task struct) to the user buffers.
+	 */
+	err |= __get_user(xstate_bv, (__u32 *)&x->xsave_hdr.xstate_bv);
 
-	if (user_has_fpu()) {
-		if (use_xsave())
-			err = xsave_user(buf);
-		else
-			err = fxsave_user(buf);
+	/*
+	 * For legacy compatible, we always set FP/SSE bits in the bit
+	 * vector while saving the state to the user context. This will
+	 * enable us capturing any changes(during sigreturn) to
+	 * the FP/SSE bits by the legacy applications which don't touch
+	 * xstate_bv in the xsave header.
+	 *
+	 * xsave aware apps can change the xstate_bv in the xsave
+	 * header as well as change any contents in the memory layout.
+	 * xrestore as part of sigreturn will capture all the changes.
+	 */
+	xstate_bv |= XSTATE_FPSSE;
 
-		if (unlikely(err)) {
-			__clear_user(buf, xstate_size);
-			return err;
-		}
+	err |= __put_user(xstate_bv, (__u32 *)&x->xsave_hdr.xstate_bv);
+
+	return err;
+}
+
+static inline int save_user_xstate(struct xsave_struct __user *buf)
+{
+	int err;
+
+	if (use_xsave())
+		err = xsave_user(buf);
+	else if (use_fxsr())
+		err = fxsave_user((struct i387_fxsave_struct __user *) buf);
+	else
+		err = fsave_user((struct i387_fsave_struct __user *) buf);
+
+	if (unlikely(err) && __clear_user(buf, xstate_size))
+		err = -EFAULT;
+	return err;
+}
+
+/*
+ * Save the fpu, extended register state to the user signal frame.
+ *
+ * 'buf_fx' is the 64-byte aligned pointer at which the [f|fx|x]save
+ *  state is copied.
+ *  'buf' points to the 'buf_fx' or to the fsave header followed by 'buf_fx'.
+ *
+ *	buf == buf_fx for 64-bit frames and 32-bit fsave frame.
+ *	buf != buf_fx for 32-bit frames with fxstate.
+ *
+ * If the fpu, extended register state is live, save the state directly
+ * to the user frame pointed by the aligned pointer 'buf_fx'. Otherwise,
+ * copy the thread's fpu state to the user frame starting at 'buf_fx'.
+ *
+ * If this is a 32-bit frame with fxstate, put a fsave header before
+ * the aligned state at 'buf_fx'.
+ *
+ * For [f]xsave state, update the SW reserved fields in the [f]xsave frame
+ * indicating the absence/presence of the extended state to the user.
+ */
+int save_xstate_sig(void __user *buf, void __user *buf_fx, int size)
+{
+	struct xsave_struct *xsave = &current->thread.fpu.state->xsave;
+	struct task_struct *tsk = current;
+	int ia32_fxstate = (buf != buf_fx);
+
+	ia32_fxstate &= (config_enabled(CONFIG_X86_32) ||
+			 config_enabled(CONFIG_IA32_EMULATION));
+
+	if (!access_ok(VERIFY_WRITE, buf, size))
+		return -EACCES;
+
+	if (!HAVE_HWFP)
+		return fpregs_soft_get(current, NULL, 0,
+			sizeof(struct user_i387_ia32_struct), NULL,
+			(struct _fpstate_ia32 __user *) buf) ? -1 : 1;
+
+	if (user_has_fpu()) {
+		/* Save the live register state to the user directly. */
+		if (save_user_xstate(buf_fx))
+			return -1;
+		/* Update the thread's fxstate to save the fsave header. */
+		if (ia32_fxstate)
+			fpu_fxsave(&tsk->thread.fpu);
 		user_fpu_end();
 	} else {
 		sanitize_i387_state(tsk);
-		if (__copy_to_user(buf, &tsk->thread.fpu.state->fxsave,
-				   xstate_size))
+		if (__copy_to_user(buf_fx, xsave, xstate_size))
 			return -1;
 	}
 
-	clear_used_math(); /* trigger finit */
+	/* Save the fsave header for the 32-bit frames. */
+	if ((ia32_fxstate || !use_fxsr()) && save_fsave_header(tsk, buf))
+		return -1;
 
-	if (use_xsave()) {
-		struct _fpstate __user *fx = buf;
-		struct _xstate __user *x = buf;
-		u64 xstate_bv;
+	if (use_fxsr() && save_xstate_epilog(buf_fx, ia32_fxstate))
+		return -1;
+
+	drop_fpu(tsk);	/* trigger finit */
+
+	return 0;
+}
 
-		err = __copy_to_user(&fx->sw_reserved, &fx_sw_reserved,
-				     sizeof(struct _fpx_sw_bytes));
+static inline void
+sanitize_restored_xstate(struct task_struct *tsk,
+			 struct user_i387_ia32_struct *ia32_env,
+			 u64 xstate_bv, int fx_only)
+{
+	struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave;
+	struct xsave_hdr_struct *xsave_hdr = &xsave->xsave_hdr;
 
-		err |= __put_user(FP_XSTATE_MAGIC2,
-				  (__u32 __user *) (buf + sig_xstate_size
-						    - FP_XSTATE_MAGIC2_SIZE));
+	if (use_xsave()) {
+		/* These bits must be zero. */
+		xsave_hdr->reserved1[0] = xsave_hdr->reserved1[1] = 0;
 
 		/*
-		 * Read the xstate_bv which we copied (directly from the cpu or
-		 * from the state in task struct) to the user buffers and
-		 * set the FP/SSE bits.
+		 * Init the state that is not present in the memory
+		 * layout and not enabled by the OS.
 		 */
-		err |= __get_user(xstate_bv, &x->xstate_hdr.xstate_bv);
+		if (fx_only)
+			xsave_hdr->xstate_bv = XSTATE_FPSSE;
+		else
+			xsave_hdr->xstate_bv &= (pcntxt_mask & xstate_bv);
+	}
 
+	if (use_fxsr()) {
 		/*
-		 * For legacy compatible, we always set FP/SSE bits in the bit
-		 * vector while saving the state to the user context. This will
-		 * enable us capturing any changes(during sigreturn) to
-		 * the FP/SSE bits by the legacy applications which don't touch
-		 * xstate_bv in the xsave header.
-		 *
-		 * xsave aware apps can change the xstate_bv in the xsave
-		 * header as well as change any contents in the memory layout.
-		 * xrestore as part of sigreturn will capture all the changes.
+		 * mscsr reserved bits must be masked to zero for security
+		 * reasons.
 		 */
-		xstate_bv |= XSTATE_FPSSE;
-
-		err |= __put_user(xstate_bv, &x->xstate_hdr.xstate_bv);
+		xsave->i387.mxcsr &= mxcsr_feature_mask;
 
-		if (err)
-			return err;
+		convert_to_fxsr(tsk, ia32_env);
 	}
-
-	return 1;
 }
 
 /*
- * Restore the extended state if present. Otherwise, restore the FP/SSE
- * state.
+ * Restore the extended state if present. Otherwise, restore the FP/SSE state.
  */
-static int restore_user_xstate(void __user *buf)
+static inline int restore_user_xstate(void __user *buf, u64 xbv, int fx_only)
 {
-	struct _fpx_sw_bytes fx_sw_user;
-	u64 mask;
-	int err;
-
-	if (((unsigned long)buf % 64) ||
-	     check_for_xstate(buf, buf, &fx_sw_user))
-		goto fx_only;
-
-	mask = fx_sw_user.xstate_bv;
-
-	/*
-	 * restore the state passed by the user.
-	 */
-	err = xrestore_user(buf, mask);
-	if (err)
-		return err;
-
-	/*
-	 * init the state skipped by the user.
-	 */
-	mask = pcntxt_mask & ~mask;
-	if (unlikely(mask))
-		xrstor_state(init_xstate_buf, mask);
-
-	return 0;
-
-fx_only:
-	/*
-	 * couldn't find the extended state information in the
-	 * memory layout. Restore just the FP/SSE and init all
-	 * the other extended state.
-	 */
-	xrstor_state(init_xstate_buf, pcntxt_mask & ~XSTATE_FPSSE);
-	return fxrstor_checking((__force struct i387_fxsave_struct *)buf);
+	if (use_xsave()) {
+		if ((unsigned long)buf % 64 || fx_only) {
+			u64 init_bv = pcntxt_mask & ~XSTATE_FPSSE;
+			xrstor_state(init_xstate_buf, init_bv);
+			return fxrstor_checking((__force void *) buf);
+		} else {
+			u64 init_bv = pcntxt_mask & ~xbv;
+			if (unlikely(init_bv))
+				xrstor_state(init_xstate_buf, init_bv);
+			return xrestore_user(buf, xbv);
+		}
+	} else if (use_fxsr()) {
+		return fxrstor_checking((__force void *) buf);
+	} else
+		return frstor_checking((__force void *) buf);
 }
 
-/*
- * This restores directly out of user space. Exceptions are handled.
- */
-int restore_i387_xstate(void __user *buf)
+int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size)
 {
+	int ia32_fxstate = (buf != buf_fx);
 	struct task_struct *tsk = current;
-	int err = 0;
+	int state_size = xstate_size;
+	u64 xstate_bv = 0;
+	int fx_only = 0;
+
+	ia32_fxstate &= (config_enabled(CONFIG_X86_32) ||
+			 config_enabled(CONFIG_IA32_EMULATION));
 
 	if (!buf) {
-		if (used_math())
-			goto clear;
+		drop_fpu(tsk);
 		return 0;
-	} else
-		if (!access_ok(VERIFY_READ, buf, sig_xstate_size))
-			return -EACCES;
+	}
+
+	if (!access_ok(VERIFY_READ, buf, size))
+		return -EACCES;
+
+	if (!used_math() && init_fpu(tsk))
+		return -1;
 
-	if (!used_math()) {
-		err = init_fpu(tsk);
-		if (err)
-			return err;
+	if (!HAVE_HWFP) {
+		return fpregs_soft_set(current, NULL,
+				       0, sizeof(struct user_i387_ia32_struct),
+				       NULL, buf) != 0;
 	}
 
-	user_fpu_begin();
-	if (use_xsave())
-		err = restore_user_xstate(buf);
-	else
-		err = fxrstor_checking((__force struct i387_fxsave_struct *)
-				       buf);
-	if (unlikely(err)) {
+	if (use_xsave()) {
+		struct _fpx_sw_bytes fx_sw_user;
+		if (unlikely(check_for_xstate(buf_fx, buf_fx, &fx_sw_user))) {
+			/*
+			 * Couldn't find the extended state information in the
+			 * memory layout. Restore just the FP/SSE and init all
+			 * the other extended state.
+			 */
+			state_size = sizeof(struct i387_fxsave_struct);
+			fx_only = 1;
+		} else {
+			state_size = fx_sw_user.xstate_size;
+			xstate_bv = fx_sw_user.xstate_bv;
+		}
+	}
+
+	if (ia32_fxstate) {
+		/*
+		 * For 32-bit frames with fxstate, copy the user state to the
+		 * thread's fpu state, reconstruct fxstate from the fsave
+		 * header. Sanitize the copied state etc.
+		 */
+		struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave;
+		struct user_i387_ia32_struct env;
+
+		stop_fpu_preload(tsk);
+		unlazy_fpu(tsk);
+
+		if (__copy_from_user(xsave, buf_fx, state_size) ||
+		    __copy_from_user(&env, buf, sizeof(env))) {
+			drop_fpu(tsk);
+			return -1;
+		}
+
+		sanitize_restored_xstate(tsk, &env, xstate_bv, fx_only);
+	} else {
 		/*
-		 * Encountered an error while doing the restore from the
-		 * user buffer, clear the fpu state.
+		 * For 64-bit frames and 32-bit fsave frames, restore the user
+		 * state to the registers directly (with exceptions handled).
 		 */
-clear:
-		clear_fpu(tsk);
-		clear_used_math();
+		user_fpu_begin();
+		if (restore_user_xstate(buf_fx, xstate_bv, fx_only)) {
+			drop_fpu(tsk);
+			return -1;
+		}
 	}
-	return err;
+
+	return 0;
 }
-#endif
 
 /*
  * Prepare the SW reserved portion of the fxsave memory layout, indicating
@@ -323,31 +416,22 @@ clear:
  */
 static void prepare_fx_sw_frame(void)
 {
-	int size_extended = (xstate_size - sizeof(struct i387_fxsave_struct)) +
-			     FP_XSTATE_MAGIC2_SIZE;
-
-	sig_xstate_size = sizeof(struct _fpstate) + size_extended;
-
-#ifdef CONFIG_IA32_EMULATION
-	sig_xstate_ia32_size = sizeof(struct _fpstate_ia32) + size_extended;
-#endif
+	int fsave_header_size = sizeof(struct i387_fsave_struct);
+	int size = xstate_size + FP_XSTATE_MAGIC2_SIZE;
 
-	memset(&fx_sw_reserved, 0, sizeof(fx_sw_reserved));
+	if (config_enabled(CONFIG_X86_32))
+		size += fsave_header_size;
 
 	fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1;
-	fx_sw_reserved.extended_size = sig_xstate_size;
+	fx_sw_reserved.extended_size = size;
 	fx_sw_reserved.xstate_bv = pcntxt_mask;
 	fx_sw_reserved.xstate_size = xstate_size;
-#ifdef CONFIG_IA32_EMULATION
-	memcpy(&fx_sw_reserved_ia32, &fx_sw_reserved,
-	       sizeof(struct _fpx_sw_bytes));
-	fx_sw_reserved_ia32.extended_size = sig_xstate_ia32_size;
-#endif
-}
 
-#ifdef CONFIG_X86_64
-unsigned int sig_xstate_size = sizeof(struct _fpstate);
-#endif
+	if (config_enabled(CONFIG_IA32_EMULATION)) {
+		fx_sw_reserved_ia32 = fx_sw_reserved;
+		fx_sw_reserved_ia32.extended_size += fsave_header_size;
+	}
+}
 
 /*
  * Enable the extended processor state save/restore feature
-- 
cgit v1.2.3-70-g09d2


From e962591749dfd4df9fea2c530ed7a3cfed50e5aa Mon Sep 17 00:00:00 2001
From: Suresh Siddha <suresh.b.siddha@intel.com>
Date: Fri, 24 Aug 2012 14:12:57 -0700
Subject: x86, fpu: drop_fpu() before restoring new state from sigframe

No need to save the state with unlazy_fpu(), that is about to get overwritten
by the state from the signal frame. Instead use drop_fpu() and continue
to restore the new state.

Also fold the stop_fpu_preload() into drop_fpu().

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Link: http://lkml.kernel.org/r/1345842782-24175-2-git-send-email-suresh.b.siddha@intel.com
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/include/asm/fpu-internal.h | 7 +------
 arch/x86/kernel/xsave.c             | 8 +++-----
 2 files changed, 4 insertions(+), 11 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h
index 4fbb4195bc6..78169d133d4 100644
--- a/arch/x86/include/asm/fpu-internal.h
+++ b/arch/x86/include/asm/fpu-internal.h
@@ -448,17 +448,12 @@ static inline void save_init_fpu(struct task_struct *tsk)
 	preempt_enable();
 }
 
-static inline void stop_fpu_preload(struct task_struct *tsk)
-{
-	tsk->fpu_counter = 0;
-}
-
 static inline void drop_fpu(struct task_struct *tsk)
 {
 	/*
 	 * Forget coprocessor state..
 	 */
-	stop_fpu_preload(tsk);
+	tsk->fpu_counter = 0;
 	preempt_disable();
 	__drop_fpu(tsk);
 	preempt_enable();
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index 0923d27f23d..07ddc870640 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -382,16 +382,14 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size)
 		struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave;
 		struct user_i387_ia32_struct env;
 
-		stop_fpu_preload(tsk);
-		unlazy_fpu(tsk);
+		drop_fpu(tsk);
 
 		if (__copy_from_user(xsave, buf_fx, state_size) ||
-		    __copy_from_user(&env, buf, sizeof(env))) {
-			drop_fpu(tsk);
+		    __copy_from_user(&env, buf, sizeof(env)))
 			return -1;
-		}
 
 		sanitize_restored_xstate(tsk, &env, xstate_bv, fx_only);
+		set_used_math();
 	} else {
 		/*
 		 * For 64-bit frames and 32-bit fsave frames, restore the user
-- 
cgit v1.2.3-70-g09d2


From 377ffbcc536a5a6666dc077395163ab149c02610 Mon Sep 17 00:00:00 2001
From: Suresh Siddha <suresh.b.siddha@intel.com>
Date: Fri, 24 Aug 2012 14:12:58 -0700
Subject: x86, fpu: remove unnecessary user_fpu_end() in save_xstate_sig()

Few lines below we do drop_fpu() which is more safer. Remove the
unnecessary user_fpu_end() in save_xstate_sig(), which allows
the drop_fpu() to ignore any pending exceptions from the user-space
and drop the current fpu.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Link: http://lkml.kernel.org/r/1345842782-24175-3-git-send-email-suresh.b.siddha@intel.com
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/include/asm/fpu-internal.h | 17 +++--------------
 arch/x86/kernel/xsave.c             |  1 -
 2 files changed, 3 insertions(+), 15 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h
index 78169d133d4..52202a6b12a 100644
--- a/arch/x86/include/asm/fpu-internal.h
+++ b/arch/x86/include/asm/fpu-internal.h
@@ -412,22 +412,11 @@ static inline void __drop_fpu(struct task_struct *tsk)
 }
 
 /*
- * The actual user_fpu_begin/end() functions
- * need to be preemption-safe.
+ * Need to be preemption-safe.
  *
- * NOTE! user_fpu_end() must be used only after you
- * have saved the FP state, and user_fpu_begin() must
- * be used only immediately before restoring it.
- * These functions do not do any save/restore on
- * their own.
+ * NOTE! user_fpu_begin() must be used only immediately before restoring
+ * it. This function does not do any save/restore on their own.
  */
-static inline void user_fpu_end(void)
-{
-	preempt_disable();
-	__thread_fpu_end(current);
-	preempt_enable();
-}
-
 static inline void user_fpu_begin(void)
 {
 	preempt_disable();
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index 07ddc870640..4ac5f2e135b 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -255,7 +255,6 @@ int save_xstate_sig(void __user *buf, void __user *buf_fx, int size)
 		/* Update the thread's fxstate to save the fsave header. */
 		if (ia32_fxstate)
 			fpu_fxsave(&tsk->thread.fpu);
-		user_fpu_end();
 	} else {
 		sanitize_i387_state(tsk);
 		if (__copy_to_user(buf_fx, xsave, xstate_size))
-- 
cgit v1.2.3-70-g09d2


From 9c1c3fac53378c9782c18f80107965578d7b7167 Mon Sep 17 00:00:00 2001
From: Suresh Siddha <suresh.b.siddha@intel.com>
Date: Fri, 24 Aug 2012 14:12:59 -0700
Subject: x86, kvm: use kernel_fpu_begin/end() in kvm_load/put_guest_fpu()

kvm's guest fpu save/restore should be wrapped around
kernel_fpu_begin/end(). This will avoid for example taking a DNA
in kvm_load_guest_fpu() when it tries to load the fpu immediately
after doing unlazy_fpu() on the host side.

More importantly this will prevent the host process fpu from being
corrupted.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Link: http://lkml.kernel.org/r/1345842782-24175-4-git-send-email-suresh.b.siddha@intel.com
Cc: Avi Kivity <avi@redhat.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/kvm/x86.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 148ed666e31..cf637f572bd 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5972,7 +5972,7 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
 	 */
 	kvm_put_guest_xcr0(vcpu);
 	vcpu->guest_fpu_loaded = 1;
-	unlazy_fpu(current);
+	kernel_fpu_begin();
 	fpu_restore_checking(&vcpu->arch.guest_fpu);
 	trace_kvm_fpu(1);
 }
@@ -5986,6 +5986,7 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
 
 	vcpu->guest_fpu_loaded = 0;
 	fpu_save_init(&vcpu->arch.guest_fpu);
+	kernel_fpu_end();
 	++vcpu->stat.fpu_reload;
 	kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu);
 	trace_kvm_fpu(0);
-- 
cgit v1.2.3-70-g09d2


From 841e3604d35aa70d399146abdc526d8c89a2c2f5 Mon Sep 17 00:00:00 2001
From: Suresh Siddha <suresh.b.siddha@intel.com>
Date: Fri, 24 Aug 2012 14:13:00 -0700
Subject: x86, fpu: always use kernel_fpu_begin/end() for in-kernel FPU usage

use kernel_fpu_begin/end() instead of unconditionally accessing cr0 and
saving/restoring just the few used xmm/ymm registers.

This has some advantages like:
* If the task's FPU state is already active, then kernel_fpu_begin()
  will just save the user-state and avoiding the read/write of cr0.
  In general, cr0 accesses are much slower.

* Manual save/restore of xmm/ymm registers will affect the 'modified' and
  the 'init' optimizations brought in the by xsaveopt/xrstor
  infrastructure.

* Foward compatibility with future vector register extensions will be a
  problem if the xmm/ymm registers are manually saved and restored
  (corrupting the extended state of those vector registers).

With this patch, there was no significant difference in the xor throughput
using AVX, measured during boot.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Link: http://lkml.kernel.org/r/1345842782-24175-5-git-send-email-suresh.b.siddha@intel.com
Cc: Jim Kukunas <james.t.kukunas@linux.intel.com>
Cc: NeilBrown <neilb@suse.de>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/include/asm/xor_32.h  | 56 ++++++--------------------------------
 arch/x86/include/asm/xor_64.h  | 61 +++++++-----------------------------------
 arch/x86/include/asm/xor_avx.h | 54 +++++++++----------------------------
 3 files changed, 29 insertions(+), 142 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/xor_32.h b/arch/x86/include/asm/xor_32.h
index 454570891bd..aabd5850bdb 100644
--- a/arch/x86/include/asm/xor_32.h
+++ b/arch/x86/include/asm/xor_32.h
@@ -534,38 +534,6 @@ static struct xor_block_template xor_block_p5_mmx = {
  * Copyright (C) 1999 Zach Brown (with obvious credit due Ingo)
  */
 
-#define XMMS_SAVE				\
-do {						\
-	preempt_disable();			\
-	cr0 = read_cr0();			\
-	clts();					\
-	asm volatile(				\
-		"movups %%xmm0,(%0)	;\n\t"	\
-		"movups %%xmm1,0x10(%0)	;\n\t"	\
-		"movups %%xmm2,0x20(%0)	;\n\t"	\
-		"movups %%xmm3,0x30(%0)	;\n\t"	\
-		:				\
-		: "r" (xmm_save) 		\
-		: "memory");			\
-} while (0)
-
-#define XMMS_RESTORE				\
-do {						\
-	asm volatile(				\
-		"sfence			;\n\t"	\
-		"movups (%0),%%xmm0	;\n\t"	\
-		"movups 0x10(%0),%%xmm1	;\n\t"	\
-		"movups 0x20(%0),%%xmm2	;\n\t"	\
-		"movups 0x30(%0),%%xmm3	;\n\t"	\
-		:				\
-		: "r" (xmm_save)		\
-		: "memory");			\
-	write_cr0(cr0);				\
-	preempt_enable();			\
-} while (0)
-
-#define ALIGN16 __attribute__((aligned(16)))
-
 #define OFFS(x)		"16*("#x")"
 #define PF_OFFS(x)	"256+16*("#x")"
 #define	PF0(x)		"	prefetchnta "PF_OFFS(x)"(%1)		;\n"
@@ -587,10 +555,8 @@ static void
 xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
 {
 	unsigned long lines = bytes >> 8;
-	char xmm_save[16*4] ALIGN16;
-	int cr0;
 
-	XMMS_SAVE;
+	kernel_fpu_begin();
 
 	asm volatile(
 #undef BLOCK
@@ -633,7 +599,7 @@ xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
 	:
 	: "memory");
 
-	XMMS_RESTORE;
+	kernel_fpu_end();
 }
 
 static void
@@ -641,10 +607,8 @@ xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 	  unsigned long *p3)
 {
 	unsigned long lines = bytes >> 8;
-	char xmm_save[16*4] ALIGN16;
-	int cr0;
 
-	XMMS_SAVE;
+	kernel_fpu_begin();
 
 	asm volatile(
 #undef BLOCK
@@ -694,7 +658,7 @@ xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 	:
 	: "memory" );
 
-	XMMS_RESTORE;
+	kernel_fpu_end();
 }
 
 static void
@@ -702,10 +666,8 @@ xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 	  unsigned long *p3, unsigned long *p4)
 {
 	unsigned long lines = bytes >> 8;
-	char xmm_save[16*4] ALIGN16;
-	int cr0;
 
-	XMMS_SAVE;
+	kernel_fpu_begin();
 
 	asm volatile(
 #undef BLOCK
@@ -762,7 +724,7 @@ xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 	:
 	: "memory" );
 
-	XMMS_RESTORE;
+	kernel_fpu_end();
 }
 
 static void
@@ -770,10 +732,8 @@ xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 	  unsigned long *p3, unsigned long *p4, unsigned long *p5)
 {
 	unsigned long lines = bytes >> 8;
-	char xmm_save[16*4] ALIGN16;
-	int cr0;
 
-	XMMS_SAVE;
+	kernel_fpu_begin();
 
 	/* Make sure GCC forgets anything it knows about p4 or p5,
 	   such that it won't pass to the asm volatile below a
@@ -850,7 +810,7 @@ xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 	   like assuming they have some legal value.  */
 	asm("" : "=r" (p4), "=r" (p5));
 
-	XMMS_RESTORE;
+	kernel_fpu_end();
 }
 
 static struct xor_block_template xor_block_pIII_sse = {
diff --git a/arch/x86/include/asm/xor_64.h b/arch/x86/include/asm/xor_64.h
index b9b2323e90f..5fc06d0b7eb 100644
--- a/arch/x86/include/asm/xor_64.h
+++ b/arch/x86/include/asm/xor_64.h
@@ -34,41 +34,7 @@
  * no advantages to be gotten from x86-64 here anyways.
  */
 
-typedef struct {
-	unsigned long a, b;
-} __attribute__((aligned(16))) xmm_store_t;
-
-/* Doesn't use gcc to save the XMM registers, because there is no easy way to
-   tell it to do a clts before the register saving. */
-#define XMMS_SAVE				\
-do {						\
-	preempt_disable();			\
-	asm volatile(				\
-		"movq %%cr0,%0		;\n\t"	\
-		"clts			;\n\t"	\
-		"movups %%xmm0,(%1)	;\n\t"	\
-		"movups %%xmm1,0x10(%1)	;\n\t"	\
-		"movups %%xmm2,0x20(%1)	;\n\t"	\
-		"movups %%xmm3,0x30(%1)	;\n\t"	\
-		: "=&r" (cr0)			\
-		: "r" (xmm_save) 		\
-		: "memory");			\
-} while (0)
-
-#define XMMS_RESTORE				\
-do {						\
-	asm volatile(				\
-		"sfence			;\n\t"	\
-		"movups (%1),%%xmm0	;\n\t"	\
-		"movups 0x10(%1),%%xmm1	;\n\t"	\
-		"movups 0x20(%1),%%xmm2	;\n\t"	\
-		"movups 0x30(%1),%%xmm3	;\n\t"	\
-		"movq 	%0,%%cr0	;\n\t"	\
-		:				\
-		: "r" (cr0), "r" (xmm_save)	\
-		: "memory");			\
-	preempt_enable();			\
-} while (0)
+#include <asm/i387.h>
 
 #define OFFS(x)		"16*("#x")"
 #define PF_OFFS(x)	"256+16*("#x")"
@@ -91,10 +57,8 @@ static void
 xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
 {
 	unsigned int lines = bytes >> 8;
-	unsigned long cr0;
-	xmm_store_t xmm_save[4];
 
-	XMMS_SAVE;
+	kernel_fpu_begin();
 
 	asm volatile(
 #undef BLOCK
@@ -135,7 +99,7 @@ xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
 	: [inc] "r" (256UL)
 	: "memory");
 
-	XMMS_RESTORE;
+	kernel_fpu_end();
 }
 
 static void
@@ -143,11 +107,8 @@ xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 	  unsigned long *p3)
 {
 	unsigned int lines = bytes >> 8;
-	xmm_store_t xmm_save[4];
-	unsigned long cr0;
-
-	XMMS_SAVE;
 
+	kernel_fpu_begin();
 	asm volatile(
 #undef BLOCK
 #define BLOCK(i) \
@@ -194,7 +155,7 @@ xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 	  [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3)
 	: [inc] "r" (256UL)
 	: "memory");
-	XMMS_RESTORE;
+	kernel_fpu_end();
 }
 
 static void
@@ -202,10 +163,8 @@ xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 	  unsigned long *p3, unsigned long *p4)
 {
 	unsigned int lines = bytes >> 8;
-	xmm_store_t xmm_save[4];
-	unsigned long cr0;
 
-	XMMS_SAVE;
+	kernel_fpu_begin();
 
 	asm volatile(
 #undef BLOCK
@@ -261,7 +220,7 @@ xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 	: [inc] "r" (256UL)
 	: "memory" );
 
-	XMMS_RESTORE;
+	kernel_fpu_end();
 }
 
 static void
@@ -269,10 +228,8 @@ xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 	  unsigned long *p3, unsigned long *p4, unsigned long *p5)
 {
 	unsigned int lines = bytes >> 8;
-	xmm_store_t xmm_save[4];
-	unsigned long cr0;
 
-	XMMS_SAVE;
+	kernel_fpu_begin();
 
 	asm volatile(
 #undef BLOCK
@@ -336,7 +293,7 @@ xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 	: [inc] "r" (256UL)
 	: "memory");
 
-	XMMS_RESTORE;
+	kernel_fpu_end();
 }
 
 static struct xor_block_template xor_block_sse = {
diff --git a/arch/x86/include/asm/xor_avx.h b/arch/x86/include/asm/xor_avx.h
index 2510d35f480..7ea79c5fa1f 100644
--- a/arch/x86/include/asm/xor_avx.h
+++ b/arch/x86/include/asm/xor_avx.h
@@ -20,32 +20,6 @@
 #include <linux/compiler.h>
 #include <asm/i387.h>
 
-#define ALIGN32 __aligned(32)
-
-#define YMM_SAVED_REGS 4
-
-#define YMMS_SAVE \
-do { \
-	preempt_disable(); \
-	cr0 = read_cr0(); \
-	clts(); \
-	asm volatile("vmovaps %%ymm0, %0" : "=m" (ymm_save[0]) : : "memory"); \
-	asm volatile("vmovaps %%ymm1, %0" : "=m" (ymm_save[32]) : : "memory"); \
-	asm volatile("vmovaps %%ymm2, %0" : "=m" (ymm_save[64]) : : "memory"); \
-	asm volatile("vmovaps %%ymm3, %0" : "=m" (ymm_save[96]) : : "memory"); \
-} while (0);
-
-#define YMMS_RESTORE \
-do { \
-	asm volatile("sfence" : : : "memory"); \
-	asm volatile("vmovaps %0, %%ymm3" : : "m" (ymm_save[96])); \
-	asm volatile("vmovaps %0, %%ymm2" : : "m" (ymm_save[64])); \
-	asm volatile("vmovaps %0, %%ymm1" : : "m" (ymm_save[32])); \
-	asm volatile("vmovaps %0, %%ymm0" : : "m" (ymm_save[0])); \
-	write_cr0(cr0); \
-	preempt_enable(); \
-} while (0);
-
 #define BLOCK4(i) \
 		BLOCK(32 * i, 0) \
 		BLOCK(32 * (i + 1), 1) \
@@ -60,10 +34,9 @@ do { \
 
 static void xor_avx_2(unsigned long bytes, unsigned long *p0, unsigned long *p1)
 {
-	unsigned long cr0, lines = bytes >> 9;
-	char ymm_save[32 * YMM_SAVED_REGS] ALIGN32;
+	unsigned long lines = bytes >> 9;
 
-	YMMS_SAVE
+	kernel_fpu_begin();
 
 	while (lines--) {
 #undef BLOCK
@@ -82,16 +55,15 @@ do { \
 		p1 = (unsigned long *)((uintptr_t)p1 + 512);
 	}
 
-	YMMS_RESTORE
+	kernel_fpu_end();
 }
 
 static void xor_avx_3(unsigned long bytes, unsigned long *p0, unsigned long *p1,
 	unsigned long *p2)
 {
-	unsigned long cr0, lines = bytes >> 9;
-	char ymm_save[32 * YMM_SAVED_REGS] ALIGN32;
+	unsigned long lines = bytes >> 9;
 
-	YMMS_SAVE
+	kernel_fpu_begin();
 
 	while (lines--) {
 #undef BLOCK
@@ -113,16 +85,15 @@ do { \
 		p2 = (unsigned long *)((uintptr_t)p2 + 512);
 	}
 
-	YMMS_RESTORE
+	kernel_fpu_end();
 }
 
 static void xor_avx_4(unsigned long bytes, unsigned long *p0, unsigned long *p1,
 	unsigned long *p2, unsigned long *p3)
 {
-	unsigned long cr0, lines = bytes >> 9;
-	char ymm_save[32 * YMM_SAVED_REGS] ALIGN32;
+	unsigned long lines = bytes >> 9;
 
-	YMMS_SAVE
+	kernel_fpu_begin();
 
 	while (lines--) {
 #undef BLOCK
@@ -147,16 +118,15 @@ do { \
 		p3 = (unsigned long *)((uintptr_t)p3 + 512);
 	}
 
-	YMMS_RESTORE
+	kernel_fpu_end();
 }
 
 static void xor_avx_5(unsigned long bytes, unsigned long *p0, unsigned long *p1,
 	unsigned long *p2, unsigned long *p3, unsigned long *p4)
 {
-	unsigned long cr0, lines = bytes >> 9;
-	char ymm_save[32 * YMM_SAVED_REGS] ALIGN32;
+	unsigned long lines = bytes >> 9;
 
-	YMMS_SAVE
+	kernel_fpu_begin();
 
 	while (lines--) {
 #undef BLOCK
@@ -184,7 +154,7 @@ do { \
 		p4 = (unsigned long *)((uintptr_t)p4 + 512);
 	}
 
-	YMMS_RESTORE
+	kernel_fpu_end();
 }
 
 static struct xor_block_template xor_block_avx = {
-- 
cgit v1.2.3-70-g09d2


From 304bceda6a18ae0b0240b8aac9a6bdf8ce2d2469 Mon Sep 17 00:00:00 2001
From: Suresh Siddha <suresh.b.siddha@intel.com>
Date: Fri, 24 Aug 2012 14:13:02 -0700
Subject: x86, fpu: use non-lazy fpu restore for processors supporting xsave

Fundamental model of the current Linux kernel is to lazily init and
restore FPU instead of restoring the task state during context switch.
This changes that fundamental lazy model to the non-lazy model for
the processors supporting xsave feature.

Reasons driving this model change are:

i. Newer processors support optimized state save/restore using xsaveopt and
xrstor by tracking the INIT state and MODIFIED state during context-switch.
This is faster than modifying the cr0.TS bit which has serializing semantics.

ii. Newer glibc versions use SSE for some of the optimized copy/clear routines.
With certain workloads (like boot, kernel-compilation etc), application
completes its work with in the first 5 task switches, thus taking upto 5 #DNA
traps with the kernel not getting a chance to apply the above mentioned
pre-load heuristic.

iii. Some xstate features (like AMD's LWP feature) don't honor the cr0.TS bit
and thus will not work correctly in the presence of lazy restore. Non-lazy
state restore is needed for enabling such features.

Some data on a two socket SNB system:
 * Saved 20K DNA exceptions during boot on a two socket SNB system.
 * Saved 50K DNA exceptions during kernel-compilation workload.
 * Improved throughput of the AVX based checksumming function inside the
   kernel by ~15% as xsave/xrstor is faster than the serializing clts/stts
   pair.

Also now kernel_fpu_begin/end() relies on the patched
alternative instructions. So move check_fpu() which uses the
kernel_fpu_begin/end() after alternative_instructions().

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Link: http://lkml.kernel.org/r/1345842782-24175-7-git-send-email-suresh.b.siddha@intel.com
Merge 32-bit boot fix from,
Link: http://lkml.kernel.org/r/1347300665-6209-4-git-send-email-suresh.b.siddha@intel.com
Cc: Jim Kukunas <james.t.kukunas@linux.intel.com>
Cc: NeilBrown <neilb@suse.de>
Cc: Avi Kivity <avi@redhat.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/include/asm/fpu-internal.h | 96 ++++++++++++++++++++++++-------------
 arch/x86/include/asm/i387.h         |  1 +
 arch/x86/include/asm/xsave.h        |  1 +
 arch/x86/kernel/cpu/bugs.c          |  7 ++-
 arch/x86/kernel/i387.c              | 20 ++++++--
 arch/x86/kernel/process.c           | 12 +++--
 arch/x86/kernel/process_32.c        |  4 --
 arch/x86/kernel/process_64.c        |  4 --
 arch/x86/kernel/traps.c             |  5 +-
 arch/x86/kernel/xsave.c             | 57 +++++++++++++++++-----
 10 files changed, 146 insertions(+), 61 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h
index 52202a6b12a..8ca0f9f45ac 100644
--- a/arch/x86/include/asm/fpu-internal.h
+++ b/arch/x86/include/asm/fpu-internal.h
@@ -291,15 +291,48 @@ static inline void __thread_set_has_fpu(struct task_struct *tsk)
 static inline void __thread_fpu_end(struct task_struct *tsk)
 {
 	__thread_clear_has_fpu(tsk);
-	stts();
+	if (!use_xsave())
+		stts();
 }
 
 static inline void __thread_fpu_begin(struct task_struct *tsk)
 {
-	clts();
+	if (!use_xsave())
+		clts();
 	__thread_set_has_fpu(tsk);
 }
 
+static inline void __drop_fpu(struct task_struct *tsk)
+{
+	if (__thread_has_fpu(tsk)) {
+		/* Ignore delayed exceptions from user space */
+		asm volatile("1: fwait\n"
+			     "2:\n"
+			     _ASM_EXTABLE(1b, 2b));
+		__thread_fpu_end(tsk);
+	}
+}
+
+static inline void drop_fpu(struct task_struct *tsk)
+{
+	/*
+	 * Forget coprocessor state..
+	 */
+	preempt_disable();
+	tsk->fpu_counter = 0;
+	__drop_fpu(tsk);
+	clear_used_math();
+	preempt_enable();
+}
+
+static inline void drop_init_fpu(struct task_struct *tsk)
+{
+	if (!use_xsave())
+		drop_fpu(tsk);
+	else
+		xrstor_state(init_xstate_buf, -1);
+}
+
 /*
  * FPU state switching for scheduling.
  *
@@ -333,7 +366,12 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta
 {
 	fpu_switch_t fpu;
 
-	fpu.preload = tsk_used_math(new) && new->fpu_counter > 5;
+	/*
+	 * If the task has used the math, pre-load the FPU on xsave processors
+	 * or if the past 5 consecutive context-switches used math.
+	 */
+	fpu.preload = tsk_used_math(new) && (use_xsave() ||
+					     new->fpu_counter > 5);
 	if (__thread_has_fpu(old)) {
 		if (!__save_init_fpu(old))
 			cpu = ~0;
@@ -345,14 +383,14 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta
 			new->fpu_counter++;
 			__thread_set_has_fpu(new);
 			prefetch(new->thread.fpu.state);
-		} else
+		} else if (!use_xsave())
 			stts();
 	} else {
 		old->fpu_counter = 0;
 		old->thread.fpu.last_cpu = ~0;
 		if (fpu.preload) {
 			new->fpu_counter++;
-			if (fpu_lazy_restore(new, cpu))
+			if (!use_xsave() && fpu_lazy_restore(new, cpu))
 				fpu.preload = 0;
 			else
 				prefetch(new->thread.fpu.state);
@@ -372,7 +410,7 @@ static inline void switch_fpu_finish(struct task_struct *new, fpu_switch_t fpu)
 {
 	if (fpu.preload) {
 		if (unlikely(restore_fpu_checking(new)))
-			__thread_fpu_end(new);
+			drop_init_fpu(new);
 	}
 }
 
@@ -400,17 +438,6 @@ static inline int restore_xstate_sig(void __user *buf, int ia32_frame)
 	return __restore_xstate_sig(buf, buf_fx, size);
 }
 
-static inline void __drop_fpu(struct task_struct *tsk)
-{
-	if (__thread_has_fpu(tsk)) {
-		/* Ignore delayed exceptions from user space */
-		asm volatile("1: fwait\n"
-			     "2:\n"
-			     _ASM_EXTABLE(1b, 2b));
-		__thread_fpu_end(tsk);
-	}
-}
-
 /*
  * Need to be preemption-safe.
  *
@@ -431,24 +458,18 @@ static inline void user_fpu_begin(void)
 static inline void save_init_fpu(struct task_struct *tsk)
 {
 	WARN_ON_ONCE(!__thread_has_fpu(tsk));
+
+	if (use_xsave()) {
+		xsave_state(&tsk->thread.fpu.state->xsave, -1);
+		return;
+	}
+
 	preempt_disable();
 	__save_init_fpu(tsk);
 	__thread_fpu_end(tsk);
 	preempt_enable();
 }
 
-static inline void drop_fpu(struct task_struct *tsk)
-{
-	/*
-	 * Forget coprocessor state..
-	 */
-	tsk->fpu_counter = 0;
-	preempt_disable();
-	__drop_fpu(tsk);
-	preempt_enable();
-	clear_used_math();
-}
-
 /*
  * i387 state interaction
  */
@@ -503,12 +524,21 @@ static inline void fpu_free(struct fpu *fpu)
 	}
 }
 
-static inline void fpu_copy(struct fpu *dst, struct fpu *src)
+static inline void fpu_copy(struct task_struct *dst, struct task_struct *src)
 {
-	memcpy(dst->state, src->state, xstate_size);
-}
+	if (use_xsave()) {
+		struct xsave_struct *xsave = &dst->thread.fpu.state->xsave;
 
-extern void fpu_finit(struct fpu *fpu);
+		memset(&xsave->xsave_hdr, 0, sizeof(struct xsave_hdr_struct));
+		xsave_state(xsave, -1);
+	} else {
+		struct fpu *dfpu = &dst->thread.fpu;
+		struct fpu *sfpu = &src->thread.fpu;
+
+		unlazy_fpu(src);
+		memcpy(dfpu->state, sfpu->state, xstate_size);
+	}
+}
 
 static inline unsigned long
 alloc_mathframe(unsigned long sp, int ia32_frame, unsigned long *buf_fx,
diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index 257d9cca214..6c3bd378281 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -19,6 +19,7 @@ struct pt_regs;
 struct user_i387_struct;
 
 extern int init_fpu(struct task_struct *child);
+extern void fpu_finit(struct fpu *fpu);
 extern int dump_fpu(struct pt_regs *, struct user_i387_struct *);
 extern void math_state_restore(void);
 
diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h
index c1d989a1519..2ddee1b8779 100644
--- a/arch/x86/include/asm/xsave.h
+++ b/arch/x86/include/asm/xsave.h
@@ -34,6 +34,7 @@
 extern unsigned int xstate_size;
 extern u64 pcntxt_mask;
 extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];
+extern struct xsave_struct *init_xstate_buf;
 
 extern void xsave_init(void);
 extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask);
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index c97bb7b5a9f..d0e910da16c 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -165,10 +165,15 @@ void __init check_bugs(void)
 	print_cpu_info(&boot_cpu_data);
 #endif
 	check_config();
-	check_fpu();
 	check_hlt();
 	check_popad();
 	init_utsname()->machine[1] =
 		'0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86);
 	alternative_instructions();
+
+	/*
+	 * kernel_fpu_begin/end() in check_fpu() relies on the patched
+	 * alternative instructions.
+	 */
+	check_fpu();
 }
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index ab6a2e8028a..528557470dd 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -22,7 +22,15 @@
 /*
  * Were we in an interrupt that interrupted kernel mode?
  *
- * We can do a kernel_fpu_begin/end() pair *ONLY* if that
+ * For now, on xsave platforms we will return interrupted
+ * kernel FPU as not-idle. TBD: As we use non-lazy FPU restore
+ * for xsave platforms, ideally we can change the return value
+ * to something like __thread_has_fpu(current). But we need to
+ * be careful of doing __thread_clear_has_fpu() before saving
+ * the FPU etc for supporting nested uses etc. For now, take
+ * the simple route!
+ *
+ * On others, we can do a kernel_fpu_begin/end() pair *ONLY* if that
  * pair does nothing at all: the thread must not have fpu (so
  * that we don't try to save the FPU state), and TS must
  * be set (so that the clts/stts pair does nothing that is
@@ -30,6 +38,9 @@
  */
 static inline bool interrupted_kernel_fpu_idle(void)
 {
+	if (use_xsave())
+		return 0;
+
 	return !__thread_has_fpu(current) &&
 		(read_cr0() & X86_CR0_TS);
 }
@@ -73,7 +84,7 @@ void kernel_fpu_begin(void)
 		__save_init_fpu(me);
 		__thread_clear_has_fpu(me);
 		/* We do 'stts()' in kernel_fpu_end() */
-	} else {
+	} else if (!use_xsave()) {
 		this_cpu_write(fpu_owner_task, NULL);
 		clts();
 	}
@@ -82,7 +93,10 @@ EXPORT_SYMBOL(kernel_fpu_begin);
 
 void kernel_fpu_end(void)
 {
-	stts();
+	if (use_xsave())
+		math_state_restore();
+	else
+		stts();
 	preempt_enable();
 }
 EXPORT_SYMBOL(kernel_fpu_end);
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 30069d1a6a4..c21e30f8923 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -66,15 +66,13 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 {
 	int ret;
 
-	unlazy_fpu(src);
-
 	*dst = *src;
 	if (fpu_allocated(&src->thread.fpu)) {
 		memset(&dst->thread.fpu, 0, sizeof(dst->thread.fpu));
 		ret = fpu_alloc(&dst->thread.fpu);
 		if (ret)
 			return ret;
-		fpu_copy(&dst->thread.fpu, &src->thread.fpu);
+		fpu_copy(dst, src);
 	}
 	return 0;
 }
@@ -153,7 +151,13 @@ void flush_thread(void)
 
 	flush_ptrace_hw_breakpoint(tsk);
 	memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
-	drop_fpu(tsk);
+	drop_init_fpu(tsk);
+	/*
+	 * Free the FPU state for non xsave platforms. They get reallocated
+	 * lazily at the first use.
+	 */
+	if (!use_xsave())
+		free_thread_xstate(tsk);
 }
 
 static void hard_disable_TSC(void)
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 516fa186121..b9ff83c7135 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -190,10 +190,6 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
 	regs->cs		= __USER_CS;
 	regs->ip		= new_ip;
 	regs->sp		= new_sp;
-	/*
-	 * Free the old FP and other extended state
-	 */
-	free_thread_xstate(current);
 }
 EXPORT_SYMBOL_GPL(start_thread);
 
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 0a980c9d7cb..8a6d20ce197 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -232,10 +232,6 @@ start_thread_common(struct pt_regs *regs, unsigned long new_ip,
 	regs->cs		= _cs;
 	regs->ss		= _ss;
 	regs->flags		= X86_EFLAGS_IF;
-	/*
-	 * Free the old FP and other extended state
-	 */
-	free_thread_xstate(current);
 }
 
 void
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index b481341c936..ac7d5275f6e 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -613,11 +613,12 @@ void math_state_restore(void)
 	}
 
 	__thread_fpu_begin(tsk);
+
 	/*
 	 * Paranoid restore. send a SIGSEGV if we fail to restore the state.
 	 */
 	if (unlikely(restore_fpu_checking(tsk))) {
-		__thread_fpu_end(tsk);
+		drop_init_fpu(tsk);
 		force_sig(SIGSEGV, tsk);
 		return;
 	}
@@ -629,6 +630,8 @@ EXPORT_SYMBOL_GPL(math_state_restore);
 dotraplinkage void __kprobes
 do_device_not_available(struct pt_regs *regs, long error_code)
 {
+	BUG_ON(use_xsave());
+
 #ifdef CONFIG_MATH_EMULATION
 	if (read_cr0() & X86_CR0_EM) {
 		struct math_emu_info info = { };
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index 4ac5f2e135b..e7752bd7cac 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -21,7 +21,7 @@ u64 pcntxt_mask;
 /*
  * Represents init state for the supported extended state.
  */
-static struct xsave_struct *init_xstate_buf;
+struct xsave_struct *init_xstate_buf;
 
 static struct _fpx_sw_bytes fx_sw_reserved, fx_sw_reserved_ia32;
 static unsigned int *xstate_offsets, *xstate_sizes, xstate_features;
@@ -268,7 +268,7 @@ int save_xstate_sig(void __user *buf, void __user *buf_fx, int size)
 	if (use_fxsr() && save_xstate_epilog(buf_fx, ia32_fxstate))
 		return -1;
 
-	drop_fpu(tsk);	/* trigger finit */
+	drop_init_fpu(tsk);	/* trigger finit */
 
 	return 0;
 }
@@ -340,7 +340,7 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size)
 			 config_enabled(CONFIG_IA32_EMULATION));
 
 	if (!buf) {
-		drop_fpu(tsk);
+		drop_init_fpu(tsk);
 		return 0;
 	}
 
@@ -380,15 +380,30 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size)
 		 */
 		struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave;
 		struct user_i387_ia32_struct env;
+		int err = 0;
 
+		/*
+		 * Drop the current fpu which clears used_math(). This ensures
+		 * that any context-switch during the copy of the new state,
+		 * avoids the intermediate state from getting restored/saved.
+		 * Thus avoiding the new restored state from getting corrupted.
+		 * We will be ready to restore/save the state only after
+		 * set_used_math() is again set.
+		 */
 		drop_fpu(tsk);
 
 		if (__copy_from_user(xsave, buf_fx, state_size) ||
-		    __copy_from_user(&env, buf, sizeof(env)))
-			return -1;
+		    __copy_from_user(&env, buf, sizeof(env))) {
+			err = -1;
+		} else {
+			sanitize_restored_xstate(tsk, &env, xstate_bv, fx_only);
+			set_used_math();
+		}
 
-		sanitize_restored_xstate(tsk, &env, xstate_bv, fx_only);
-		set_used_math();
+		if (use_xsave())
+			math_state_restore();
+
+		return err;
 	} else {
 		/*
 		 * For 64-bit frames and 32-bit fsave frames, restore the user
@@ -396,7 +411,7 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size)
 		 */
 		user_fpu_begin();
 		if (restore_user_xstate(buf_fx, xstate_bv, fx_only)) {
-			drop_fpu(tsk);
+			drop_init_fpu(tsk);
 			return -1;
 		}
 	}
@@ -435,10 +450,28 @@ static void prepare_fx_sw_frame(void)
  */
 static inline void xstate_enable(void)
 {
+	clts();
 	set_in_cr4(X86_CR4_OSXSAVE);
 	xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask);
 }
 
+/*
+ * This is same as math_state_restore(). But use_xsave() is not yet
+ * patched to use math_state_restore().
+ */
+static inline void init_restore_xstate(void)
+{
+	init_fpu(current);
+	__thread_fpu_begin(current);
+	xrstor_state(init_xstate_buf, -1);
+}
+
+static inline void xstate_enable_ap(void)
+{
+	xstate_enable();
+	init_restore_xstate();
+}
+
 /*
  * Record the offsets and sizes of different state managed by the xsave
  * memory layout.
@@ -479,7 +512,6 @@ static void __init setup_xstate_init(void)
 					      __alignof__(struct xsave_struct));
 	init_xstate_buf->i387.mxcsr = MXCSR_DEFAULT;
 
-	clts();
 	/*
 	 * Init all the features state with header_bv being 0x0
 	 */
@@ -489,7 +521,6 @@ static void __init setup_xstate_init(void)
 	 * of any feature which is not represented by all zero's.
 	 */
 	xsave_state(init_xstate_buf, -1);
-	stts();
 }
 
 /*
@@ -533,6 +564,10 @@ static void __init xstate_enable_boot_cpu(void)
 
 	pr_info("enabled xstate_bv 0x%llx, cntxt size 0x%x\n",
 		pcntxt_mask, xstate_size);
+
+	current->thread.fpu.state =
+	     alloc_bootmem_align(xstate_size, __alignof__(struct xsave_struct));
+	init_restore_xstate();
 }
 
 /*
@@ -551,6 +586,6 @@ void __cpuinit xsave_init(void)
 		return;
 
 	this_func = next_func;
-	next_func = xstate_enable;
+	next_func = xstate_enable_ap;
 	this_func();
 }
-- 
cgit v1.2.3-70-g09d2


From 5d2bd7009f306c82afddd1ca4d9763ad8473c216 Mon Sep 17 00:00:00 2001
From: Suresh Siddha <suresh.b.siddha@intel.com>
Date: Thu, 6 Sep 2012 14:58:52 -0700
Subject: x86, fpu: decouple non-lazy/eager fpu restore from xsave

Decouple non-lazy/eager fpu restore policy from the existence of the xsave
feature. Introduce a synthetic CPUID flag to represent the eagerfpu
policy. "eagerfpu=on" boot paramter will enable the policy.

Requested-by: H. Peter Anvin <hpa@zytor.com>
Requested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Link: http://lkml.kernel.org/r/1347300665-6209-2-git-send-email-suresh.b.siddha@intel.com
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 Documentation/kernel-parameters.txt |  4 ++
 arch/x86/include/asm/cpufeature.h   |  2 +
 arch/x86/include/asm/fpu-internal.h | 54 ++++++++++++++++-------
 arch/x86/kernel/cpu/common.c        |  2 -
 arch/x86/kernel/i387.c              | 25 ++++-------
 arch/x86/kernel/process.c           |  2 +-
 arch/x86/kernel/traps.c             |  2 +-
 arch/x86/kernel/xsave.c             | 87 ++++++++++++++++++++++++-------------
 8 files changed, 112 insertions(+), 66 deletions(-)

(limited to 'arch/x86')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index ad7e2e5088c..741d064fdc6 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1833,6 +1833,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			and restore using xsave. The kernel will fallback to
 			enabling legacy floating-point and sse state.
 
+	eagerfpu=	[X86]
+			on	enable eager fpu restore
+			off	disable eager fpu restore
+
 	nohlt		[BUGS=ARM,SH] Tells the kernel that the sleep(SH) or
 			wfi(ARM) instruction doesn't work correctly and not to
 			use it. This is also useful when using JTAG debugger.
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 6b7ee5ff682..5dd2b473ccf 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -97,6 +97,7 @@
 #define X86_FEATURE_EXTD_APICID	(3*32+26) /* has extended APICID (8 bits) */
 #define X86_FEATURE_AMD_DCM     (3*32+27) /* multi-node processor */
 #define X86_FEATURE_APERFMPERF	(3*32+28) /* APERFMPERF */
+#define X86_FEATURE_EAGER_FPU	(3*32+29) /* "eagerfpu" Non lazy FPU restore */
 
 /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
 #define X86_FEATURE_XMM3	(4*32+ 0) /* "pni" SSE-3 */
@@ -305,6 +306,7 @@ extern const char * const x86_power_flags[32];
 #define cpu_has_perfctr_core	boot_cpu_has(X86_FEATURE_PERFCTR_CORE)
 #define cpu_has_cx8		boot_cpu_has(X86_FEATURE_CX8)
 #define cpu_has_cx16		boot_cpu_has(X86_FEATURE_CX16)
+#define cpu_has_eager_fpu	boot_cpu_has(X86_FEATURE_EAGER_FPU)
 
 #if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64)
 # define cpu_has_invlpg		1
diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h
index 8ca0f9f45ac..0ca72f0d4b4 100644
--- a/arch/x86/include/asm/fpu-internal.h
+++ b/arch/x86/include/asm/fpu-internal.h
@@ -38,6 +38,7 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,
 
 extern unsigned int mxcsr_feature_mask;
 extern void fpu_init(void);
+extern void eager_fpu_init(void);
 
 DECLARE_PER_CPU(struct task_struct *, fpu_owner_task);
 
@@ -84,6 +85,11 @@ static inline int is_x32_frame(void)
 
 #define X87_FSW_ES (1 << 7)	/* Exception Summary */
 
+static __always_inline __pure bool use_eager_fpu(void)
+{
+	return static_cpu_has(X86_FEATURE_EAGER_FPU);
+}
+
 static __always_inline __pure bool use_xsaveopt(void)
 {
 	return static_cpu_has(X86_FEATURE_XSAVEOPT);
@@ -99,6 +105,14 @@ static __always_inline __pure bool use_fxsr(void)
         return static_cpu_has(X86_FEATURE_FXSR);
 }
 
+static inline void fx_finit(struct i387_fxsave_struct *fx)
+{
+	memset(fx, 0, xstate_size);
+	fx->cwd = 0x37f;
+	if (cpu_has_xmm)
+		fx->mxcsr = MXCSR_DEFAULT;
+}
+
 extern void __sanitize_i387_state(struct task_struct *);
 
 static inline void sanitize_i387_state(struct task_struct *tsk)
@@ -291,13 +305,13 @@ static inline void __thread_set_has_fpu(struct task_struct *tsk)
 static inline void __thread_fpu_end(struct task_struct *tsk)
 {
 	__thread_clear_has_fpu(tsk);
-	if (!use_xsave())
+	if (!use_eager_fpu())
 		stts();
 }
 
 static inline void __thread_fpu_begin(struct task_struct *tsk)
 {
-	if (!use_xsave())
+	if (!use_eager_fpu())
 		clts();
 	__thread_set_has_fpu(tsk);
 }
@@ -327,10 +341,14 @@ static inline void drop_fpu(struct task_struct *tsk)
 
 static inline void drop_init_fpu(struct task_struct *tsk)
 {
-	if (!use_xsave())
+	if (!use_eager_fpu())
 		drop_fpu(tsk);
-	else
-		xrstor_state(init_xstate_buf, -1);
+	else {
+		if (use_xsave())
+			xrstor_state(init_xstate_buf, -1);
+		else
+			fxrstor_checking(&init_xstate_buf->i387);
+	}
 }
 
 /*
@@ -370,7 +388,7 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta
 	 * If the task has used the math, pre-load the FPU on xsave processors
 	 * or if the past 5 consecutive context-switches used math.
 	 */
-	fpu.preload = tsk_used_math(new) && (use_xsave() ||
+	fpu.preload = tsk_used_math(new) && (use_eager_fpu() ||
 					     new->fpu_counter > 5);
 	if (__thread_has_fpu(old)) {
 		if (!__save_init_fpu(old))
@@ -383,14 +401,14 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta
 			new->fpu_counter++;
 			__thread_set_has_fpu(new);
 			prefetch(new->thread.fpu.state);
-		} else if (!use_xsave())
+		} else if (!use_eager_fpu())
 			stts();
 	} else {
 		old->fpu_counter = 0;
 		old->thread.fpu.last_cpu = ~0;
 		if (fpu.preload) {
 			new->fpu_counter++;
-			if (!use_xsave() && fpu_lazy_restore(new, cpu))
+			if (!use_eager_fpu() && fpu_lazy_restore(new, cpu))
 				fpu.preload = 0;
 			else
 				prefetch(new->thread.fpu.state);
@@ -452,6 +470,14 @@ static inline void user_fpu_begin(void)
 	preempt_enable();
 }
 
+static inline void __save_fpu(struct task_struct *tsk)
+{
+	if (use_xsave())
+		xsave_state(&tsk->thread.fpu.state->xsave, -1);
+	else
+		fpu_fxsave(&tsk->thread.fpu);
+}
+
 /*
  * These disable preemption on their own and are safe
  */
@@ -459,8 +485,8 @@ static inline void save_init_fpu(struct task_struct *tsk)
 {
 	WARN_ON_ONCE(!__thread_has_fpu(tsk));
 
-	if (use_xsave()) {
-		xsave_state(&tsk->thread.fpu.state->xsave, -1);
+	if (use_eager_fpu()) {
+		__save_fpu(tsk);
 		return;
 	}
 
@@ -526,11 +552,9 @@ static inline void fpu_free(struct fpu *fpu)
 
 static inline void fpu_copy(struct task_struct *dst, struct task_struct *src)
 {
-	if (use_xsave()) {
-		struct xsave_struct *xsave = &dst->thread.fpu.state->xsave;
-
-		memset(&xsave->xsave_hdr, 0, sizeof(struct xsave_hdr_struct));
-		xsave_state(xsave, -1);
+	if (use_eager_fpu()) {
+		memset(&dst->thread.fpu.state->xsave, 0, xstate_size);
+		__save_fpu(dst);
 	} else {
 		struct fpu *dfpu = &dst->thread.fpu;
 		struct fpu *sfpu = &src->thread.fpu;
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index a5fbc3c5fcc..b0fe078614d 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1297,7 +1297,6 @@ void __cpuinit cpu_init(void)
 	dbg_restore_debug_regs();
 
 	fpu_init();
-	xsave_init();
 
 	raw_local_save_flags(kernel_eflags);
 
@@ -1352,6 +1351,5 @@ void __cpuinit cpu_init(void)
 	dbg_restore_debug_regs();
 
 	fpu_init();
-	xsave_init();
 }
 #endif
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index 528557470dd..6782e398386 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -22,9 +22,8 @@
 /*
  * Were we in an interrupt that interrupted kernel mode?
  *
- * For now, on xsave platforms we will return interrupted
- * kernel FPU as not-idle. TBD: As we use non-lazy FPU restore
- * for xsave platforms, ideally we can change the return value
+ * For now, with eagerfpu we will return interrupted kernel FPU
+ * state as not-idle. TBD: Ideally we can change the return value
  * to something like __thread_has_fpu(current). But we need to
  * be careful of doing __thread_clear_has_fpu() before saving
  * the FPU etc for supporting nested uses etc. For now, take
@@ -38,7 +37,7 @@
  */
 static inline bool interrupted_kernel_fpu_idle(void)
 {
-	if (use_xsave())
+	if (use_eager_fpu())
 		return 0;
 
 	return !__thread_has_fpu(current) &&
@@ -84,7 +83,7 @@ void kernel_fpu_begin(void)
 		__save_init_fpu(me);
 		__thread_clear_has_fpu(me);
 		/* We do 'stts()' in kernel_fpu_end() */
-	} else if (!use_xsave()) {
+	} else if (!use_eager_fpu()) {
 		this_cpu_write(fpu_owner_task, NULL);
 		clts();
 	}
@@ -93,7 +92,7 @@ EXPORT_SYMBOL(kernel_fpu_begin);
 
 void kernel_fpu_end(void)
 {
-	if (use_xsave())
+	if (use_eager_fpu())
 		math_state_restore();
 	else
 		stts();
@@ -122,7 +121,6 @@ static void __cpuinit mxcsr_feature_mask_init(void)
 {
 	unsigned long mask = 0;
 
-	clts();
 	if (cpu_has_fxsr) {
 		memset(&fx_scratch, 0, sizeof(struct i387_fxsave_struct));
 		asm volatile("fxsave %0" : : "m" (fx_scratch));
@@ -131,7 +129,6 @@ static void __cpuinit mxcsr_feature_mask_init(void)
 			mask = 0x0000ffbf;
 	}
 	mxcsr_feature_mask &= mask;
-	stts();
 }
 
 static void __cpuinit init_thread_xstate(void)
@@ -185,9 +182,8 @@ void __cpuinit fpu_init(void)
 		init_thread_xstate();
 
 	mxcsr_feature_mask_init();
-	/* clean state in init */
-	current_thread_info()->status = 0;
-	clear_used_math();
+	xsave_init();
+	eager_fpu_init();
 }
 
 void fpu_finit(struct fpu *fpu)
@@ -198,12 +194,7 @@ void fpu_finit(struct fpu *fpu)
 	}
 
 	if (cpu_has_fxsr) {
-		struct i387_fxsave_struct *fx = &fpu->state->fxsave;
-
-		memset(fx, 0, xstate_size);
-		fx->cwd = 0x37f;
-		if (cpu_has_xmm)
-			fx->mxcsr = MXCSR_DEFAULT;
+		fx_finit(&fpu->state->fxsave);
 	} else {
 		struct i387_fsave_struct *fp = &fpu->state->fsave;
 		memset(fp, 0, xstate_size);
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index c21e30f8923..dc3567e083f 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -156,7 +156,7 @@ void flush_thread(void)
 	 * Free the FPU state for non xsave platforms. They get reallocated
 	 * lazily at the first use.
 	 */
-	if (!use_xsave())
+	if (!use_eager_fpu())
 		free_thread_xstate(tsk);
 }
 
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index ac7d5275f6e..4f4aba0551b 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -630,7 +630,7 @@ EXPORT_SYMBOL_GPL(math_state_restore);
 dotraplinkage void __kprobes
 do_device_not_available(struct pt_regs *regs, long error_code)
 {
-	BUG_ON(use_xsave());
+	BUG_ON(use_eager_fpu());
 
 #ifdef CONFIG_MATH_EMULATION
 	if (read_cr0() & X86_CR0_EM) {
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index e7752bd7cac..c0afd2c4376 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -400,7 +400,7 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size)
 			set_used_math();
 		}
 
-		if (use_xsave())
+		if (use_eager_fpu())
 			math_state_restore();
 
 		return err;
@@ -450,28 +450,10 @@ static void prepare_fx_sw_frame(void)
  */
 static inline void xstate_enable(void)
 {
-	clts();
 	set_in_cr4(X86_CR4_OSXSAVE);
 	xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask);
 }
 
-/*
- * This is same as math_state_restore(). But use_xsave() is not yet
- * patched to use math_state_restore().
- */
-static inline void init_restore_xstate(void)
-{
-	init_fpu(current);
-	__thread_fpu_begin(current);
-	xrstor_state(init_xstate_buf, -1);
-}
-
-static inline void xstate_enable_ap(void)
-{
-	xstate_enable();
-	init_restore_xstate();
-}
-
 /*
  * Record the offsets and sizes of different state managed by the xsave
  * memory layout.
@@ -500,17 +482,20 @@ static void __init setup_xstate_features(void)
 /*
  * setup the xstate image representing the init state
  */
-static void __init setup_xstate_init(void)
+static void __init setup_init_fpu_buf(void)
 {
-	setup_xstate_features();
-
 	/*
 	 * Setup init_xstate_buf to represent the init state of
 	 * all the features managed by the xsave
 	 */
 	init_xstate_buf = alloc_bootmem_align(xstate_size,
 					      __alignof__(struct xsave_struct));
-	init_xstate_buf->i387.mxcsr = MXCSR_DEFAULT;
+	fx_finit(&init_xstate_buf->i387);
+
+	if (!cpu_has_xsave)
+		return;
+
+	setup_xstate_features();
 
 	/*
 	 * Init all the features state with header_bv being 0x0
@@ -523,6 +508,17 @@ static void __init setup_xstate_init(void)
 	xsave_state(init_xstate_buf, -1);
 }
 
+static int disable_eagerfpu;
+static int __init eager_fpu_setup(char *s)
+{
+	if (!strcmp(s, "on"))
+		setup_force_cpu_cap(X86_FEATURE_EAGER_FPU);
+	else if (!strcmp(s, "off"))
+		disable_eagerfpu = 1;
+	return 1;
+}
+__setup("eagerfpu=", eager_fpu_setup);
+
 /*
  * Enable and initialize the xsave feature.
  */
@@ -559,15 +555,10 @@ static void __init xstate_enable_boot_cpu(void)
 
 	update_regset_xstate_info(xstate_size, pcntxt_mask);
 	prepare_fx_sw_frame();
-
-	setup_xstate_init();
+	setup_init_fpu_buf();
 
 	pr_info("enabled xstate_bv 0x%llx, cntxt size 0x%x\n",
 		pcntxt_mask, xstate_size);
-
-	current->thread.fpu.state =
-	     alloc_bootmem_align(xstate_size, __alignof__(struct xsave_struct));
-	init_restore_xstate();
 }
 
 /*
@@ -586,6 +577,42 @@ void __cpuinit xsave_init(void)
 		return;
 
 	this_func = next_func;
-	next_func = xstate_enable_ap;
+	next_func = xstate_enable;
 	this_func();
 }
+
+static inline void __init eager_fpu_init_bp(void)
+{
+	current->thread.fpu.state =
+	    alloc_bootmem_align(xstate_size, __alignof__(struct xsave_struct));
+	if (!init_xstate_buf)
+		setup_init_fpu_buf();
+}
+
+void __cpuinit eager_fpu_init(void)
+{
+	static __refdata void (*boot_func)(void) = eager_fpu_init_bp;
+
+	clear_used_math();
+	current_thread_info()->status = 0;
+	if (!cpu_has_eager_fpu) {
+		stts();
+		return;
+	}
+
+	if (boot_func) {
+		boot_func();
+		boot_func = NULL;
+	}
+
+	/*
+	 * This is same as math_state_restore(). But use_xsave() is
+	 * not yet patched to use math_state_restore().
+	 */
+	init_fpu(current);
+	__thread_fpu_begin(current);
+	if (cpu_has_xsave)
+		xrstor_state(init_xstate_buf, -1);
+	else
+		fxrstor_checking(&init_xstate_buf->i387);
+}
-- 
cgit v1.2.3-70-g09d2


From 212b02125f3725127148475059a70031bd031bdb Mon Sep 17 00:00:00 2001
From: Suresh Siddha <suresh.b.siddha@intel.com>
Date: Thu, 6 Sep 2012 15:05:18 -0700
Subject: x86, fpu: enable eagerfpu by default for xsaveopt

xsaveopt/xrstor support optimized state save/restore by tracking the
INIT state and MODIFIED state during context-switch.

Enable eagerfpu by default for processors supporting xsaveopt.
Can be disabled by passing "eagerfpu=off" boot parameter.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Link: http://lkml.kernel.org/r/1347300665-6209-3-git-send-email-suresh.b.siddha@intel.com
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 Documentation/kernel-parameters.txt | 2 +-
 arch/x86/include/asm/cpufeature.h   | 1 +
 arch/x86/kernel/xsave.c             | 3 +++
 3 files changed, 5 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 741d064fdc6..e8f7faaa457 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1834,7 +1834,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			enabling legacy floating-point and sse state.
 
 	eagerfpu=	[X86]
-			on	enable eager fpu restore
+			on	enable eager fpu restore (default for xsaveopt)
 			off	disable eager fpu restore
 
 	nohlt		[BUGS=ARM,SH] Tells the kernel that the sleep(SH) or
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 5dd2b473ccf..0debdb51844 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -300,6 +300,7 @@ extern const char * const x86_power_flags[32];
 #define cpu_has_xmm4_2		boot_cpu_has(X86_FEATURE_XMM4_2)
 #define cpu_has_x2apic		boot_cpu_has(X86_FEATURE_X2APIC)
 #define cpu_has_xsave		boot_cpu_has(X86_FEATURE_XSAVE)
+#define cpu_has_xsaveopt	boot_cpu_has(X86_FEATURE_XSAVEOPT)
 #define cpu_has_osxsave		boot_cpu_has(X86_FEATURE_OSXSAVE)
 #define cpu_has_hypervisor	boot_cpu_has(X86_FEATURE_HYPERVISOR)
 #define cpu_has_pclmulqdq	boot_cpu_has(X86_FEATURE_PCLMULQDQ)
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index c0afd2c4376..e99f75439f6 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -557,6 +557,9 @@ static void __init xstate_enable_boot_cpu(void)
 	prepare_fx_sw_frame();
 	setup_init_fpu_buf();
 
+	if (cpu_has_xsaveopt && !disable_eagerfpu)
+		setup_force_cpu_cap(X86_FEATURE_EAGER_FPU);
+
 	pr_info("enabled xstate_bv 0x%llx, cntxt size 0x%x\n",
 		pcntxt_mask, xstate_size);
 }
-- 
cgit v1.2.3-70-g09d2


From e00229819f306b1f86134095347e9187dc346bd1 Mon Sep 17 00:00:00 2001
From: Suresh Siddha <suresh.b.siddha@intel.com>
Date: Mon, 10 Sep 2012 10:32:32 -0700
Subject: x86, fpu: make eagerfpu= boot param tri-state

Add the "eagerfpu=auto" (that selects the default scheme in
enabling eagerfpu) which can override compiled-in boot parameters
like "eagerfpu=on/off" (that force enable/disable eagerfpu).

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Link: http://lkml.kernel.org/r/1347300665-6209-5-git-send-email-suresh.b.siddha@intel.com
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 Documentation/kernel-parameters.txt |  4 +++-
 arch/x86/kernel/xsave.c             | 17 ++++++++++++-----
 2 files changed, 15 insertions(+), 6 deletions(-)

(limited to 'arch/x86')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index e8f7faaa457..46a6a8288de 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1834,8 +1834,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			enabling legacy floating-point and sse state.
 
 	eagerfpu=	[X86]
-			on	enable eager fpu restore (default for xsaveopt)
+			on	enable eager fpu restore
 			off	disable eager fpu restore
+			auto	selects the default scheme, which automatically
+				enables eagerfpu restore for xsaveopt.
 
 	nohlt		[BUGS=ARM,SH] Tells the kernel that the sleep(SH) or
 			wfi(ARM) instruction doesn't work correctly and not to
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index e99f75439f6..4e89b3dd408 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -508,13 +508,15 @@ static void __init setup_init_fpu_buf(void)
 	xsave_state(init_xstate_buf, -1);
 }
 
-static int disable_eagerfpu;
+static enum { AUTO, ENABLE, DISABLE } eagerfpu = AUTO;
 static int __init eager_fpu_setup(char *s)
 {
 	if (!strcmp(s, "on"))
-		setup_force_cpu_cap(X86_FEATURE_EAGER_FPU);
+		eagerfpu = ENABLE;
 	else if (!strcmp(s, "off"))
-		disable_eagerfpu = 1;
+		eagerfpu = DISABLE;
+	else if (!strcmp(s, "auto"))
+		eagerfpu = AUTO;
 	return 1;
 }
 __setup("eagerfpu=", eager_fpu_setup);
@@ -557,8 +559,9 @@ static void __init xstate_enable_boot_cpu(void)
 	prepare_fx_sw_frame();
 	setup_init_fpu_buf();
 
-	if (cpu_has_xsaveopt && !disable_eagerfpu)
-		setup_force_cpu_cap(X86_FEATURE_EAGER_FPU);
+	/* Auto enable eagerfpu for xsaveopt */
+	if (cpu_has_xsaveopt && eagerfpu != DISABLE)
+		eagerfpu = ENABLE;
 
 	pr_info("enabled xstate_bv 0x%llx, cntxt size 0x%x\n",
 		pcntxt_mask, xstate_size);
@@ -598,6 +601,10 @@ void __cpuinit eager_fpu_init(void)
 
 	clear_used_math();
 	current_thread_info()->status = 0;
+
+	if (eagerfpu == ENABLE)
+		setup_force_cpu_cap(X86_FEATURE_EAGER_FPU);
+
 	if (!cpu_has_eager_fpu) {
 		stts();
 		return;
-- 
cgit v1.2.3-70-g09d2


From a8615af4bc3621cb01096541dafa6f68352ec2d9 Mon Sep 17 00:00:00 2001
From: Suresh Siddha <suresh.b.siddha@intel.com>
Date: Mon, 10 Sep 2012 10:40:08 -0700
Subject: x86, fpu: remove cpu_has_xmm check in the fx_finit()

CPUs with FXSAVE but no XMM/MXCSR (Pentium II from Intel,
Crusoe/TM-3xxx/5xxx from Transmeta, and presumably some of the K6
generation from AMD) ever looked at the mxcsr field during
fxrstor/fxsave. So remove the cpu_has_xmm check in the fx_finit()

Reported-by: Al Viro <viro@zeniv.linux.org.uk>
Acked-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Link: http://lkml.kernel.org/r/1347300665-6209-6-git-send-email-suresh.b.siddha@intel.com
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/include/asm/fpu-internal.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h
index 0ca72f0d4b4..92f3c6ed817 100644
--- a/arch/x86/include/asm/fpu-internal.h
+++ b/arch/x86/include/asm/fpu-internal.h
@@ -109,8 +109,7 @@ static inline void fx_finit(struct i387_fxsave_struct *fx)
 {
 	memset(fx, 0, xstate_size);
 	fx->cwd = 0x37f;
-	if (cpu_has_xmm)
-		fx->mxcsr = MXCSR_DEFAULT;
+	fx->mxcsr = MXCSR_DEFAULT;
 }
 
 extern void __sanitize_i387_state(struct task_struct *);
-- 
cgit v1.2.3-70-g09d2


From 3ddbebf878ac8d958bb34e87a742a6b3adc283a3 Mon Sep 17 00:00:00 2001
From: Thierry Reding <thierry.reding@avionic-design.de>
Date: Mon, 17 Sep 2012 13:22:53 +0200
Subject: PCI: Discard __init annotations for pci_fixup_irqs() and related
 functions

Remove the __init annotations in order to keep pci_fixup_irqs() around
after init (e.g. for hotplug). This requires the same change for the
implementation of pcibios_update_irq() on all architectures. While at
it, all __devinit annotations are removed as well, since they will be
useless now that HOTPLUG is always on.

Signed-off-by: Thierry Reding <thierry.reding@avionic-design.de>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/alpha/kernel/pci.c      | 2 +-
 arch/arm/kernel/bios32.c     | 2 +-
 arch/ia64/pci/pci.c          | 2 +-
 arch/mips/pci/pci.c          | 2 +-
 arch/sh/drivers/pci/pci.c    | 2 +-
 arch/sparc/kernel/leon_pci.c | 2 +-
 arch/tile/kernel/pci.c       | 2 +-
 arch/tile/kernel/pci_gx.c    | 2 +-
 arch/unicore32/kernel/pci.c  | 2 +-
 arch/x86/pci/visws.c         | 2 +-
 arch/xtensa/kernel/pci.c     | 2 +-
 drivers/pci/setup-irq.c      | 4 ++--
 12 files changed, 13 insertions(+), 13 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/alpha/kernel/pci.c b/arch/alpha/kernel/pci.c
index 9816d5a4d17..920392f61ef 100644
--- a/arch/alpha/kernel/pci.c
+++ b/arch/alpha/kernel/pci.c
@@ -256,7 +256,7 @@ pcibios_fixup_bus(struct pci_bus *bus)
 	}
 }
 
-void __init
+void
 pcibios_update_irq(struct pci_dev *dev, int irq)
 {
 	pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
diff --git a/arch/arm/kernel/bios32.c b/arch/arm/kernel/bios32.c
index 2b2f25e7fef..0174fe6effe 100644
--- a/arch/arm/kernel/bios32.c
+++ b/arch/arm/kernel/bios32.c
@@ -272,7 +272,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ITE, PCI_DEVICE_ID_ITE_8152, pci_fixup_it
 
 
-void __devinit pcibios_update_irq(struct pci_dev *dev, int irq)
+void pcibios_update_irq(struct pci_dev *dev, int irq)
 {
 	if (debug_pci)
 		printk("PCI: Assigning IRQ %02d to %s\n", irq, pci_name(dev));
diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c
index 81acc7a57f3..27db6a8afc4 100644
--- a/arch/ia64/pci/pci.c
+++ b/arch/ia64/pci/pci.c
@@ -461,7 +461,7 @@ void pcibios_set_master (struct pci_dev *dev)
 	/* No special bus mastering setup handling */
 }
 
-void __devinit
+void
 pcibios_update_irq (struct pci_dev *dev, int irq)
 {
 	pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
diff --git a/arch/mips/pci/pci.c b/arch/mips/pci/pci.c
index 690356808f8..64f0419e585 100644
--- a/arch/mips/pci/pci.c
+++ b/arch/mips/pci/pci.c
@@ -313,7 +313,7 @@ void __devinit pcibios_fixup_bus(struct pci_bus *bus)
 	}
 }
 
-void __init
+void
 pcibios_update_irq(struct pci_dev *dev, int irq)
 {
 	pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
diff --git a/arch/sh/drivers/pci/pci.c b/arch/sh/drivers/pci/pci.c
index 40db2d0aef3..1bd3e089b74 100644
--- a/arch/sh/drivers/pci/pci.c
+++ b/arch/sh/drivers/pci/pci.c
@@ -192,7 +192,7 @@ int pcibios_enable_device(struct pci_dev *dev, int mask)
 	return pci_enable_resources(dev, mask);
 }
 
-void __init pcibios_update_irq(struct pci_dev *dev, int irq)
+void pcibios_update_irq(struct pci_dev *dev, int irq)
 {
 	pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
 }
diff --git a/arch/sparc/kernel/leon_pci.c b/arch/sparc/kernel/leon_pci.c
index 21dcda75a52..404621b775f 100644
--- a/arch/sparc/kernel/leon_pci.c
+++ b/arch/sparc/kernel/leon_pci.c
@@ -102,7 +102,7 @@ int pcibios_enable_device(struct pci_dev *dev, int mask)
 	return pci_enable_resources(dev, mask);
 }
 
-void __devinit pcibios_update_irq(struct pci_dev *dev, int irq)
+void pcibios_update_irq(struct pci_dev *dev, int irq)
 {
 #ifdef CONFIG_PCI_DEBUG
 	printk(KERN_DEBUG "LEONPCI: Assigning IRQ %02d to %s\n", irq,
diff --git a/arch/tile/kernel/pci.c b/arch/tile/kernel/pci.c
index 33c10864d2f..6245bba8b1d 100644
--- a/arch/tile/kernel/pci.c
+++ b/arch/tile/kernel/pci.c
@@ -406,7 +406,7 @@ void pcibios_set_master(struct pci_dev *dev)
 /*
  * This is called from the generic Linux layer.
  */
-void __devinit pcibios_update_irq(struct pci_dev *dev, int irq)
+void pcibios_update_irq(struct pci_dev *dev, int irq)
 {
 	pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
 }
diff --git a/arch/tile/kernel/pci_gx.c b/arch/tile/kernel/pci_gx.c
index 0e213e35ffc..5faad0b1bd2 100644
--- a/arch/tile/kernel/pci_gx.c
+++ b/arch/tile/kernel/pci_gx.c
@@ -1036,7 +1036,7 @@ char __devinit *pcibios_setup(char *str)
 /*
  * This is called from the generic Linux layer.
  */
-void __devinit pcibios_update_irq(struct pci_dev *dev, int irq)
+void pcibios_update_irq(struct pci_dev *dev, int irq)
 {
 	pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
 }
diff --git a/arch/unicore32/kernel/pci.c b/arch/unicore32/kernel/pci.c
index 46cb6c9de6c..c07ecc5baff 100644
--- a/arch/unicore32/kernel/pci.c
+++ b/arch/unicore32/kernel/pci.c
@@ -154,7 +154,7 @@ void __init puv3_pci_adjust_zones(unsigned long *zone_size,
 	zhole_size[0] = 0;
 }
 
-void __devinit pcibios_update_irq(struct pci_dev *dev, int irq)
+void pcibios_update_irq(struct pci_dev *dev, int irq)
 {
 	if (debug_pci)
 		printk(KERN_DEBUG "PCI: Assigning IRQ %02d to %s\n",
diff --git a/arch/x86/pci/visws.c b/arch/x86/pci/visws.c
index 6f2f8eeed17..9d736e7ff64 100644
--- a/arch/x86/pci/visws.c
+++ b/arch/x86/pci/visws.c
@@ -62,7 +62,7 @@ out:
 	return irq;
 }
 
-void __init pcibios_update_irq(struct pci_dev *dev, int irq)
+void pcibios_update_irq(struct pci_dev *dev, int irq)
 {
 	pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
 }
diff --git a/arch/xtensa/kernel/pci.c b/arch/xtensa/kernel/pci.c
index 69759e9cb3e..6f9b40c47e9 100644
--- a/arch/xtensa/kernel/pci.c
+++ b/arch/xtensa/kernel/pci.c
@@ -212,7 +212,7 @@ void pcibios_set_master(struct pci_dev *dev)
 
 /* the next one is stolen from the alpha port... */
 
-void __init
+void
 pcibios_update_irq(struct pci_dev *dev, int irq)
 {
 	pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
diff --git a/drivers/pci/setup-irq.c b/drivers/pci/setup-irq.c
index eb219a1d16f..270ae7b9712 100644
--- a/drivers/pci/setup-irq.c
+++ b/drivers/pci/setup-irq.c
@@ -18,7 +18,7 @@
 #include <linux/cache.h>
 
 
-static void __init
+static void
 pdev_fixup_irq(struct pci_dev *dev,
 	       u8 (*swizzle)(struct pci_dev *, u8 *),
 	       int (*map_irq)(const struct pci_dev *, u8, u8))
@@ -54,7 +54,7 @@ pdev_fixup_irq(struct pci_dev *dev,
 	pcibios_update_irq(dev, irq);
 }
 
-void __init
+void
 pci_fixup_irqs(u8 (*swizzle)(struct pci_dev *, u8 *),
 	       int (*map_irq)(const struct pci_dev *, u8, u8))
 {
-- 
cgit v1.2.3-70-g09d2


From 8885b7b637fa9aca7e1b00581a0173c6956966d3 Mon Sep 17 00:00:00 2001
From: Thierry Reding <thierry.reding@avionic-design.de>
Date: Mon, 17 Sep 2012 13:22:54 +0200
Subject: PCI: Provide a default pcibios_update_irq()

Most architectures implement this in exactly the same way. Instead of
having each architecture duplicate this function, provide a single
implementation in the core and make it a weak symbol so that it can be
overridden on architectures where it is required.

Signed-off-by: Thierry Reding <thierry.reding@avionic-design.de>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 arch/alpha/kernel/pci.c      | 6 ------
 arch/arm/kernel/bios32.c     | 9 ---------
 arch/ia64/pci/pci.c          | 8 --------
 arch/m68k/kernel/pcibios.c   | 5 -----
 arch/mips/pci/pci.c          | 6 ------
 arch/sh/drivers/pci/pci.c    | 5 -----
 arch/sparc/kernel/leon_pci.c | 9 ---------
 arch/sparc/kernel/pci.c      | 4 ----
 arch/tile/kernel/pci.c       | 8 --------
 arch/tile/kernel/pci_gx.c    | 8 --------
 arch/unicore32/kernel/pci.c  | 8 --------
 arch/x86/pci/visws.c         | 5 -----
 arch/xtensa/kernel/pci.c     | 8 --------
 drivers/pci/setup-irq.c      | 5 +++++
 14 files changed, 5 insertions(+), 89 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/alpha/kernel/pci.c b/arch/alpha/kernel/pci.c
index 920392f61ef..ef757147cbf 100644
--- a/arch/alpha/kernel/pci.c
+++ b/arch/alpha/kernel/pci.c
@@ -256,12 +256,6 @@ pcibios_fixup_bus(struct pci_bus *bus)
 	}
 }
 
-void
-pcibios_update_irq(struct pci_dev *dev, int irq)
-{
-	pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
-}
-
 int
 pcibios_enable_device(struct pci_dev *dev, int mask)
 {
diff --git a/arch/arm/kernel/bios32.c b/arch/arm/kernel/bios32.c
index 0174fe6effe..9cf16b83bbb 100644
--- a/arch/arm/kernel/bios32.c
+++ b/arch/arm/kernel/bios32.c
@@ -270,15 +270,6 @@ static void __devinit pci_fixup_it8152(struct pci_dev *dev)
 }
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ITE, PCI_DEVICE_ID_ITE_8152, pci_fixup_it8152);
 
-
-
-void pcibios_update_irq(struct pci_dev *dev, int irq)
-{
-	if (debug_pci)
-		printk("PCI: Assigning IRQ %02d to %s\n", irq, pci_name(dev));
-	pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
-}
-
 /*
  * If the bus contains any of these devices, then we must not turn on
  * parity checking of any kind.  Currently this is CyberPro 20x0 only.
diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c
index 27db6a8afc4..a7ebe944027 100644
--- a/arch/ia64/pci/pci.c
+++ b/arch/ia64/pci/pci.c
@@ -461,14 +461,6 @@ void pcibios_set_master (struct pci_dev *dev)
 	/* No special bus mastering setup handling */
 }
 
-void
-pcibios_update_irq (struct pci_dev *dev, int irq)
-{
-	pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
-
-	/* ??? FIXME -- record old value for shutdown.  */
-}
-
 int
 pcibios_enable_device (struct pci_dev *dev, int mask)
 {
diff --git a/arch/m68k/kernel/pcibios.c b/arch/m68k/kernel/pcibios.c
index b2988aa1840..73fa0b56a06 100644
--- a/arch/m68k/kernel/pcibios.c
+++ b/arch/m68k/kernel/pcibios.c
@@ -87,11 +87,6 @@ int pcibios_enable_device(struct pci_dev *dev, int mask)
 	return 0;
 }
 
-void pcibios_update_irq(struct pci_dev *dev, int irq)
-{
-	pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
-}
-
 void __devinit pcibios_fixup_bus(struct pci_bus *bus)
 {
 	struct pci_dev *dev;
diff --git a/arch/mips/pci/pci.c b/arch/mips/pci/pci.c
index 64f0419e585..04e35bcde07 100644
--- a/arch/mips/pci/pci.c
+++ b/arch/mips/pci/pci.c
@@ -313,12 +313,6 @@ void __devinit pcibios_fixup_bus(struct pci_bus *bus)
 	}
 }
 
-void
-pcibios_update_irq(struct pci_dev *dev, int irq)
-{
-	pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
-}
-
 #ifdef CONFIG_HOTPLUG
 EXPORT_SYMBOL(PCIBIOS_MIN_IO);
 EXPORT_SYMBOL(PCIBIOS_MIN_MEM);
diff --git a/arch/sh/drivers/pci/pci.c b/arch/sh/drivers/pci/pci.c
index 1bd3e089b74..a7e078f2e2e 100644
--- a/arch/sh/drivers/pci/pci.c
+++ b/arch/sh/drivers/pci/pci.c
@@ -192,11 +192,6 @@ int pcibios_enable_device(struct pci_dev *dev, int mask)
 	return pci_enable_resources(dev, mask);
 }
 
-void pcibios_update_irq(struct pci_dev *dev, int irq)
-{
-	pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
-}
-
 static void __init
 pcibios_bus_report_status_early(struct pci_channel *hose,
 				int top_bus, int current_bus,
diff --git a/arch/sparc/kernel/leon_pci.c b/arch/sparc/kernel/leon_pci.c
index 404621b775f..fc052116156 100644
--- a/arch/sparc/kernel/leon_pci.c
+++ b/arch/sparc/kernel/leon_pci.c
@@ -102,15 +102,6 @@ int pcibios_enable_device(struct pci_dev *dev, int mask)
 	return pci_enable_resources(dev, mask);
 }
 
-void pcibios_update_irq(struct pci_dev *dev, int irq)
-{
-#ifdef CONFIG_PCI_DEBUG
-	printk(KERN_DEBUG "LEONPCI: Assigning IRQ %02d to %s\n", irq,
-		pci_name(dev));
-#endif
-	pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
-}
-
 /* in/out routines taken from pcic.c
  *
  * This probably belongs here rather than ioport.c because
diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c
index 065b88c4f86..acc8c838ff7 100644
--- a/arch/sparc/kernel/pci.c
+++ b/arch/sparc/kernel/pci.c
@@ -622,10 +622,6 @@ void __devinit pcibios_fixup_bus(struct pci_bus *pbus)
 {
 }
 
-void pcibios_update_irq(struct pci_dev *pdev, int irq)
-{
-}
-
 resource_size_t pcibios_align_resource(void *data, const struct resource *res,
 				resource_size_t size, resource_size_t align)
 {
diff --git a/arch/tile/kernel/pci.c b/arch/tile/kernel/pci.c
index 6245bba8b1d..dbdab34f27c 100644
--- a/arch/tile/kernel/pci.c
+++ b/arch/tile/kernel/pci.c
@@ -403,14 +403,6 @@ void pcibios_set_master(struct pci_dev *dev)
 	/* No special bus mastering setup handling. */
 }
 
-/*
- * This is called from the generic Linux layer.
- */
-void pcibios_update_irq(struct pci_dev *dev, int irq)
-{
-	pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
-}
-
 /*
  * Enable memory and/or address decoding, as appropriate, for the
  * device described by the 'dev' struct.
diff --git a/arch/tile/kernel/pci_gx.c b/arch/tile/kernel/pci_gx.c
index 5faad0b1bd2..2ba6d052f85 100644
--- a/arch/tile/kernel/pci_gx.c
+++ b/arch/tile/kernel/pci_gx.c
@@ -1033,14 +1033,6 @@ char __devinit *pcibios_setup(char *str)
 	return str;
 }
 
-/*
- * This is called from the generic Linux layer.
- */
-void pcibios_update_irq(struct pci_dev *dev, int irq)
-{
-	pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
-}
-
 /*
  * Enable memory address decoding, as appropriate, for the
  * device described by the 'dev' struct. The I/O decoding
diff --git a/arch/unicore32/kernel/pci.c b/arch/unicore32/kernel/pci.c
index c07ecc5baff..b0056f68d32 100644
--- a/arch/unicore32/kernel/pci.c
+++ b/arch/unicore32/kernel/pci.c
@@ -154,14 +154,6 @@ void __init puv3_pci_adjust_zones(unsigned long *zone_size,
 	zhole_size[0] = 0;
 }
 
-void pcibios_update_irq(struct pci_dev *dev, int irq)
-{
-	if (debug_pci)
-		printk(KERN_DEBUG "PCI: Assigning IRQ %02d to %s\n",
-				irq, pci_name(dev));
-	pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
-}
-
 /*
  * If the bus contains any of these devices, then we must not turn on
  * parity checking of any kind.
diff --git a/arch/x86/pci/visws.c b/arch/x86/pci/visws.c
index 9d736e7ff64..3e6d2a6db86 100644
--- a/arch/x86/pci/visws.c
+++ b/arch/x86/pci/visws.c
@@ -62,11 +62,6 @@ out:
 	return irq;
 }
 
-void pcibios_update_irq(struct pci_dev *dev, int irq)
-{
-	pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
-}
-
 int __init pci_visws_init(void)
 {
 	pcibios_enable_irq = &pci_visws_enable_irq;
diff --git a/arch/xtensa/kernel/pci.c b/arch/xtensa/kernel/pci.c
index 6f9b40c47e9..54354de38a7 100644
--- a/arch/xtensa/kernel/pci.c
+++ b/arch/xtensa/kernel/pci.c
@@ -210,14 +210,6 @@ void pcibios_set_master(struct pci_dev *dev)
 	/* No special bus mastering setup handling */
 }
 
-/* the next one is stolen from the alpha port... */
-
-void
-pcibios_update_irq(struct pci_dev *dev, int irq)
-{
-	pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
-}
-
 int pcibios_enable_device(struct pci_dev *dev, int mask)
 {
 	u16 cmd, old_cmd;
diff --git a/drivers/pci/setup-irq.c b/drivers/pci/setup-irq.c
index 270ae7b9712..9bd6864ec5d 100644
--- a/drivers/pci/setup-irq.c
+++ b/drivers/pci/setup-irq.c
@@ -17,6 +17,11 @@
 #include <linux/ioport.h>
 #include <linux/cache.h>
 
+void __weak pcibios_update_irq(struct pci_dev *dev, int irq)
+{
+	dev_dbg(&dev->dev, "assigning IRQ %02d\n", irq);
+	pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
+}
 
 static void
 pdev_fixup_irq(struct pci_dev *dev,
-- 
cgit v1.2.3-70-g09d2


From 1e6dd8adc78d4a153db253d051fd4ef6c49c9019 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Wed, 5 Sep 2012 15:31:26 +0300
Subject: perf: Fix off by one test in perf_reg_value()

The test should be >= ARRAY_SIZE() instead of > ARRAY_SIZE().

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Link: http://lkml.kernel.org/r/20120905123126.GC6128@elgon.mountain
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/perf_regs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/perf_regs.c b/arch/x86/kernel/perf_regs.c
index c5a3e5cfe07..e309cc5c276 100644
--- a/arch/x86/kernel/perf_regs.c
+++ b/arch/x86/kernel/perf_regs.c
@@ -57,7 +57,7 @@ static unsigned int pt_regs_offset[PERF_REG_X86_MAX] = {
 
 u64 perf_reg_value(struct pt_regs *regs, int idx)
 {
-	if (WARN_ON_ONCE(idx > ARRAY_SIZE(pt_regs_offset)))
+	if (WARN_ON_ONCE(idx >= ARRAY_SIZE(pt_regs_offset)))
 		return 0;
 
 	return regs_get_register(regs, pt_regs_offset[idx]);
-- 
cgit v1.2.3-70-g09d2


From 924e101a7ab6f884047f4344e5f1154a4bcd63a6 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <borislav.petkov@amd.com>
Date: Fri, 14 Sep 2012 18:37:46 +0200
Subject: x86/debug: Dump family, model, stepping of the boot CPU

When acting on a user bug report, we find ourselves constantly
asking for /proc/cpuinfo in order to know the exact family,
model, stepping of the CPU in question.

Instead of having to ask this, add this to dmesg so that it is
visible and no ambiguities can ensue from looking at the
official name string of the CPU coming from CPUID and trying
to map it to f/m/s.

Output then looks like this:

[    0.146041] smpboot: CPU0: AMD FX(tm)-8100 Eight-Core Processor (fam: 15, model: 01, stepping: 02)

Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
Cc: Andreas Herrmann <andreas.herrmann3@amd.com>
Link: http://lkml.kernel.org/r/1347640666-13638-1-git-send-email-bp@amd64.org
[ tweaked it minimally to add commas. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/common.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index a5fbc3c5fcc..1cc48ff91cb 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1023,14 +1023,16 @@ void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
 		printk(KERN_CONT "%s ", vendor);
 
 	if (c->x86_model_id[0])
-		printk(KERN_CONT "%s", c->x86_model_id);
+		printk(KERN_CONT "%s", strim(c->x86_model_id));
 	else
 		printk(KERN_CONT "%d86", c->x86);
 
+	printk(KERN_CONT " (fam: %02x, model: %02x", c->x86, c->x86_model);
+
 	if (c->x86_mask || c->cpuid_level >= 0)
-		printk(KERN_CONT " stepping %02x\n", c->x86_mask);
+		printk(KERN_CONT ", stepping: %02x)\n", c->x86_mask);
 	else
-		printk(KERN_CONT "\n");
+		printk(KERN_CONT ")\n");
 
 	print_cpu_msr(c);
 }
-- 
cgit v1.2.3-70-g09d2


From e26a44a2d618a491d5c6a2a8aaf66ee03a94739f Mon Sep 17 00:00:00 2001
From: Jan Beulich <JBeulich@suse.com>
Date: Tue, 18 Sep 2012 12:16:14 +0100
Subject: x86: Use REP BSF unconditionally

Make "REP BSF" unconditional, as per the suggestion of hpa
and Linus, this removes the insane BSF_PREFIX conditional
and simplifies the logic.

Suggested-by: "H. Peter Anvin" <hpa@zytor.com>
Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Link: http://lkml.kernel.org/r/5058741E020000780009C014@nat28.tlf.novell.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/bitops.h | 19 ++-----------------
 1 file changed, 2 insertions(+), 17 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index b2af6645ea7..6dfd0195bb5 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -347,19 +347,6 @@ static int test_bit(int nr, const volatile unsigned long *addr);
 	 ? constant_test_bit((nr), (addr))	\
 	 : variable_test_bit((nr), (addr)))
 
-#if (defined(CONFIG_X86_GENERIC) || defined(CONFIG_GENERIC_CPU)) \
-    && !defined(CONFIG_CC_OPTIMIZE_FOR_SIZE)
-/*
- * Since BSF and TZCNT have sufficiently similar semantics for the purposes
- * for which we use them here, BMI-capable hardware will decode the prefixed
- * variant as 'tzcnt ...' and may execute that faster than 'bsf ...', while
- * older hardware will ignore the REP prefix and decode it as 'bsf ...'.
- */
-# define BSF_PREFIX "rep;"
-#else
-# define BSF_PREFIX
-#endif
-
 /**
  * __ffs - find first set bit in word
  * @word: The word to search
@@ -368,7 +355,7 @@ static int test_bit(int nr, const volatile unsigned long *addr);
  */
 static inline unsigned long __ffs(unsigned long word)
 {
-	asm(BSF_PREFIX "bsf %1,%0"
+	asm("rep; bsf %1,%0"
 		: "=r" (word)
 		: "rm" (word));
 	return word;
@@ -382,14 +369,12 @@ static inline unsigned long __ffs(unsigned long word)
  */
 static inline unsigned long ffz(unsigned long word)
 {
-	asm(BSF_PREFIX "bsf %1,%0"
+	asm("rep; bsf %1,%0"
 		: "=r" (word)
 		: "r" (~word));
 	return word;
 }
 
-#undef BSF_PREFIX
-
 /*
  * __fls: find last set bit in word
  * @word: The word to search
-- 
cgit v1.2.3-70-g09d2


From 20a36e39d59757252edbbdcf9574ae2998733ce9 Mon Sep 17 00:00:00 2001
From: Stephane Eranian <eranian@google.com>
Date: Tue, 11 Sep 2012 01:07:01 +0200
Subject: perf/x86: Fix Intel Ivy Bridge support

This patch updates the existing Intel IvyBridge (model 58)
support with proper PEBS event constraints. It cannot reuse
the same as SandyBridge because some events (0xd3) are
specific to IvyBridge.

Also there is no UOPS_DISPATCHED.THREAD on IVB, so do not
populate the PERF_COUNT_HW_STALLED_CYCLES_BACKEND mapping.

Signed-off-by: Stephane Eranian <eranian@google.com>
Cc: peterz@infradead.org
Cc: ak@linux.intel.com
Link: http://lkml.kernel.org/r/20120910230701.GA5898@quad
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event.h          |  2 ++
 arch/x86/kernel/cpu/perf_event_intel.c    | 24 +++++++++++++++++++++++-
 arch/x86/kernel/cpu/perf_event_intel_ds.c | 14 ++++++++++++++
 3 files changed, 39 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 6605a81ba33..8b6defe7eef 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -586,6 +586,8 @@ extern struct event_constraint intel_westmere_pebs_event_constraints[];
 
 extern struct event_constraint intel_snb_pebs_event_constraints[];
 
+extern struct event_constraint intel_ivb_pebs_event_constraints[];
+
 struct event_constraint *intel_pebs_constraints(struct perf_event *event);
 
 void intel_pmu_pebs_enable(struct perf_event *event);
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 0d3d63afa76..6bca492b854 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -2048,7 +2048,6 @@ __init int intel_pmu_init(void)
 	case 42: /* SandyBridge */
 	case 45: /* SandyBridge, "Romely-EP" */
 		x86_add_quirk(intel_sandybridge_quirk);
-	case 58: /* IvyBridge */
 		memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
 		       sizeof(hw_cache_event_ids));
 		memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs,
@@ -2073,6 +2072,29 @@ __init int intel_pmu_init(void)
 
 		pr_cont("SandyBridge events, ");
 		break;
+	case 58: /* IvyBridge */
+		memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
+		       sizeof(hw_cache_event_ids));
+		memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs,
+		       sizeof(hw_cache_extra_regs));
+
+		intel_pmu_lbr_init_snb();
+
+		x86_pmu.event_constraints = intel_snb_event_constraints;
+		x86_pmu.pebs_constraints = intel_ivb_pebs_event_constraints;
+		x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
+		x86_pmu.extra_regs = intel_snb_extra_regs;
+		/* all extra regs are per-cpu when HT is on */
+		x86_pmu.er_flags |= ERF_HAS_RSP_1;
+		x86_pmu.er_flags |= ERF_NO_HT_SHARING;
+
+		/* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
+		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
+			X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
+
+		pr_cont("IvyBridge events, ");
+		break;
+
 
 	default:
 		switch (x86_pmu.version) {
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index e38d97bf425..826054a4f2e 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -407,6 +407,20 @@ struct event_constraint intel_snb_pebs_event_constraints[] = {
 	EVENT_CONSTRAINT_END
 };
 
+struct event_constraint intel_ivb_pebs_event_constraints[] = {
+        INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
+        INTEL_UEVENT_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
+        INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */
+        INTEL_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */
+        INTEL_EVENT_CONSTRAINT(0xc5, 0xf),    /* BR_MISP_RETIRED.* */
+        INTEL_EVENT_CONSTRAINT(0xcd, 0x8),    /* MEM_TRANS_RETIRED.* */
+        INTEL_EVENT_CONSTRAINT(0xd0, 0xf),    /* MEM_UOP_RETIRED.* */
+        INTEL_EVENT_CONSTRAINT(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
+        INTEL_EVENT_CONSTRAINT(0xd2, 0xf),    /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
+        INTEL_EVENT_CONSTRAINT(0xd3, 0xf),    /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
+        EVENT_CONSTRAINT_END
+};
+
 struct event_constraint *intel_pebs_constraints(struct perf_event *event)
 {
 	struct event_constraint *c;
-- 
cgit v1.2.3-70-g09d2


From 4b8073e467e6a66b6a5a8e799d28bc3b243c0d78 Mon Sep 17 00:00:00 2001
From: Peter Senna Tschudin <peter.senna@gmail.com>
Date: Tue, 18 Sep 2012 18:36:14 +0200
Subject: arch/x86: Remove unecessary semicolons

Found by http://coccinelle.lip6.fr/

Signed-off-by: Peter Senna Tschudin <peter.senna@gmail.com>
Cc: avi@redhat.com
Cc: mtosatti@redhat.com
Cc: a.p.zijlstra@chello.nl
Cc: rusty@rustcorp.com.au
Cc: masami.hiramatsu.pt@hitachi.com
Cc: suresh.b.siddha@intel.com
Cc: joerg.roedel@amd.com
Cc: agordeev@redhat.com
Cc: yinghai@kernel.org
Cc: bhelgaas@google.com
Cc: liuj97@gmail.com
Link: http://lkml.kernel.org/r/1347986174-30287-7-git-send-email-peter.senna@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/alternative.c  | 4 ++--
 arch/x86/kernel/apic/apic.c    | 2 +-
 arch/x86/kvm/vmx.c             | 2 +-
 arch/x86/pci/mmconfig-shared.c | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index ced4534baed..3318b1e53e0 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -317,7 +317,7 @@ static void alternatives_smp_lock(const s32 *start, const s32 *end,
 		/* turn DS segment override prefix into lock prefix */
 		if (*ptr == 0x3e)
 			text_poke(ptr, ((unsigned char []){0xf0}), 1);
-	};
+	}
 	mutex_unlock(&text_mutex);
 }
 
@@ -338,7 +338,7 @@ static void alternatives_smp_unlock(const s32 *start, const s32 *end,
 		/* turn lock prefix into DS segment override prefix */
 		if (*ptr == 0xf0)
 			text_poke(ptr, ((unsigned char []){0x3E}), 1);
-	};
+	}
 	mutex_unlock(&text_mutex);
 }
 
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 24deb308232..b17416e72fb 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1934,7 +1934,7 @@ void smp_error_interrupt(struct pt_regs *regs)
 			apic_printk(APIC_DEBUG, KERN_CONT " : %s", error_interrupt_reason[i]);
 		i++;
 		v1 >>= 1;
-	};
+	}
 
 	apic_printk(APIC_DEBUG, KERN_CONT "\n");
 
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index b1eb202ee76..b06737d122f 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -4543,7 +4543,7 @@ static int handle_cr(struct kvm_vcpu *vcpu)
 				vcpu->run->exit_reason = KVM_EXIT_SET_TPR;
 				return 0;
 			}
-		};
+		}
 		break;
 	case 2: /* clts */
 		handle_clts(vcpu);
diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c
index 937bcece700..704b9ec043d 100644
--- a/arch/x86/pci/mmconfig-shared.c
+++ b/arch/x86/pci/mmconfig-shared.c
@@ -585,7 +585,7 @@ static int __init pci_parse_mcfg(struct acpi_table_header *header)
 	while (i >= sizeof(struct acpi_mcfg_allocation)) {
 		entries++;
 		i -= sizeof(struct acpi_mcfg_allocation);
-	};
+	}
 	if (entries == 0) {
 		pr_err(PREFIX "MMCONFIG has no entries\n");
 		return -ENODEV;
-- 
cgit v1.2.3-70-g09d2


From 2d297480037e1d9100ca504737820c1bf65db6c0 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Wed, 5 Sep 2012 15:30:42 +0300
Subject: x86, microcode, AMD: Fix use after free in free_cache()

list_for_each_entry_reverse() dereferences the iterator, but we already
freed it. I don't see a reason that this has to be done in reverse order
so change it to use list_for_each_entry_safe().

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
---
 arch/x86/kernel/microcode_amd.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index 5511216b443..7720ff5a9ee 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -150,9 +150,9 @@ static void update_cache(struct ucode_patch *new_patch)
 
 static void free_cache(void)
 {
-	struct ucode_patch *p;
+	struct ucode_patch *p, *tmp;
 
-	list_for_each_entry_reverse(p, &pcache, plist) {
+	list_for_each_entry_safe(p, tmp, &pcache, plist) {
 		__list_del(p->plist.prev, p->plist.next);
 		kfree(p->data);
 		kfree(p);
-- 
cgit v1.2.3-70-g09d2


From bd49940a35ec7d488ae63bd625639893b3385b97 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Wed, 19 Sep 2012 08:30:55 -0400
Subject: xen/boot: Disable BIOS SMP MP table search.

As the initial domain we are able to search/map certain regions
of memory to harvest configuration data. For all low-level we
use ACPI tables - for interrupts we use exclusively ACPI _PRT
(so DSDT) and MADT for INT_SRC_OVR.

The SMP MP table is not used at all. As a matter of fact we do
not even support machines that only have SMP MP but no ACPI tables.

Lets follow how Moorestown does it and just disable searching
for BIOS SMP tables.

This also fixes an issue on HP Proliant BL680c G5 and DL380 G6:

9f->100 for 1:1 PTE
Freeing 9f-100 pfn range: 97 pages freed
1-1 mapping on 9f->100
.. snip..
e820: BIOS-provided physical RAM map:
Xen: [mem 0x0000000000000000-0x000000000009efff] usable
Xen: [mem 0x000000000009f400-0x00000000000fffff] reserved
Xen: [mem 0x0000000000100000-0x00000000cfd1dfff] usable
.. snip..
Scan for SMP in [mem 0x00000000-0x000003ff]
Scan for SMP in [mem 0x0009fc00-0x0009ffff]
Scan for SMP in [mem 0x000f0000-0x000fffff]
found SMP MP-table at [mem 0x000f4fa0-0x000f4faf] mapped at [ffff8800000f4fa0]
(XEN) mm.c:908:d0 Error getting mfn 100 (pfn 5555555555555555) from L1 entry 0000000000100461 for l1e_owner=0, pg_owner=0
(XEN) mm.c:4995:d0 ptwr_emulate: could not get_page_from_l1e()
BUG: unable to handle kernel NULL pointer dereference at           (null)
IP: [<ffffffff81ac07e2>] xen_set_pte_init+0x66/0x71
. snip..
Pid: 0, comm: swapper Not tainted 3.6.0-rc6upstream-00188-gb6fb969-dirty #2 HP ProLiant BL680c G5
.. snip..
Call Trace:
 [<ffffffff81ad31c6>] __early_ioremap+0x18a/0x248
 [<ffffffff81624731>] ? printk+0x48/0x4a
 [<ffffffff81ad32ac>] early_ioremap+0x13/0x15
 [<ffffffff81acc140>] get_mpc_size+0x2f/0x67
 [<ffffffff81acc284>] smp_scan_config+0x10c/0x136
 [<ffffffff81acc2e4>] default_find_smp_config+0x36/0x5a
 [<ffffffff81ac3085>] setup_arch+0x5b3/0xb5b
 [<ffffffff81624731>] ? printk+0x48/0x4a
 [<ffffffff81abca7f>] start_kernel+0x90/0x390
 [<ffffffff81abc356>] x86_64_start_reservations+0x131/0x136
 [<ffffffff81abfa83>] xen_start_kernel+0x65f/0x661
(XEN) Domain 0 crashed: 'noreboot' set - not rebooting.

which is that ioremap would end up mapping 0xff using _PAGE_IOMAP
(which is what early_ioremap sticks as a flag) - which meant
we would get MFN 0xFF (pte ff461, which is OK), and then it would
also map 0x100 (b/c ioremap tries to get page aligned request, and
it was trying to map 0xf4fa0 + PAGE_SIZE - so it mapped the next page)
as _PAGE_IOMAP. Since 0x100 is actually a RAM page, and the _PAGE_IOMAP
bypasses the P2M lookup we would happily set the PTE to 1000461.
Xen would deny the request since we do not have access to the
Machine Frame Number (MFN) of 0x100. The P2M[0x100] is for example
0x80140.

CC: stable@vger.kernel.org
Fixes-Oracle-Bugzilla: https://bugzilla.oracle.com/bugzilla/show_bug.cgi?id=13665
Acked-by: Jan Beulich <jbeulich@suse.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/xen/enlighten.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 9642d4a3860..1fbe75a95f1 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1452,6 +1452,10 @@ asmlinkage void __init xen_start_kernel(void)
 		pci_request_acs();
 
 		xen_acpi_sleep_register();
+
+		/* Avoid searching for BIOS MP tables */
+		x86_init.mpparse.find_smp_config = x86_init_noop;
+		x86_init.mpparse.get_smp_config = x86_init_uint_noop;
 	}
 #ifdef CONFIG_PCI
 	/* PCI BIOS service won't work from a PV guest. */
-- 
cgit v1.2.3-70-g09d2


From 8ea667f259e3767fd3ee85a885c14e417835695e Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Wed, 12 Sep 2012 13:44:53 +0300
Subject: KVM: MMU: Push clean gpte write protection out of gpte_access()

gpte_access() computes the access permissions of a guest pte and also
write-protects clean gptes.  This is wrong when we are servicing a
write fault (since we'll be setting the dirty bit momentarily) but
correct when instantiating a speculative spte, or when servicing a
read fault (since we'll want to trap a following write in order to
set the dirty bit).

It doesn't seem to hurt in practice, but in order to make the code
readable, push the write protection out of gpte_access() and into
a new protect_clean_gpte() which is called explicitly when needed.

Reviewed-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/mmu.c         | 12 ++++++++++++
 arch/x86/kvm/mmu.h         |  3 ++-
 arch/x86/kvm/paging_tmpl.h | 24 ++++++++++++------------
 3 files changed, 26 insertions(+), 13 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index aa0b469ee07..54c9cb4fdfa 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3408,6 +3408,18 @@ static bool is_rsvd_bits_set(struct kvm_mmu *mmu, u64 gpte, int level)
 	return (gpte & mmu->rsvd_bits_mask[bit7][level-1]) != 0;
 }
 
+static inline void protect_clean_gpte(unsigned *access, unsigned gpte)
+{
+	unsigned mask;
+
+	BUILD_BUG_ON(PT_WRITABLE_MASK != ACC_WRITE_MASK);
+
+	mask = (unsigned)~ACC_WRITE_MASK;
+	/* Allow write access to dirty gptes */
+	mask |= (gpte >> (PT_DIRTY_SHIFT - PT_WRITABLE_SHIFT)) & PT_WRITABLE_MASK;
+	*access &= mask;
+}
+
 static bool sync_mmio_spte(u64 *sptep, gfn_t gfn, unsigned access,
 			   int *nr_present)
 {
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index e374db9af02..2832081e9b2 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -18,7 +18,8 @@
 #define PT_PCD_MASK (1ULL << 4)
 #define PT_ACCESSED_SHIFT 5
 #define PT_ACCESSED_MASK (1ULL << PT_ACCESSED_SHIFT)
-#define PT_DIRTY_MASK (1ULL << 6)
+#define PT_DIRTY_SHIFT 6
+#define PT_DIRTY_MASK (1ULL << PT_DIRTY_SHIFT)
 #define PT_PAGE_SIZE_MASK (1ULL << 7)
 #define PT_PAT_MASK (1ULL << 7)
 #define PT_GLOBAL_MASK (1ULL << 8)
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index bf8c42bf50f..bf7b4ffafab 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -101,14 +101,11 @@ static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
 	return (ret != orig_pte);
 }
 
-static unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, pt_element_t gpte,
-				   bool last)
+static unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, pt_element_t gpte)
 {
 	unsigned access;
 
 	access = (gpte & (PT_WRITABLE_MASK | PT_USER_MASK)) | ACC_EXEC_MASK;
-	if (last && !is_dirty_gpte(gpte))
-		access &= ~ACC_WRITE_MASK;
 
 #if PTTYPE == 64
 	if (vcpu->arch.mmu.nx)
@@ -222,8 +219,7 @@ retry_walk:
 
 		last_gpte = FNAME(is_last_gpte)(walker, vcpu, mmu, pte);
 		if (last_gpte) {
-			pte_access = pt_access &
-				     FNAME(gpte_access)(vcpu, pte, true);
+			pte_access = pt_access & FNAME(gpte_access)(vcpu, pte);
 			/* check if the kernel is fetching from user page */
 			if (unlikely(pte_access & PT_USER_MASK) &&
 			    kvm_read_cr4_bits(vcpu, X86_CR4_SMEP))
@@ -274,7 +270,7 @@ retry_walk:
 			break;
 		}
 
-		pt_access &= FNAME(gpte_access)(vcpu, pte, false);
+		pt_access &= FNAME(gpte_access)(vcpu, pte);
 		--walker->level;
 	}
 
@@ -283,7 +279,9 @@ retry_walk:
 		goto error;
 	}
 
-	if (write_fault && unlikely(!is_dirty_gpte(pte))) {
+	if (!write_fault)
+		protect_clean_gpte(&pte_access, pte);
+	else if (unlikely(!is_dirty_gpte(pte))) {
 		int ret;
 
 		trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte));
@@ -368,7 +366,8 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
 		return;
 
 	pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte);
-	pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte, true);
+	pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
+	protect_clean_gpte(&pte_access, gpte);
 	pfn = gfn_to_pfn_atomic(vcpu->kvm, gpte_to_gfn(gpte));
 	if (mmu_invalid_pfn(pfn))
 		return;
@@ -441,8 +440,8 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw,
 		if (FNAME(prefetch_invalid_gpte)(vcpu, sp, spte, gpte))
 			continue;
 
-		pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte,
-								  true);
+		pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
+		protect_clean_gpte(&pte_access, gpte);
 		gfn = gpte_to_gfn(gpte);
 		pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn,
 				      pte_access & ACC_WRITE_MASK);
@@ -794,7 +793,8 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 
 		gfn = gpte_to_gfn(gpte);
 		pte_access = sp->role.access;
-		pte_access &= FNAME(gpte_access)(vcpu, gpte, true);
+		pte_access &= FNAME(gpte_access)(vcpu, gpte);
+		protect_clean_gpte(&pte_access, gpte);
 
 		if (sync_mmio_spte(&sp->spt[i], gfn, pte_access, &nr_present))
 			continue;
-- 
cgit v1.2.3-70-g09d2


From edc2ae84eb40a3c062210fe01af1cae1633cc810 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Wed, 12 Sep 2012 13:53:08 +0300
Subject: KVM: MMU: Optimize gpte_access() slightly

If nx is disabled, then is gpte[63] is set we will hit a reserved
bit set fault before checking permissions; so we can ignore the
setting of efer.nxe.

Reviewed-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/paging_tmpl.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index bf7b4ffafab..064bcb32d84 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -106,10 +106,8 @@ static unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, pt_element_t gpte)
 	unsigned access;
 
 	access = (gpte & (PT_WRITABLE_MASK | PT_USER_MASK)) | ACC_EXEC_MASK;
-
 #if PTTYPE == 64
-	if (vcpu->arch.mmu.nx)
-		access &= ~(gpte >> PT64_NX_SHIFT);
+	access &= ~(gpte >> PT64_NX_SHIFT);
 #endif
 	return access;
 }
-- 
cgit v1.2.3-70-g09d2


From 3d34adec7081621ff51c195be045b87d75c0c49d Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Wed, 12 Sep 2012 14:03:28 +0300
Subject: KVM: MMU: Move gpte_access() out of paging_tmpl.h

We no longer rely on paging_tmpl.h defines; so we can move the function
to mmu.c.

Rely on zero extension to 64 bits to get the correct nx behaviour.

Reviewed-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/mmu.c         | 10 ++++++++++
 arch/x86/kvm/paging_tmpl.h | 21 +++++----------------
 2 files changed, 15 insertions(+), 16 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 54c9cb4fdfa..f297a2ccf4f 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3437,6 +3437,16 @@ static bool sync_mmio_spte(u64 *sptep, gfn_t gfn, unsigned access,
 	return false;
 }
 
+static inline unsigned gpte_access(struct kvm_vcpu *vcpu, u64 gpte)
+{
+	unsigned access;
+
+	access = (gpte & (PT_WRITABLE_MASK | PT_USER_MASK)) | ACC_EXEC_MASK;
+	access &= ~(gpte >> PT64_NX_SHIFT);
+
+	return access;
+}
+
 #define PTTYPE 64
 #include "paging_tmpl.h"
 #undef PTTYPE
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 064bcb32d84..1cbf576852c 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -101,17 +101,6 @@ static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
 	return (ret != orig_pte);
 }
 
-static unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, pt_element_t gpte)
-{
-	unsigned access;
-
-	access = (gpte & (PT_WRITABLE_MASK | PT_USER_MASK)) | ACC_EXEC_MASK;
-#if PTTYPE == 64
-	access &= ~(gpte >> PT64_NX_SHIFT);
-#endif
-	return access;
-}
-
 static bool FNAME(is_last_gpte)(struct guest_walker *walker,
 				struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
 				pt_element_t gpte)
@@ -217,7 +206,7 @@ retry_walk:
 
 		last_gpte = FNAME(is_last_gpte)(walker, vcpu, mmu, pte);
 		if (last_gpte) {
-			pte_access = pt_access & FNAME(gpte_access)(vcpu, pte);
+			pte_access = pt_access & gpte_access(vcpu, pte);
 			/* check if the kernel is fetching from user page */
 			if (unlikely(pte_access & PT_USER_MASK) &&
 			    kvm_read_cr4_bits(vcpu, X86_CR4_SMEP))
@@ -268,7 +257,7 @@ retry_walk:
 			break;
 		}
 
-		pt_access &= FNAME(gpte_access)(vcpu, pte);
+		pt_access &= gpte_access(vcpu, pte);
 		--walker->level;
 	}
 
@@ -364,7 +353,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
 		return;
 
 	pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte);
-	pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
+	pte_access = sp->role.access & gpte_access(vcpu, gpte);
 	protect_clean_gpte(&pte_access, gpte);
 	pfn = gfn_to_pfn_atomic(vcpu->kvm, gpte_to_gfn(gpte));
 	if (mmu_invalid_pfn(pfn))
@@ -438,7 +427,7 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw,
 		if (FNAME(prefetch_invalid_gpte)(vcpu, sp, spte, gpte))
 			continue;
 
-		pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
+		pte_access = sp->role.access & gpte_access(vcpu, gpte);
 		protect_clean_gpte(&pte_access, gpte);
 		gfn = gpte_to_gfn(gpte);
 		pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn,
@@ -791,7 +780,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 
 		gfn = gpte_to_gfn(gpte);
 		pte_access = sp->role.access;
-		pte_access &= FNAME(gpte_access)(vcpu, gpte);
+		pte_access &= gpte_access(vcpu, gpte);
 		protect_clean_gpte(&pte_access, gpte);
 
 		if (sync_mmio_spte(&sp->spt[i], gfn, pte_access, &nr_present))
-- 
cgit v1.2.3-70-g09d2


From 8cbc70696f149e44753b0fe60162b4ff96c2dd2b Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Sun, 16 Sep 2012 14:18:51 +0300
Subject: KVM: MMU: Update accessed and dirty bits after guest pagetable walk

While unspecified, the behaviour of Intel processors is to first
perform the page table walk, then, if the walk was successful, to
atomically update the accessed and dirty bits of walked paging elements.

While we are not required to follow this exactly, doing so will allow us
to perform the access permissions check after the walk is complete, rather
than after each walk step.

(the tricky case is SMEP: a zero in any pte's U bit makes the referenced
page a supervisor page, so we can't fault on a one bit during the walk
itself).

Reviewed-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/paging_tmpl.h | 76 ++++++++++++++++++++++++++++------------------
 1 file changed, 47 insertions(+), 29 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 1cbf576852c..35a05dd2f69 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -63,10 +63,12 @@
  */
 struct guest_walker {
 	int level;
+	unsigned max_level;
 	gfn_t table_gfn[PT_MAX_FULL_LEVELS];
 	pt_element_t ptes[PT_MAX_FULL_LEVELS];
 	pt_element_t prefetch_ptes[PTE_PREFETCH_NUM];
 	gpa_t pte_gpa[PT_MAX_FULL_LEVELS];
+	pt_element_t __user *ptep_user[PT_MAX_FULL_LEVELS];
 	unsigned pt_access;
 	unsigned pte_access;
 	gfn_t gfn;
@@ -119,6 +121,43 @@ static bool FNAME(is_last_gpte)(struct guest_walker *walker,
 	return false;
 }
 
+static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu,
+					     struct kvm_mmu *mmu,
+					     struct guest_walker *walker,
+					     int write_fault)
+{
+	unsigned level, index;
+	pt_element_t pte, orig_pte;
+	pt_element_t __user *ptep_user;
+	gfn_t table_gfn;
+	int ret;
+
+	for (level = walker->max_level; level >= walker->level; --level) {
+		pte = orig_pte = walker->ptes[level - 1];
+		table_gfn = walker->table_gfn[level - 1];
+		ptep_user = walker->ptep_user[level - 1];
+		index = offset_in_page(ptep_user) / sizeof(pt_element_t);
+		if (!(pte & PT_ACCESSED_MASK)) {
+			trace_kvm_mmu_set_accessed_bit(table_gfn, index, sizeof(pte));
+			pte |= PT_ACCESSED_MASK;
+		}
+		if (level == walker->level && write_fault && !is_dirty_gpte(pte)) {
+			trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte));
+			pte |= PT_DIRTY_MASK;
+		}
+		if (pte == orig_pte)
+			continue;
+
+		ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index, orig_pte, pte);
+		if (ret)
+			return ret;
+
+		mark_page_dirty(vcpu->kvm, table_gfn);
+		walker->ptes[level] = pte;
+	}
+	return 0;
+}
+
 /*
  * Fetch a guest pte for a guest virtual address
  */
@@ -126,6 +165,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
 				    struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
 				    gva_t addr, u32 access)
 {
+	int ret;
 	pt_element_t pte;
 	pt_element_t __user *uninitialized_var(ptep_user);
 	gfn_t table_gfn;
@@ -153,6 +193,7 @@ retry_walk:
 		--walker->level;
 	}
 #endif
+	walker->max_level = walker->level;
 	ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) ||
 	       (mmu->get_cr3(vcpu) & CR3_NONPAE_RESERVED_BITS) == 0);
 
@@ -183,6 +224,7 @@ retry_walk:
 		ptep_user = (pt_element_t __user *)((void *)host_addr + offset);
 		if (unlikely(__copy_from_user(&pte, ptep_user, sizeof(pte))))
 			goto error;
+		walker->ptep_user[walker->level - 1] = ptep_user;
 
 		trace_kvm_mmu_paging_element(pte, walker->level);
 
@@ -214,21 +256,6 @@ retry_walk:
 					eperm = true;
 		}
 
-		if (!eperm && unlikely(!(pte & PT_ACCESSED_MASK))) {
-			int ret;
-			trace_kvm_mmu_set_accessed_bit(table_gfn, index,
-						       sizeof(pte));
-			ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index,
-						  pte, pte|PT_ACCESSED_MASK);
-			if (unlikely(ret < 0))
-				goto error;
-			else if (ret)
-				goto retry_walk;
-
-			mark_page_dirty(vcpu->kvm, table_gfn);
-			pte |= PT_ACCESSED_MASK;
-		}
-
 		walker->ptes[walker->level - 1] = pte;
 
 		if (last_gpte) {
@@ -268,21 +295,12 @@ retry_walk:
 
 	if (!write_fault)
 		protect_clean_gpte(&pte_access, pte);
-	else if (unlikely(!is_dirty_gpte(pte))) {
-		int ret;
 
-		trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte));
-		ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index,
-					  pte, pte|PT_DIRTY_MASK);
-		if (unlikely(ret < 0))
-			goto error;
-		else if (ret)
-			goto retry_walk;
-
-		mark_page_dirty(vcpu->kvm, table_gfn);
-		pte |= PT_DIRTY_MASK;
-		walker->ptes[walker->level - 1] = pte;
-	}
+	ret = FNAME(update_accessed_dirty_bits)(vcpu, mmu, walker, write_fault);
+	if (unlikely(ret < 0))
+		goto error;
+	else if (ret)
+		goto retry_walk;
 
 	walker->pt_access = pt_access;
 	walker->pte_access = pte_access;
-- 
cgit v1.2.3-70-g09d2


From 97d64b788114be1c4dc4bfe7a8ba2bf9643fe6af Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Wed, 12 Sep 2012 14:52:00 +0300
Subject: KVM: MMU: Optimize pte permission checks

walk_addr_generic() permission checks are a maze of branchy code, which is
performed four times per lookup.  It depends on the type of access, efer.nxe,
cr0.wp, cr4.smep, and in the near future, cr4.smap.

Optimize this away by precalculating all variants and storing them in a
bitmap.  The bitmap is recalculated when rarely-changing variables change
(cr0, cr4) and is indexed by the often-changing variables (page fault error
code, pte access permissions).

The permission check is moved to the end of the loop, otherwise an SMEP
fault could be reported as a false positive, when PDE.U=1 but PTE.U=0.
Noted by Xiao Guangrong.

The result is short, branch-free code.

Reviewed-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  7 +++++++
 arch/x86/kvm/mmu.c              | 38 ++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/mmu.h              | 19 ++++++++-----------
 arch/x86/kvm/paging_tmpl.h      | 22 ++++------------------
 arch/x86/kvm/x86.c              | 11 ++++-------
 5 files changed, 61 insertions(+), 36 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 64adb6117e1..3318bde206a 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -287,6 +287,13 @@ struct kvm_mmu {
 	union kvm_mmu_page_role base_role;
 	bool direct_map;
 
+	/*
+	 * Bitmap; bit set = permission fault
+	 * Byte index: page fault error code [4:1]
+	 * Bit index: pte permissions in ACC_* format
+	 */
+	u8 permissions[16];
+
 	u64 *pae_root;
 	u64 *lm_root;
 	u64 rsvd_bits_mask[2][4];
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index f297a2ccf4f..9c6188931f8 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3516,6 +3516,38 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
 	}
 }
 
+static void update_permission_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
+{
+	unsigned bit, byte, pfec;
+	u8 map;
+	bool fault, x, w, u, wf, uf, ff, smep;
+
+	smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP);
+	for (byte = 0; byte < ARRAY_SIZE(mmu->permissions); ++byte) {
+		pfec = byte << 1;
+		map = 0;
+		wf = pfec & PFERR_WRITE_MASK;
+		uf = pfec & PFERR_USER_MASK;
+		ff = pfec & PFERR_FETCH_MASK;
+		for (bit = 0; bit < 8; ++bit) {
+			x = bit & ACC_EXEC_MASK;
+			w = bit & ACC_WRITE_MASK;
+			u = bit & ACC_USER_MASK;
+
+			/* Not really needed: !nx will cause pte.nx to fault */
+			x |= !mmu->nx;
+			/* Allow supervisor writes if !cr0.wp */
+			w |= !is_write_protection(vcpu) && !uf;
+			/* Disallow supervisor fetches of user code if cr4.smep */
+			x &= !(smep && u && !uf);
+
+			fault = (ff && !x) || (uf && !u) || (wf && !w);
+			map |= fault << bit;
+		}
+		mmu->permissions[byte] = map;
+	}
+}
+
 static int paging64_init_context_common(struct kvm_vcpu *vcpu,
 					struct kvm_mmu *context,
 					int level)
@@ -3524,6 +3556,7 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu,
 	context->root_level = level;
 
 	reset_rsvds_bits_mask(vcpu, context);
+	update_permission_bitmask(vcpu, context);
 
 	ASSERT(is_pae(vcpu));
 	context->new_cr3 = paging_new_cr3;
@@ -3552,6 +3585,7 @@ static int paging32_init_context(struct kvm_vcpu *vcpu,
 	context->root_level = PT32_ROOT_LEVEL;
 
 	reset_rsvds_bits_mask(vcpu, context);
+	update_permission_bitmask(vcpu, context);
 
 	context->new_cr3 = paging_new_cr3;
 	context->page_fault = paging32_page_fault;
@@ -3612,6 +3646,8 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
 		context->gva_to_gpa = paging32_gva_to_gpa;
 	}
 
+	update_permission_bitmask(vcpu, context);
+
 	return 0;
 }
 
@@ -3687,6 +3723,8 @@ static int init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
 		g_context->gva_to_gpa = paging32_gva_to_gpa_nested;
 	}
 
+	update_permission_bitmask(vcpu, g_context);
+
 	return 0;
 }
 
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 2832081e9b2..584660775d0 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -89,17 +89,14 @@ static inline bool is_write_protection(struct kvm_vcpu *vcpu)
 	return kvm_read_cr0_bits(vcpu, X86_CR0_WP);
 }
 
-static inline bool check_write_user_access(struct kvm_vcpu *vcpu,
-					   bool write_fault, bool user_fault,
-					   unsigned long pte)
+/*
+ * Will a fault with a given page-fault error code (pfec) cause a permission
+ * fault with the given access (in ACC_* format)?
+ */
+static inline bool permission_fault(struct kvm_mmu *mmu, unsigned pte_access,
+				    unsigned pfec)
 {
-	if (unlikely(write_fault && !is_writable_pte(pte)
-	      && (user_fault || is_write_protection(vcpu))))
-		return false;
-
-	if (unlikely(user_fault && !(pte & PT_USER_MASK)))
-		return false;
-
-	return true;
+	return (mmu->permissions[pfec >> 1] >> pte_access) & 1;
 }
+
 #endif
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 35a05dd2f69..8f6c59fadbb 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -169,7 +169,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
 	pt_element_t pte;
 	pt_element_t __user *uninitialized_var(ptep_user);
 	gfn_t table_gfn;
-	unsigned index, pt_access, uninitialized_var(pte_access);
+	unsigned index, pt_access, pte_access;
 	gpa_t pte_gpa;
 	bool eperm, last_gpte;
 	int offset;
@@ -237,24 +237,9 @@ retry_walk:
 			goto error;
 		}
 
-		if (!check_write_user_access(vcpu, write_fault, user_fault,
-					  pte))
-			eperm = true;
-
-#if PTTYPE == 64
-		if (unlikely(fetch_fault && (pte & PT64_NX_MASK)))
-			eperm = true;
-#endif
+		pte_access = pt_access & gpte_access(vcpu, pte);
 
 		last_gpte = FNAME(is_last_gpte)(walker, vcpu, mmu, pte);
-		if (last_gpte) {
-			pte_access = pt_access & gpte_access(vcpu, pte);
-			/* check if the kernel is fetching from user page */
-			if (unlikely(pte_access & PT_USER_MASK) &&
-			    kvm_read_cr4_bits(vcpu, X86_CR4_SMEP))
-				if (fetch_fault && !user_fault)
-					eperm = true;
-		}
 
 		walker->ptes[walker->level - 1] = pte;
 
@@ -284,10 +269,11 @@ retry_walk:
 			break;
 		}
 
-		pt_access &= gpte_access(vcpu, pte);
+		pt_access &= pte_access;
 		--walker->level;
 	}
 
+	eperm |= permission_fault(mmu, pte_access, access);
 	if (unlikely(eperm)) {
 		errcode |= PFERR_PRESENT_MASK;
 		goto error;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 19047eafa38..497226e49d4 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3672,20 +3672,17 @@ static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
 				gpa_t *gpa, struct x86_exception *exception,
 				bool write)
 {
-	u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
+	u32 access = ((kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0)
+		| (write ? PFERR_WRITE_MASK : 0);
 
-	if (vcpu_match_mmio_gva(vcpu, gva) &&
-		  check_write_user_access(vcpu, write, access,
-		  vcpu->arch.access)) {
+	if (vcpu_match_mmio_gva(vcpu, gva)
+	    && !permission_fault(vcpu->arch.walk_mmu, vcpu->arch.access, access)) {
 		*gpa = vcpu->arch.mmio_gfn << PAGE_SHIFT |
 					(gva & (PAGE_SIZE - 1));
 		trace_vcpu_match_mmio(gva, *gpa, write, false);
 		return 1;
 	}
 
-	if (write)
-		access |= PFERR_WRITE_MASK;
-
 	*gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
 
 	if (*gpa == UNMAPPED_GVA)
-- 
cgit v1.2.3-70-g09d2


From 13d22b6aebb000aeaf137862c6c0e0c4d138d798 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Wed, 12 Sep 2012 15:12:09 +0300
Subject: KVM: MMU: Simplify walk_addr_generic() loop

The page table walk is coded as an infinite loop, with a special
case on the last pte.

Code it as an ordinary loop with a termination condition on the last
pte (large page or walk length exhausted), and put the last pte handling
code after the loop where it belongs.

Reviewed-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/paging_tmpl.h | 60 +++++++++++++++++++---------------------------
 1 file changed, 25 insertions(+), 35 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 8f6c59fadbb..1b4c14d235a 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -171,12 +171,15 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
 	gfn_t table_gfn;
 	unsigned index, pt_access, pte_access;
 	gpa_t pte_gpa;
-	bool eperm, last_gpte;
+	bool eperm;
 	int offset;
 	const int write_fault = access & PFERR_WRITE_MASK;
 	const int user_fault  = access & PFERR_USER_MASK;
 	const int fetch_fault = access & PFERR_FETCH_MASK;
 	u16 errcode = 0;
+	gpa_t real_gpa;
+	gfn_t gfn;
+	u32 ac;
 
 	trace_kvm_mmu_pagetable_walk(addr, access);
 retry_walk:
@@ -197,12 +200,16 @@ retry_walk:
 	ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) ||
 	       (mmu->get_cr3(vcpu) & CR3_NONPAE_RESERVED_BITS) == 0);
 
-	pt_access = ACC_ALL;
+	pt_access = pte_access = ACC_ALL;
+	++walker->level;
 
-	for (;;) {
+	do {
 		gfn_t real_gfn;
 		unsigned long host_addr;
 
+		pt_access &= pte_access;
+		--walker->level;
+
 		index = PT_INDEX(addr, walker->level);
 
 		table_gfn = gpte_to_gfn(pte);
@@ -239,39 +246,8 @@ retry_walk:
 
 		pte_access = pt_access & gpte_access(vcpu, pte);
 
-		last_gpte = FNAME(is_last_gpte)(walker, vcpu, mmu, pte);
-
 		walker->ptes[walker->level - 1] = pte;
-
-		if (last_gpte) {
-			int lvl = walker->level;
-			gpa_t real_gpa;
-			gfn_t gfn;
-			u32 ac;
-
-			gfn = gpte_to_gfn_lvl(pte, lvl);
-			gfn += (addr & PT_LVL_OFFSET_MASK(lvl)) >> PAGE_SHIFT;
-
-			if (PTTYPE == 32 &&
-			    walker->level == PT_DIRECTORY_LEVEL &&
-			    is_cpuid_PSE36())
-				gfn += pse36_gfn_delta(pte);
-
-			ac = write_fault | fetch_fault | user_fault;
-
-			real_gpa = mmu->translate_gpa(vcpu, gfn_to_gpa(gfn),
-						      ac);
-			if (real_gpa == UNMAPPED_GVA)
-				return 0;
-
-			walker->gfn = real_gpa >> PAGE_SHIFT;
-
-			break;
-		}
-
-		pt_access &= pte_access;
-		--walker->level;
-	}
+	} while (!FNAME(is_last_gpte)(walker, vcpu, mmu, pte));
 
 	eperm |= permission_fault(mmu, pte_access, access);
 	if (unlikely(eperm)) {
@@ -279,6 +255,20 @@ retry_walk:
 		goto error;
 	}
 
+	gfn = gpte_to_gfn_lvl(pte, walker->level);
+	gfn += (addr & PT_LVL_OFFSET_MASK(walker->level)) >> PAGE_SHIFT;
+
+	if (PTTYPE == 32 && walker->level == PT_DIRECTORY_LEVEL && is_cpuid_PSE36())
+		gfn += pse36_gfn_delta(pte);
+
+	ac = write_fault | fetch_fault | user_fault;
+
+	real_gpa = mmu->translate_gpa(vcpu, gfn_to_gpa(gfn), ac);
+	if (real_gpa == UNMAPPED_GVA)
+		return 0;
+
+	walker->gfn = real_gpa >> PAGE_SHIFT;
+
 	if (!write_fault)
 		protect_clean_gpte(&pte_access, pte);
 
-- 
cgit v1.2.3-70-g09d2


From 6fd01b711bee96ce3356f7b6f370ab708e37504b Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Wed, 12 Sep 2012 20:46:56 +0300
Subject: KVM: MMU: Optimize is_last_gpte()

Instead of branchy code depending on level, gpte.ps, and mmu configuration,
prepare everything in a bitmap during mode changes and look it up during
runtime.

Reviewed-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  7 +++++++
 arch/x86/kvm/mmu.c              | 31 +++++++++++++++++++++++++++++++
 arch/x86/kvm/mmu.h              |  3 ++-
 arch/x86/kvm/paging_tmpl.h      | 20 +-------------------
 4 files changed, 41 insertions(+), 20 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 3318bde206a..43aeb942283 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -298,6 +298,13 @@ struct kvm_mmu {
 	u64 *lm_root;
 	u64 rsvd_bits_mask[2][4];
 
+	/*
+	 * Bitmap: bit set = last pte in walk
+	 * index[0:1]: level (zero-based)
+	 * index[2]: pte.ps
+	 */
+	u8 last_pte_bitmap;
+
 	bool nx;
 
 	u64 pdptrs[4]; /* pae */
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 9c6188931f8..d289fee1ffb 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3447,6 +3447,15 @@ static inline unsigned gpte_access(struct kvm_vcpu *vcpu, u64 gpte)
 	return access;
 }
 
+static inline bool is_last_gpte(struct kvm_mmu *mmu, unsigned level, unsigned gpte)
+{
+	unsigned index;
+
+	index = level - 1;
+	index |= (gpte & PT_PAGE_SIZE_MASK) >> (PT_PAGE_SIZE_SHIFT - 2);
+	return mmu->last_pte_bitmap & (1 << index);
+}
+
 #define PTTYPE 64
 #include "paging_tmpl.h"
 #undef PTTYPE
@@ -3548,6 +3557,24 @@ static void update_permission_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu
 	}
 }
 
+static void update_last_pte_bitmap(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
+{
+	u8 map;
+	unsigned level, root_level = mmu->root_level;
+	const unsigned ps_set_index = 1 << 2;  /* bit 2 of index: ps */
+
+	if (root_level == PT32E_ROOT_LEVEL)
+		--root_level;
+	/* PT_PAGE_TABLE_LEVEL always terminates */
+	map = 1 | (1 << ps_set_index);
+	for (level = PT_DIRECTORY_LEVEL; level <= root_level; ++level) {
+		if (level <= PT_PDPE_LEVEL
+		    && (mmu->root_level >= PT32E_ROOT_LEVEL || is_pse(vcpu)))
+			map |= 1 << (ps_set_index | (level - 1));
+	}
+	mmu->last_pte_bitmap = map;
+}
+
 static int paging64_init_context_common(struct kvm_vcpu *vcpu,
 					struct kvm_mmu *context,
 					int level)
@@ -3557,6 +3584,7 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu,
 
 	reset_rsvds_bits_mask(vcpu, context);
 	update_permission_bitmask(vcpu, context);
+	update_last_pte_bitmap(vcpu, context);
 
 	ASSERT(is_pae(vcpu));
 	context->new_cr3 = paging_new_cr3;
@@ -3586,6 +3614,7 @@ static int paging32_init_context(struct kvm_vcpu *vcpu,
 
 	reset_rsvds_bits_mask(vcpu, context);
 	update_permission_bitmask(vcpu, context);
+	update_last_pte_bitmap(vcpu, context);
 
 	context->new_cr3 = paging_new_cr3;
 	context->page_fault = paging32_page_fault;
@@ -3647,6 +3676,7 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
 	}
 
 	update_permission_bitmask(vcpu, context);
+	update_last_pte_bitmap(vcpu, context);
 
 	return 0;
 }
@@ -3724,6 +3754,7 @@ static int init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
 	}
 
 	update_permission_bitmask(vcpu, g_context);
+	update_last_pte_bitmap(vcpu, g_context);
 
 	return 0;
 }
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 584660775d0..69871080e86 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -20,7 +20,8 @@
 #define PT_ACCESSED_MASK (1ULL << PT_ACCESSED_SHIFT)
 #define PT_DIRTY_SHIFT 6
 #define PT_DIRTY_MASK (1ULL << PT_DIRTY_SHIFT)
-#define PT_PAGE_SIZE_MASK (1ULL << 7)
+#define PT_PAGE_SIZE_SHIFT 7
+#define PT_PAGE_SIZE_MASK (1ULL << PT_PAGE_SIZE_SHIFT)
 #define PT_PAT_MASK (1ULL << 7)
 #define PT_GLOBAL_MASK (1ULL << 8)
 #define PT64_NX_SHIFT 63
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 1b4c14d235a..134ea7b1c58 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -103,24 +103,6 @@ static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
 	return (ret != orig_pte);
 }
 
-static bool FNAME(is_last_gpte)(struct guest_walker *walker,
-				struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
-				pt_element_t gpte)
-{
-	if (walker->level == PT_PAGE_TABLE_LEVEL)
-		return true;
-
-	if ((walker->level == PT_DIRECTORY_LEVEL) && is_large_pte(gpte) &&
-	    (PTTYPE == 64 || is_pse(vcpu)))
-		return true;
-
-	if ((walker->level == PT_PDPE_LEVEL) && is_large_pte(gpte) &&
-	    (mmu->root_level == PT64_ROOT_LEVEL))
-		return true;
-
-	return false;
-}
-
 static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu,
 					     struct kvm_mmu *mmu,
 					     struct guest_walker *walker,
@@ -247,7 +229,7 @@ retry_walk:
 		pte_access = pt_access & gpte_access(vcpu, pte);
 
 		walker->ptes[walker->level - 1] = pte;
-	} while (!FNAME(is_last_gpte)(walker, vcpu, mmu, pte));
+	} while (!is_last_gpte(mmu, walker->level, pte));
 
 	eperm |= permission_fault(mmu, pte_access, access);
 	if (unlikely(eperm)) {
-- 
cgit v1.2.3-70-g09d2


From 71331a1da1e3a66d14bb3864f99e32d84ab5a76f Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Sun, 16 Sep 2012 14:49:15 +0300
Subject: KVM: MMU: Eliminate eperm temporary

'eperm' is no longer used in the walker loop, so we can eliminate it.

Reviewed-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/paging_tmpl.h | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 134ea7b1c58..95a64d1dccc 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -153,7 +153,6 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
 	gfn_t table_gfn;
 	unsigned index, pt_access, pte_access;
 	gpa_t pte_gpa;
-	bool eperm;
 	int offset;
 	const int write_fault = access & PFERR_WRITE_MASK;
 	const int user_fault  = access & PFERR_USER_MASK;
@@ -165,7 +164,6 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
 
 	trace_kvm_mmu_pagetable_walk(addr, access);
 retry_walk:
-	eperm = false;
 	walker->level = mmu->root_level;
 	pte           = mmu->get_cr3(vcpu);
 
@@ -231,8 +229,7 @@ retry_walk:
 		walker->ptes[walker->level - 1] = pte;
 	} while (!is_last_gpte(mmu, walker->level, pte));
 
-	eperm |= permission_fault(mmu, pte_access, access);
-	if (unlikely(eperm)) {
+	if (unlikely(permission_fault(mmu, pte_access, access))) {
 		errcode |= PFERR_PRESENT_MASK;
 		goto error;
 	}
-- 
cgit v1.2.3-70-g09d2


From b514c30f7729b66af481fde3c1225e832e339d5b Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Sun, 16 Sep 2012 15:03:02 +0300
Subject: KVM: MMU: Avoid access/dirty update loop if all is well

Keep track of accessed/dirty bits; if they are all set, do not
enter the accessed/dirty update loop.

Reviewed-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/paging_tmpl.h | 26 ++++++++++++++++++++------
 1 file changed, 20 insertions(+), 6 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 95a64d1dccc..810c1da2ee4 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -151,7 +151,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
 	pt_element_t pte;
 	pt_element_t __user *uninitialized_var(ptep_user);
 	gfn_t table_gfn;
-	unsigned index, pt_access, pte_access;
+	unsigned index, pt_access, pte_access, accessed_dirty, shift;
 	gpa_t pte_gpa;
 	int offset;
 	const int write_fault = access & PFERR_WRITE_MASK;
@@ -180,6 +180,7 @@ retry_walk:
 	ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) ||
 	       (mmu->get_cr3(vcpu) & CR3_NONPAE_RESERVED_BITS) == 0);
 
+	accessed_dirty = PT_ACCESSED_MASK;
 	pt_access = pte_access = ACC_ALL;
 	++walker->level;
 
@@ -224,6 +225,7 @@ retry_walk:
 			goto error;
 		}
 
+		accessed_dirty &= pte;
 		pte_access = pt_access & gpte_access(vcpu, pte);
 
 		walker->ptes[walker->level - 1] = pte;
@@ -251,11 +253,23 @@ retry_walk:
 	if (!write_fault)
 		protect_clean_gpte(&pte_access, pte);
 
-	ret = FNAME(update_accessed_dirty_bits)(vcpu, mmu, walker, write_fault);
-	if (unlikely(ret < 0))
-		goto error;
-	else if (ret)
-		goto retry_walk;
+	/*
+	 * On a write fault, fold the dirty bit into accessed_dirty by shifting it one
+	 * place right.
+	 *
+	 * On a read fault, do nothing.
+	 */
+	shift = write_fault >> ilog2(PFERR_WRITE_MASK);
+	shift *= PT_DIRTY_SHIFT - PT_ACCESSED_SHIFT;
+	accessed_dirty &= pte >> shift;
+
+	if (unlikely(!accessed_dirty)) {
+		ret = FNAME(update_accessed_dirty_bits)(vcpu, mmu, walker, write_fault);
+		if (unlikely(ret < 0))
+			goto error;
+		else if (ret)
+			goto retry_walk;
+	}
 
 	walker->pt_access = pt_access;
 	walker->pte_access = pte_access;
-- 
cgit v1.2.3-70-g09d2


From c5421519f30bd5ed77857a78de6dc8414385e602 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Wed, 19 Sep 2012 19:33:48 +0300
Subject: KVM: MMU: Eliminate pointless temporary 'ac'

'ac' essentially reconstructs the 'access' variable we already
have, except for the PFERR_PRESENT_MASK and PFERR_RSVD_MASK.  As
these are not used by callees, just use 'access' directly.

Reviewed-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/paging_tmpl.h | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 810c1da2ee4..714e2c01a6f 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -160,7 +160,6 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
 	u16 errcode = 0;
 	gpa_t real_gpa;
 	gfn_t gfn;
-	u32 ac;
 
 	trace_kvm_mmu_pagetable_walk(addr, access);
 retry_walk:
@@ -242,9 +241,7 @@ retry_walk:
 	if (PTTYPE == 32 && walker->level == PT_DIRECTORY_LEVEL && is_cpuid_PSE36())
 		gfn += pse36_gfn_delta(pte);
 
-	ac = write_fault | fetch_fault | user_fault;
-
-	real_gpa = mmu->translate_gpa(vcpu, gfn_to_gpa(gfn), ac);
+	real_gpa = mmu->translate_gpa(vcpu, gfn_to_gpa(gfn), access);
 	if (real_gpa == UNMAPPED_GVA)
 		return 0;
 
-- 
cgit v1.2.3-70-g09d2


From 1e08ec4a130e2745d96df169e67c58df98a07311 Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Thu, 13 Sep 2012 17:19:24 +0300
Subject: KVM: optimize apic interrupt delivery

Most interrupt are delivered to only one vcpu. Use pre-build tables to
find interrupt destination instead of looping through all vcpus. In case
of logical mode loop only through vcpus in a logical cluster irq is sent
to.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  12 +++
 arch/x86/kvm/lapic.c            | 188 +++++++++++++++++++++++++++++++++++++---
 arch/x86/kvm/lapic.h            |   3 +
 arch/x86/kvm/x86.c              |   2 +
 virt/kvm/irq_comm.c             |   7 +-
 5 files changed, 199 insertions(+), 13 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 43aeb942283..0b902c98f27 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -525,6 +525,16 @@ struct kvm_arch_memory_slot {
 	struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1];
 };
 
+struct kvm_apic_map {
+	struct rcu_head rcu;
+	u8 ldr_bits;
+	/* fields bellow are used to decode ldr values in different modes */
+	u32 cid_shift, cid_mask, lid_mask;
+	struct kvm_lapic *phys_map[256];
+	/* first index is cluster id second is cpu id in a cluster */
+	struct kvm_lapic *logical_map[16][16];
+};
+
 struct kvm_arch {
 	unsigned int n_used_mmu_pages;
 	unsigned int n_requested_mmu_pages;
@@ -542,6 +552,8 @@ struct kvm_arch {
 	struct kvm_ioapic *vioapic;
 	struct kvm_pit *vpit;
 	int vapics_in_nmi_mode;
+	struct mutex apic_map_lock;
+	struct kvm_apic_map *apic_map;
 
 	unsigned int tss_addr;
 	struct page *apic_access_page;
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 6f9fd633c88..c6e6b721b6e 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -140,11 +140,110 @@ static inline int apic_enabled(struct kvm_lapic *apic)
 	(LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \
 	 APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER)
 
+static inline int apic_x2apic_mode(struct kvm_lapic *apic)
+{
+	return apic->vcpu->arch.apic_base & X2APIC_ENABLE;
+}
+
 static inline int kvm_apic_id(struct kvm_lapic *apic)
 {
 	return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff;
 }
 
+static inline u16 apic_cluster_id(struct kvm_apic_map *map, u32 ldr)
+{
+	u16 cid;
+	ldr >>= 32 - map->ldr_bits;
+	cid = (ldr >> map->cid_shift) & map->cid_mask;
+
+	BUG_ON(cid >= ARRAY_SIZE(map->logical_map));
+
+	return cid;
+}
+
+static inline u16 apic_logical_id(struct kvm_apic_map *map, u32 ldr)
+{
+	ldr >>= (32 - map->ldr_bits);
+	return ldr & map->lid_mask;
+}
+
+static void recalculate_apic_map(struct kvm *kvm)
+{
+	struct kvm_apic_map *new, *old = NULL;
+	struct kvm_vcpu *vcpu;
+	int i;
+
+	new = kzalloc(sizeof(struct kvm_apic_map), GFP_KERNEL);
+
+	mutex_lock(&kvm->arch.apic_map_lock);
+
+	if (!new)
+		goto out;
+
+	new->ldr_bits = 8;
+	/* flat mode is default */
+	new->cid_shift = 8;
+	new->cid_mask = 0;
+	new->lid_mask = 0xff;
+
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		struct kvm_lapic *apic = vcpu->arch.apic;
+		u16 cid, lid;
+		u32 ldr;
+
+		if (!kvm_apic_present(vcpu))
+			continue;
+
+		/*
+		 * All APICs have to be configured in the same mode by an OS.
+		 * We take advatage of this while building logical id loockup
+		 * table. After reset APICs are in xapic/flat mode, so if we
+		 * find apic with different setting we assume this is the mode
+		 * OS wants all apics to be in; build lookup table accordingly.
+		 */
+		if (apic_x2apic_mode(apic)) {
+			new->ldr_bits = 32;
+			new->cid_shift = 16;
+			new->cid_mask = new->lid_mask = 0xffff;
+		} else if (kvm_apic_sw_enabled(apic) &&
+				!new->cid_mask /* flat mode */ &&
+				kvm_apic_get_reg(apic, APIC_DFR) == APIC_DFR_CLUSTER) {
+			new->cid_shift = 4;
+			new->cid_mask = 0xf;
+			new->lid_mask = 0xf;
+		}
+
+		new->phys_map[kvm_apic_id(apic)] = apic;
+
+		ldr = kvm_apic_get_reg(apic, APIC_LDR);
+		cid = apic_cluster_id(new, ldr);
+		lid = apic_logical_id(new, ldr);
+
+		if (lid)
+			new->logical_map[cid][ffs(lid) - 1] = apic;
+	}
+out:
+	old = rcu_dereference_protected(kvm->arch.apic_map,
+			lockdep_is_held(&kvm->arch.apic_map_lock));
+	rcu_assign_pointer(kvm->arch.apic_map, new);
+	mutex_unlock(&kvm->arch.apic_map_lock);
+
+	if (old)
+		kfree_rcu(old, rcu);
+}
+
+static inline void kvm_apic_set_id(struct kvm_lapic *apic, u8 id)
+{
+	apic_set_reg(apic, APIC_ID, id << 24);
+	recalculate_apic_map(apic->vcpu->kvm);
+}
+
+static inline void kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id)
+{
+	apic_set_reg(apic, APIC_LDR, id);
+	recalculate_apic_map(apic->vcpu->kvm);
+}
+
 static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type)
 {
 	return !(kvm_apic_get_reg(apic, lvt_type) & APIC_LVT_MASKED);
@@ -194,11 +293,6 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu)
 	apic_set_reg(apic, APIC_LVR, v);
 }
 
-static inline int apic_x2apic_mode(struct kvm_lapic *apic)
-{
-	return apic->vcpu->arch.apic_base & X2APIC_ENABLE;
-}
-
 static const unsigned int apic_lvt_mask[APIC_LVT_NUM] = {
 	LVT_MASK ,      /* part LVTT mask, timer mode mask added at runtime */
 	LVT_MASK | APIC_MODE_MASK,	/* LVTTHMR */
@@ -483,6 +577,72 @@ int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
 	return result;
 }
 
+bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
+		struct kvm_lapic_irq *irq, int *r)
+{
+	struct kvm_apic_map *map;
+	unsigned long bitmap = 1;
+	struct kvm_lapic **dst;
+	int i;
+	bool ret = false;
+
+	*r = -1;
+
+	if (irq->shorthand == APIC_DEST_SELF) {
+		*r = kvm_apic_set_irq(src->vcpu, irq);
+		return true;
+	}
+
+	if (irq->shorthand)
+		return false;
+
+	rcu_read_lock();
+	map = rcu_dereference(kvm->arch.apic_map);
+
+	if (!map)
+		goto out;
+
+	if (irq->dest_mode == 0) { /* physical mode */
+		if (irq->delivery_mode == APIC_DM_LOWEST ||
+				irq->dest_id == 0xff)
+			goto out;
+		dst = &map->phys_map[irq->dest_id & 0xff];
+	} else {
+		u32 mda = irq->dest_id << (32 - map->ldr_bits);
+
+		dst = map->logical_map[apic_cluster_id(map, mda)];
+
+		bitmap = apic_logical_id(map, mda);
+
+		if (irq->delivery_mode == APIC_DM_LOWEST) {
+			int l = -1;
+			for_each_set_bit(i, &bitmap, 16) {
+				if (!dst[i])
+					continue;
+				if (l < 0)
+					l = i;
+				else if (kvm_apic_compare_prio(dst[i]->vcpu, dst[l]->vcpu) < 0)
+					l = i;
+			}
+
+			bitmap = (l >= 0) ? 1 << l : 0;
+		}
+	}
+
+	for_each_set_bit(i, &bitmap, 16) {
+		if (!dst[i])
+			continue;
+		if (*r < 0)
+			*r = 0;
+		*r += kvm_apic_set_irq(dst[i]->vcpu, irq);
+	}
+
+	ret = true;
+out:
+	rcu_read_unlock();
+	return ret;
+}
+
 /*
  * Add a pending IRQ into lapic.
  * Return 1 if successfully added and 0 if discarded.
@@ -886,7 +1046,7 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
 	switch (reg) {
 	case APIC_ID:		/* Local APIC ID */
 		if (!apic_x2apic_mode(apic))
-			apic_set_reg(apic, APIC_ID, val);
+			kvm_apic_set_id(apic, val >> 24);
 		else
 			ret = 1;
 		break;
@@ -902,15 +1062,16 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
 
 	case APIC_LDR:
 		if (!apic_x2apic_mode(apic))
-			apic_set_reg(apic, APIC_LDR, val & APIC_LDR_MASK);
+			kvm_apic_set_ldr(apic, val & APIC_LDR_MASK);
 		else
 			ret = 1;
 		break;
 
 	case APIC_DFR:
-		if (!apic_x2apic_mode(apic))
+		if (!apic_x2apic_mode(apic)) {
 			apic_set_reg(apic, APIC_DFR, val | 0x0FFFFFFF);
-		else
+			recalculate_apic_map(apic->vcpu->kvm);
+		} else
 			ret = 1;
 		break;
 
@@ -1141,6 +1302,7 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
 			static_key_slow_dec_deferred(&apic_hw_disabled);
 		else
 			static_key_slow_inc(&apic_hw_disabled.key);
+		recalculate_apic_map(vcpu->kvm);
 	}
 
 	if (!kvm_vcpu_is_bsp(apic->vcpu))
@@ -1150,7 +1312,7 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
 	if (apic_x2apic_mode(apic)) {
 		u32 id = kvm_apic_id(apic);
 		u32 ldr = ((id & ~0xf) << 16) | (1 << (id & 0xf));
-		apic_set_reg(apic, APIC_LDR, ldr);
+		kvm_apic_set_ldr(apic, ldr);
 	}
 	apic->base_address = apic->vcpu->arch.apic_base &
 			     MSR_IA32_APICBASE_BASE;
@@ -1175,7 +1337,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
 	/* Stop the timer in case it's a reset to an active apic */
 	hrtimer_cancel(&apic->lapic_timer.timer);
 
-	apic_set_reg(apic, APIC_ID, vcpu->vcpu_id << 24);
+	kvm_apic_set_id(apic, vcpu->vcpu_id);
 	kvm_apic_set_version(apic->vcpu);
 
 	for (i = 0; i < APIC_LVT_NUM; i++)
@@ -1186,7 +1348,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
 	apic_set_reg(apic, APIC_DFR, 0xffffffffU);
 	apic_set_spiv(apic, 0xff);
 	apic_set_reg(apic, APIC_TASKPRI, 0);
-	apic_set_reg(apic, APIC_LDR, 0);
+	kvm_apic_set_ldr(apic, 0);
 	apic_set_reg(apic, APIC_ESR, 0);
 	apic_set_reg(apic, APIC_ICR, 0);
 	apic_set_reg(apic, APIC_ICR2, 0);
@@ -1404,6 +1566,8 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
 	/* set SPIV separately to get count of SW disabled APICs right */
 	apic_set_spiv(apic, *((u32 *)(s->regs + APIC_SPIV)));
 	memcpy(vcpu->arch.apic->regs, s->regs, sizeof *s);
+	/* call kvm_apic_set_id() to put apic into apic_map */
+	kvm_apic_set_id(apic, kvm_apic_id(apic));
 	kvm_apic_set_version(vcpu);
 
 	apic_update_ppr(apic);
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 615a8b03016..e5ebf9f3571 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -52,6 +52,9 @@ int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
 int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq);
 int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type);
 
+bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
+		struct kvm_lapic_irq *irq, int *r);
+
 u64 kvm_get_apic_base(struct kvm_vcpu *vcpu);
 void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data);
 void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 497226e49d4..fc2a0a132e4 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6270,6 +6270,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
 
 	raw_spin_lock_init(&kvm->arch.tsc_write_lock);
+	mutex_init(&kvm->arch.apic_map_lock);
 
 	return 0;
 }
@@ -6322,6 +6323,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 		put_page(kvm->arch.apic_access_page);
 	if (kvm->arch.ept_identity_pagetable)
 		put_page(kvm->arch.ept_identity_pagetable);
+	kfree(rcu_dereference_check(kvm->arch.apic_map, 1));
 }
 
 void kvm_arch_free_memslot(struct kvm_memory_slot *free,
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 7118be0f2f2..3ca89c451d6 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -68,8 +68,13 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
 	struct kvm_vcpu *vcpu, *lowest = NULL;
 
 	if (irq->dest_mode == 0 && irq->dest_id == 0xff &&
-			kvm_is_dm_lowest_prio(irq))
+			kvm_is_dm_lowest_prio(irq)) {
 		printk(KERN_INFO "kvm: apic: phys broadcast and lowest prio\n");
+		irq->delivery_mode = APIC_DM_FIXED;
+	}
+
+	if (kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r))
+		return r;
 
 	kvm_for_each_vcpu(i, vcpu, kvm) {
 		if (!kvm_apic_present(vcpu))
-- 
cgit v1.2.3-70-g09d2


From 50a011f6409e888d5f41343024d24885281f048c Mon Sep 17 00:00:00 2001
From: Borislav Petkov <borislav.petkov@amd.com>
Date: Thu, 20 Sep 2012 14:43:54 +0200
Subject: kprobes/x86: Move skip_singlestep up
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

I get this warning:

  arch/x86/kernel/kprobes.c:544:23: warning: ‘skip_singlestep’ declared ‘static’ but never defined

on tip/auto-latest.

Put the skip_singlestep function declaration up, in
KPROBES_CAN_USE_FTRACE and drop the superfluous forward
declaration.

Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
Acked-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/1348145034-16603-1-git-send-email-bp@amd64.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/kprobes.c | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index b7c2a85d192..57916c0d3cf 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -541,8 +541,23 @@ reenter_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb
 	return 1;
 }
 
+#ifdef KPROBES_CAN_USE_FTRACE
 static void __kprobes skip_singlestep(struct kprobe *p, struct pt_regs *regs,
-				      struct kprobe_ctlblk *kcb);
+				      struct kprobe_ctlblk *kcb)
+{
+	/*
+	 * Emulate singlestep (and also recover regs->ip)
+	 * as if there is a 5byte nop
+	 */
+	regs->ip = (unsigned long)p->addr + MCOUNT_INSN_SIZE;
+	if (unlikely(p->post_handler)) {
+		kcb->kprobe_status = KPROBE_HIT_SSDONE;
+		p->post_handler(p, regs, 0);
+	}
+	__this_cpu_write(current_kprobe, NULL);
+}
+#endif
+
 /*
  * Interrupts are disabled on entry as trap3 is an interrupt gate and they
  * remain disabled throughout this function.
@@ -1061,21 +1076,6 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
 }
 
 #ifdef KPROBES_CAN_USE_FTRACE
-static void __kprobes skip_singlestep(struct kprobe *p, struct pt_regs *regs,
-				      struct kprobe_ctlblk *kcb)
-{
-	/*
-	 * Emulate singlestep (and also recover regs->ip)
-	 * as if there is a 5byte nop
-	 */
-	regs->ip = (unsigned long)p->addr + MCOUNT_INSN_SIZE;
-	if (unlikely(p->post_handler)) {
-		kcb->kprobe_status = KPROBE_HIT_SSDONE;
-		p->post_handler(p, regs, 0);
-	}
-	__this_cpu_write(current_kprobe, NULL);
-}
-
 /* Ftrace callback handler for kprobes */
 void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
 				     struct ftrace_ops *ops, struct pt_regs *regs)
-- 
cgit v1.2.3-70-g09d2


From e76623d69408d0bd66a296c6ee5eae1b17a6adfc Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 2 Aug 2012 22:12:06 +0400
Subject: x86: get rid of TIF_IRET hackery

TIF_NOTIFY_RESUME will work in precisely the same way; all that
is achieved by TIF_IRET is appearing that there's some work to be
done, so we end up on the iret exit path.  Just use NOTIFY_RESUME.
And for execve() do that in 32bit start_thread(), not sys_execve()
itself.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/x86/include/asm/thread_info.h | 2 --
 arch/x86/kernel/process.c          | 8 --------
 arch/x86/kernel/process_32.c       | 5 +++++
 arch/x86/kernel/signal.c           | 4 ----
 arch/x86/kernel/vm86_32.c          | 6 +++---
 5 files changed, 8 insertions(+), 17 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 89f794f007e..c509d07bdbd 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -79,7 +79,6 @@ struct thread_info {
 #define TIF_SIGPENDING		2	/* signal pending */
 #define TIF_NEED_RESCHED	3	/* rescheduling necessary */
 #define TIF_SINGLESTEP		4	/* reenable singlestep on user return*/
-#define TIF_IRET		5	/* force IRET */
 #define TIF_SYSCALL_EMU		6	/* syscall emulation active */
 #define TIF_SYSCALL_AUDIT	7	/* syscall auditing active */
 #define TIF_SECCOMP		8	/* secure computing */
@@ -104,7 +103,6 @@ struct thread_info {
 #define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
 #define _TIF_SINGLESTEP		(1 << TIF_SINGLESTEP)
 #define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
-#define _TIF_IRET		(1 << TIF_IRET)
 #define _TIF_SYSCALL_EMU	(1 << TIF_SYSCALL_EMU)
 #define _TIF_SYSCALL_AUDIT	(1 << TIF_SYSCALL_AUDIT)
 #define _TIF_SECCOMP		(1 << TIF_SECCOMP)
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index ef6a8456f71..7162e9c1f59 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -351,14 +351,6 @@ long sys_execve(const char __user *name,
 	if (IS_ERR(filename))
 		return error;
 	error = do_execve(filename, argv, envp, regs);
-
-#ifdef CONFIG_X86_32
-	if (error == 0) {
-		/* Make sure we don't return using sysenter.. */
-                set_thread_flag(TIF_IRET);
-        }
-#endif
-
 	putname(filename);
 	return error;
 }
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 516fa186121..75fcad146de 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -194,6 +194,11 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
 	 * Free the old FP and other extended state
 	 */
 	free_thread_xstate(current);
+	/*
+	 * force it to the iret return path by making it look as if there was
+	 * some work pending.
+	 */
+	set_thread_flag(TIF_NOTIFY_RESUME);
 }
 EXPORT_SYMBOL_GPL(start_thread);
 
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index b280908a376..c648fc52987 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -800,10 +800,6 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
 	}
 	if (thread_info_flags & _TIF_USER_RETURN_NOTIFY)
 		fire_user_return_notifiers();
-
-#ifdef CONFIG_X86_32
-	clear_thread_flag(TIF_IRET);
-#endif /* CONFIG_X86_32 */
 }
 
 void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 54abcc0baf2..5c9687b1bde 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -561,9 +561,9 @@ int handle_vm86_trap(struct kernel_vm86_regs *regs, long error_code, int trapno)
 		if ((trapno == 3) || (trapno == 1)) {
 			KVM86->regs32->ax = VM86_TRAP + (trapno << 8);
 			/* setting this flag forces the code in entry_32.S to
-			   call save_v86_state() and change the stack pointer
-			   to KVM86->regs32 */
-			set_thread_flag(TIF_IRET);
+			   the path where we call save_v86_state() and change
+			   the stack pointer to KVM86->regs32 */
+			set_thread_flag(TIF_NOTIFY_RESUME);
 			return 0;
 		}
 		do_int(regs, trapno, (unsigned char __user *) (regs->pt.ss << 4), SP(regs));
-- 
cgit v1.2.3-70-g09d2


From 8e2c85aa6c7a158d967db75931db7f13d20d31f4 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 6 Sep 2012 13:39:47 -0400
Subject: um: let signal_delivered() do SIGTRAP on singlestepping into handler

... rather than duplicating that in sigframe setup code (and doing that
inconsistently, at that)

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/um/kernel/signal.c | 6 +++++-
 arch/x86/um/signal.c    | 6 ------
 2 files changed, 5 insertions(+), 7 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/um/kernel/signal.c b/arch/um/kernel/signal.c
index 7362d58efc2..cc9c2350e41 100644
--- a/arch/um/kernel/signal.c
+++ b/arch/um/kernel/signal.c
@@ -22,9 +22,13 @@ static void handle_signal(struct pt_regs *regs, unsigned long signr,
 			 struct k_sigaction *ka, siginfo_t *info)
 {
 	sigset_t *oldset = sigmask_to_save();
+	int singlestep = 0;
 	unsigned long sp;
 	int err;
 
+	if ((current->ptrace & PT_DTRACE) && (current->ptrace & PT_PTRACED))
+		singlestep = 1;
+
 	/* Did we come from a system call? */
 	if (PT_REGS_SYSCALL_NR(regs) >= 0) {
 		/* If so, check system call restarting.. */
@@ -61,7 +65,7 @@ static void handle_signal(struct pt_regs *regs, unsigned long signr,
 	if (err)
 		force_sigsegv(signr, current);
 	else
-		signal_delivered(signr, info, ka, regs, 0);
+		signal_delivered(signr, info, ka, regs, singlestep);
 }
 
 static int kern_do_signal(struct pt_regs *regs)
diff --git a/arch/x86/um/signal.c b/arch/x86/um/signal.c
index a508cea1350..ba7363ecf89 100644
--- a/arch/x86/um/signal.c
+++ b/arch/x86/um/signal.c
@@ -416,9 +416,6 @@ int setup_signal_stack_sc(unsigned long stack_top, int sig,
 	PT_REGS_AX(regs) = (unsigned long) sig;
 	PT_REGS_DX(regs) = (unsigned long) 0;
 	PT_REGS_CX(regs) = (unsigned long) 0;
-
-	if ((current->ptrace & PT_DTRACE) && (current->ptrace & PT_PTRACED))
-		ptrace_notify(SIGTRAP);
 	return 0;
 }
 
@@ -466,9 +463,6 @@ int setup_signal_stack_si(unsigned long stack_top, int sig,
 	PT_REGS_AX(regs) = (unsigned long) sig;
 	PT_REGS_DX(regs) = (unsigned long) &frame->info;
 	PT_REGS_CX(regs) = (unsigned long) &frame->uc;
-
-	if ((current->ptrace & PT_DTRACE) && (current->ptrace & PT_PTRACED))
-		ptrace_notify(SIGTRAP);
 	return 0;
 }
 
-- 
cgit v1.2.3-70-g09d2


From a4d94ff8aa864c05b33c2de1f8c5d0176d7a4b63 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 20 Sep 2012 09:28:25 -0400
Subject: um: kill thread->forking

we only use that to tell copy_thread() done by syscall from that
done by kernel_thread().  However, it's easier to do simply by
checking PF_KTHREAD in thread flags.

Merge sys_clone() guts for 32bit and 64bit, while we are at it...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/um/include/asm/processor-generic.h |  9 ---------
 arch/um/kernel/process.c                |  8 ++++----
 arch/um/kernel/syscall.c                | 24 ++++++++++++------------
 arch/x86/um/shared/sysdep/syscalls.h    |  2 ++
 arch/x86/um/sys_call_table_32.c         |  2 +-
 arch/x86/um/syscalls_32.c               | 27 +++++++--------------------
 arch/x86/um/syscalls_64.c               | 23 +++--------------------
 7 files changed, 29 insertions(+), 66 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h
index 69f1c57a8d0..33a6a2423bd 100644
--- a/arch/um/include/asm/processor-generic.h
+++ b/arch/um/include/asm/processor-generic.h
@@ -20,14 +20,6 @@ struct mm_struct;
 
 struct thread_struct {
 	struct task_struct *saved_task;
-	/*
-	 * This flag is set to 1 before calling do_fork (and analyzed in
-	 * copy_thread) to mark that we are begin called from userspace (fork /
-	 * vfork / clone), and reset to 0 after. It is left to 0 when called
-	 * from kernelspace (i.e. kernel_thread() or fork_idle(),
-	 * as of 2.6.11).
-	 */
-	int forking;
 	struct pt_regs regs;
 	int singlestep_syscall;
 	void *fault_addr;
@@ -58,7 +50,6 @@ struct thread_struct {
 
 #define INIT_THREAD \
 { \
-	.forking		= 0, \
 	.regs		   	= EMPTY_REGS,	\
 	.fault_addr		= NULL, \
 	.prev_sched		= NULL, \
diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c
index 57fc7028714..c5f5afa5074 100644
--- a/arch/um/kernel/process.c
+++ b/arch/um/kernel/process.c
@@ -181,11 +181,12 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
 		struct pt_regs *regs)
 {
 	void (*handler)(void);
+	int kthread = current->flags & PF_KTHREAD;
 	int ret = 0;
 
 	p->thread = (struct thread_struct) INIT_THREAD;
 
-	if (current->thread.forking) {
+	if (!kthread) {
 	  	memcpy(&p->thread.regs.regs, &regs->regs,
 		       sizeof(p->thread.regs.regs));
 		PT_REGS_SET_SYSCALL_RETURN(&p->thread.regs, 0);
@@ -195,8 +196,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
 		handler = fork_handler;
 
 		arch_copy_thread(&current->thread.arch, &p->thread.arch);
-	}
-	else {
+	} else {
 		get_safe_registers(p->thread.regs.regs.gp, p->thread.regs.regs.fp);
 		p->thread.request.u.thread = current->thread.request.u.thread;
 		handler = new_thread_handler;
@@ -204,7 +204,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
 
 	new_thread(task_stack_page(p), &p->thread.switch_buf, handler);
 
-	if (current->thread.forking) {
+	if (!kthread) {
 		clear_flushed_tls(p);
 
 		/*
diff --git a/arch/um/kernel/syscall.c b/arch/um/kernel/syscall.c
index f958cb876ee..a4c6d8eee74 100644
--- a/arch/um/kernel/syscall.c
+++ b/arch/um/kernel/syscall.c
@@ -17,25 +17,25 @@
 
 long sys_fork(void)
 {
-	long ret;
-
-	current->thread.forking = 1;
-	ret = do_fork(SIGCHLD, UPT_SP(&current->thread.regs.regs),
+	return do_fork(SIGCHLD, UPT_SP(&current->thread.regs.regs),
 		      &current->thread.regs, 0, NULL, NULL);
-	current->thread.forking = 0;
-	return ret;
 }
 
 long sys_vfork(void)
 {
-	long ret;
-
-	current->thread.forking = 1;
-	ret = do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD,
+	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD,
 		      UPT_SP(&current->thread.regs.regs),
 		      &current->thread.regs, 0, NULL, NULL);
-	current->thread.forking = 0;
-	return ret;
+}
+
+long sys_clone(unsigned long clone_flags, unsigned long newsp,
+	       void __user *parent_tid, void __user *child_tid)
+{
+	if (!newsp)
+		newsp = UPT_SP(&current->thread.regs.regs);
+
+	return do_fork(clone_flags, newsp, &current->thread.regs, 0, parent_tid,
+		      child_tid);
 }
 
 long old_mmap(unsigned long addr, unsigned long len,
diff --git a/arch/x86/um/shared/sysdep/syscalls.h b/arch/x86/um/shared/sysdep/syscalls.h
index bd9a89b67e4..ca255a805ed 100644
--- a/arch/x86/um/shared/sysdep/syscalls.h
+++ b/arch/x86/um/shared/sysdep/syscalls.h
@@ -1,3 +1,5 @@
+extern long sys_clone(unsigned long clone_flags, unsigned long newsp,
+	       void __user *parent_tid, void __user *child_tid);
 #ifdef __i386__
 #include "syscalls_32.h"
 #else
diff --git a/arch/x86/um/sys_call_table_32.c b/arch/x86/um/sys_call_table_32.c
index 68d1dc91b37..b5408cecac6 100644
--- a/arch/x86/um/sys_call_table_32.c
+++ b/arch/x86/um/sys_call_table_32.c
@@ -28,7 +28,7 @@
 #define ptregs_execve sys_execve
 #define ptregs_iopl sys_iopl
 #define ptregs_vm86old sys_vm86old
-#define ptregs_clone sys_clone
+#define ptregs_clone i386_clone
 #define ptregs_vm86 sys_vm86
 #define ptregs_sigaltstack sys_sigaltstack
 #define ptregs_vfork sys_vfork
diff --git a/arch/x86/um/syscalls_32.c b/arch/x86/um/syscalls_32.c
index b853e8600b9..db444c7218f 100644
--- a/arch/x86/um/syscalls_32.c
+++ b/arch/x86/um/syscalls_32.c
@@ -3,37 +3,24 @@
  * Licensed under the GPL
  */
 
-#include "linux/sched.h"
-#include "linux/shm.h"
-#include "linux/ipc.h"
-#include "linux/syscalls.h"
-#include "asm/mman.h"
-#include "asm/uaccess.h"
-#include "asm/unistd.h"
+#include <linux/syscalls.h>
+#include <sysdep/syscalls.h>
 
 /*
  * The prototype on i386 is:
  *
- *     int clone(int flags, void * child_stack, int * parent_tidptr, struct user_desc * newtls, int * child_tidptr)
+ *     int clone(int flags, void * child_stack, int * parent_tidptr, struct user_desc * newtls
  *
  * and the "newtls" arg. on i386 is read by copy_thread directly from the
  * register saved on the stack.
  */
-long sys_clone(unsigned long clone_flags, unsigned long newsp,
-	       int __user *parent_tid, void *newtls, int __user *child_tid)
+long i386_clone(unsigned long clone_flags, unsigned long newsp,
+		int __user *parent_tid, void *newtls, int __user *child_tid)
 {
-	long ret;
-
-	if (!newsp)
-		newsp = UPT_SP(&current->thread.regs.regs);
-
-	current->thread.forking = 1;
-	ret = do_fork(clone_flags, newsp, &current->thread.regs, 0, parent_tid,
-		      child_tid);
-	current->thread.forking = 0;
-	return ret;
+	return sys_clone(clone_flags, newsp, parent_tid, child_tid);
 }
 
+
 long sys_sigaction(int sig, const struct old_sigaction __user *act,
 			 struct old_sigaction __user *oact)
 {
diff --git a/arch/x86/um/syscalls_64.c b/arch/x86/um/syscalls_64.c
index f3d82bb6e15..adb08eb5c22 100644
--- a/arch/x86/um/syscalls_64.c
+++ b/arch/x86/um/syscalls_64.c
@@ -5,12 +5,9 @@
  * Licensed under the GPL
  */
 
-#include "linux/linkage.h"
-#include "linux/personality.h"
-#include "linux/utsname.h"
-#include "asm/prctl.h" /* XXX This should get the constants from libc */
-#include "asm/uaccess.h"
-#include "os.h"
+#include <linux/sched.h>
+#include <asm/prctl.h> /* XXX This should get the constants from libc */
+#include <os.h>
 
 long arch_prctl(struct task_struct *task, int code, unsigned long __user *addr)
 {
@@ -79,20 +76,6 @@ long sys_arch_prctl(int code, unsigned long addr)
 	return arch_prctl(current, code, (unsigned long __user *) addr);
 }
 
-long sys_clone(unsigned long clone_flags, unsigned long newsp,
-	       void __user *parent_tid, void __user *child_tid)
-{
-	long ret;
-
-	if (!newsp)
-		newsp = UPT_SP(&current->thread.regs.regs);
-	current->thread.forking = 1;
-	ret = do_fork(clone_flags, newsp, &current->thread.regs, 0, parent_tid,
-		      child_tid);
-	current->thread.forking = 0;
-	return ret;
-}
-
 void arch_switch_to(struct task_struct *to)
 {
 	if ((to->thread.arch.fs == 0) || (to->mm == NULL))
-- 
cgit v1.2.3-70-g09d2


From 24cc7fb69a5b5edfdff1d38c6a213d6a33648829 Mon Sep 17 00:00:00 2001
From: Jeff Mahoney <jeffm@suse.de>
Date: Thu, 20 Sep 2012 10:28:45 -0400
Subject: x86/kbuild: archscripts depends on scripts_basic

While building the SUSE kernel packages, which build the scripts,
make clean, and then build everything, we have been running into spurious
build failures. We tracked them down to a simple dependency issue:

$ make mrproper
  CLEAN   arch/x86/tools
  CLEAN   scripts/basic
$ cp patches/config/x86_64/desktop .config
$ make archscripts
  HOSTCC  arch/x86/tools/relocs
/bin/sh: scripts/basic/fixdep: No such file or directory
make[3]: *** [arch/x86/tools/relocs] Error 1
make[2]: *** [archscripts] Error 2
make[1]: *** [sub-make] Error 2
make: *** [all] Error 2

This was introduced by commit
6520fe55 (x86, realmode: 16-bit real-mode code support for relocs),
which added the archscripts dependency to archprepare.

This patch adds the scripts_basic dependency to the x86 archscripts.

Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Signed-off-by: Michal Marek <mmarek@suse.cz>
---
 arch/x86/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index b0c5276861e..c098ca4671d 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -138,7 +138,7 @@ KBUILD_CFLAGS += $(call cc-option,-mno-avx,)
 KBUILD_CFLAGS += $(mflags-y)
 KBUILD_AFLAGS += $(mflags-y)
 
-archscripts:
+archscripts: scripts_basic
 	$(Q)$(MAKE) $(build)=arch/x86/tools relocs
 
 ###
-- 
cgit v1.2.3-70-g09d2


From 26bf264e871a4b9a8ac09c21a2b518e7f23830d5 Mon Sep 17 00:00:00 2001
From: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Date: Mon, 17 Sep 2012 16:31:13 +0800
Subject: KVM: x86: Export svm/vmx exit code and vector code to userspace

Exporting KVM exit information to userspace to be consumed by perf.

Signed-off-by: Dong Hao <haodong@linux.vnet.ibm.com>
[ Dong Hao <haodong@linux.vnet.ibm.com>: rebase it on acme's git tree ]
Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Acked-by: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Avi Kivity <avi@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: kvm@vger.kernel.org
Cc: Runzhen Wang <runzhen@linux.vnet.ibm.com>
Link: http://lkml.kernel.org/r/1347870675-31495-2-git-send-email-haodong@linux.vnet.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 arch/x86/include/asm/kvm.h      |  16 ++++
 arch/x86/include/asm/kvm_host.h |  16 ----
 arch/x86/include/asm/svm.h      | 205 +++++++++++++++++++++++++---------------
 arch/x86/include/asm/vmx.h      | 127 ++++++++++++++++---------
 arch/x86/kvm/trace.h            |  89 -----------------
 5 files changed, 230 insertions(+), 223 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h
index 246617efd67..41e08cb6a09 100644
--- a/arch/x86/include/asm/kvm.h
+++ b/arch/x86/include/asm/kvm.h
@@ -9,6 +9,22 @@
 #include <linux/types.h>
 #include <linux/ioctl.h>
 
+#define DE_VECTOR 0
+#define DB_VECTOR 1
+#define BP_VECTOR 3
+#define OF_VECTOR 4
+#define BR_VECTOR 5
+#define UD_VECTOR 6
+#define NM_VECTOR 7
+#define DF_VECTOR 8
+#define TS_VECTOR 10
+#define NP_VECTOR 11
+#define SS_VECTOR 12
+#define GP_VECTOR 13
+#define PF_VECTOR 14
+#define MF_VECTOR 16
+#define MC_VECTOR 18
+
 /* Select x86 specific features in <linux/kvm.h> */
 #define __KVM_HAVE_PIT
 #define __KVM_HAVE_IOAPIC
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 09155d64cf7..1eaa6b05667 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -75,22 +75,6 @@
 #define KVM_HPAGE_MASK(x)	(~(KVM_HPAGE_SIZE(x) - 1))
 #define KVM_PAGES_PER_HPAGE(x)	(KVM_HPAGE_SIZE(x) / PAGE_SIZE)
 
-#define DE_VECTOR 0
-#define DB_VECTOR 1
-#define BP_VECTOR 3
-#define OF_VECTOR 4
-#define BR_VECTOR 5
-#define UD_VECTOR 6
-#define NM_VECTOR 7
-#define DF_VECTOR 8
-#define TS_VECTOR 10
-#define NP_VECTOR 11
-#define SS_VECTOR 12
-#define GP_VECTOR 13
-#define PF_VECTOR 14
-#define MF_VECTOR 16
-#define MC_VECTOR 18
-
 #define SELECTOR_TI_MASK (1 << 2)
 #define SELECTOR_RPL_MASK 0x03
 
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index f2b83bc7d78..cdf5674dd23 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -1,6 +1,135 @@
 #ifndef __SVM_H
 #define __SVM_H
 
+#define SVM_EXIT_READ_CR0      0x000
+#define SVM_EXIT_READ_CR3      0x003
+#define SVM_EXIT_READ_CR4      0x004
+#define SVM_EXIT_READ_CR8      0x008
+#define SVM_EXIT_WRITE_CR0     0x010
+#define SVM_EXIT_WRITE_CR3     0x013
+#define SVM_EXIT_WRITE_CR4     0x014
+#define SVM_EXIT_WRITE_CR8     0x018
+#define SVM_EXIT_READ_DR0      0x020
+#define SVM_EXIT_READ_DR1      0x021
+#define SVM_EXIT_READ_DR2      0x022
+#define SVM_EXIT_READ_DR3      0x023
+#define SVM_EXIT_READ_DR4      0x024
+#define SVM_EXIT_READ_DR5      0x025
+#define SVM_EXIT_READ_DR6      0x026
+#define SVM_EXIT_READ_DR7      0x027
+#define SVM_EXIT_WRITE_DR0     0x030
+#define SVM_EXIT_WRITE_DR1     0x031
+#define SVM_EXIT_WRITE_DR2     0x032
+#define SVM_EXIT_WRITE_DR3     0x033
+#define SVM_EXIT_WRITE_DR4     0x034
+#define SVM_EXIT_WRITE_DR5     0x035
+#define SVM_EXIT_WRITE_DR6     0x036
+#define SVM_EXIT_WRITE_DR7     0x037
+#define SVM_EXIT_EXCP_BASE     0x040
+#define SVM_EXIT_INTR          0x060
+#define SVM_EXIT_NMI           0x061
+#define SVM_EXIT_SMI           0x062
+#define SVM_EXIT_INIT          0x063
+#define SVM_EXIT_VINTR         0x064
+#define SVM_EXIT_CR0_SEL_WRITE 0x065
+#define SVM_EXIT_IDTR_READ     0x066
+#define SVM_EXIT_GDTR_READ     0x067
+#define SVM_EXIT_LDTR_READ     0x068
+#define SVM_EXIT_TR_READ       0x069
+#define SVM_EXIT_IDTR_WRITE    0x06a
+#define SVM_EXIT_GDTR_WRITE    0x06b
+#define SVM_EXIT_LDTR_WRITE    0x06c
+#define SVM_EXIT_TR_WRITE      0x06d
+#define SVM_EXIT_RDTSC         0x06e
+#define SVM_EXIT_RDPMC         0x06f
+#define SVM_EXIT_PUSHF         0x070
+#define SVM_EXIT_POPF          0x071
+#define SVM_EXIT_CPUID         0x072
+#define SVM_EXIT_RSM           0x073
+#define SVM_EXIT_IRET          0x074
+#define SVM_EXIT_SWINT         0x075
+#define SVM_EXIT_INVD          0x076
+#define SVM_EXIT_PAUSE         0x077
+#define SVM_EXIT_HLT           0x078
+#define SVM_EXIT_INVLPG        0x079
+#define SVM_EXIT_INVLPGA       0x07a
+#define SVM_EXIT_IOIO          0x07b
+#define SVM_EXIT_MSR           0x07c
+#define SVM_EXIT_TASK_SWITCH   0x07d
+#define SVM_EXIT_FERR_FREEZE   0x07e
+#define SVM_EXIT_SHUTDOWN      0x07f
+#define SVM_EXIT_VMRUN         0x080
+#define SVM_EXIT_VMMCALL       0x081
+#define SVM_EXIT_VMLOAD        0x082
+#define SVM_EXIT_VMSAVE        0x083
+#define SVM_EXIT_STGI          0x084
+#define SVM_EXIT_CLGI          0x085
+#define SVM_EXIT_SKINIT        0x086
+#define SVM_EXIT_RDTSCP        0x087
+#define SVM_EXIT_ICEBP         0x088
+#define SVM_EXIT_WBINVD        0x089
+#define SVM_EXIT_MONITOR       0x08a
+#define SVM_EXIT_MWAIT         0x08b
+#define SVM_EXIT_MWAIT_COND    0x08c
+#define SVM_EXIT_XSETBV        0x08d
+#define SVM_EXIT_NPF           0x400
+
+#define SVM_EXIT_ERR           -1
+
+#define SVM_EXIT_REASONS \
+	{ SVM_EXIT_READ_CR0,    "read_cr0" }, \
+	{ SVM_EXIT_READ_CR3,    "read_cr3" }, \
+	{ SVM_EXIT_READ_CR4,    "read_cr4" }, \
+	{ SVM_EXIT_READ_CR8,    "read_cr8" }, \
+	{ SVM_EXIT_WRITE_CR0,   "write_cr0" }, \
+	{ SVM_EXIT_WRITE_CR3,   "write_cr3" }, \
+	{ SVM_EXIT_WRITE_CR4,   "write_cr4" }, \
+	{ SVM_EXIT_WRITE_CR8,   "write_cr8" }, \
+	{ SVM_EXIT_READ_DR0,    "read_dr0" }, \
+	{ SVM_EXIT_READ_DR1,    "read_dr1" }, \
+	{ SVM_EXIT_READ_DR2,    "read_dr2" }, \
+	{ SVM_EXIT_READ_DR3,    "read_dr3" }, \
+	{ SVM_EXIT_WRITE_DR0,   "write_dr0" }, \
+	{ SVM_EXIT_WRITE_DR1,   "write_dr1" }, \
+	{ SVM_EXIT_WRITE_DR2,   "write_dr2" }, \
+	{ SVM_EXIT_WRITE_DR3,   "write_dr3" }, \
+	{ SVM_EXIT_WRITE_DR5,   "write_dr5" }, \
+	{ SVM_EXIT_WRITE_DR7,   "write_dr7" }, \
+	{ SVM_EXIT_EXCP_BASE + DB_VECTOR,       "DB excp" }, \
+	{ SVM_EXIT_EXCP_BASE + BP_VECTOR,       "BP excp" }, \
+	{ SVM_EXIT_EXCP_BASE + UD_VECTOR,       "UD excp" }, \
+	{ SVM_EXIT_EXCP_BASE + PF_VECTOR,       "PF excp" }, \
+	{ SVM_EXIT_EXCP_BASE + NM_VECTOR,       "NM excp" }, \
+	{ SVM_EXIT_EXCP_BASE + MC_VECTOR,       "MC excp" }, \
+	{ SVM_EXIT_INTR,        "interrupt" }, \
+	{ SVM_EXIT_NMI,         "nmi" }, \
+	{ SVM_EXIT_SMI,         "smi" }, \
+	{ SVM_EXIT_INIT,        "init" }, \
+	{ SVM_EXIT_VINTR,       "vintr" }, \
+	{ SVM_EXIT_CPUID,       "cpuid" }, \
+	{ SVM_EXIT_INVD,        "invd" }, \
+	{ SVM_EXIT_HLT,         "hlt" }, \
+	{ SVM_EXIT_INVLPG,      "invlpg" }, \
+	{ SVM_EXIT_INVLPGA,     "invlpga" }, \
+	{ SVM_EXIT_IOIO,        "io" }, \
+	{ SVM_EXIT_MSR,         "msr" }, \
+	{ SVM_EXIT_TASK_SWITCH, "task_switch" }, \
+	{ SVM_EXIT_SHUTDOWN,    "shutdown" }, \
+	{ SVM_EXIT_VMRUN,       "vmrun" }, \
+	{ SVM_EXIT_VMMCALL,     "hypercall" }, \
+	{ SVM_EXIT_VMLOAD,      "vmload" }, \
+	{ SVM_EXIT_VMSAVE,      "vmsave" }, \
+	{ SVM_EXIT_STGI,        "stgi" }, \
+	{ SVM_EXIT_CLGI,        "clgi" }, \
+	{ SVM_EXIT_SKINIT,      "skinit" }, \
+	{ SVM_EXIT_WBINVD,      "wbinvd" }, \
+	{ SVM_EXIT_MONITOR,     "monitor" }, \
+	{ SVM_EXIT_MWAIT,       "mwait" }, \
+	{ SVM_EXIT_XSETBV,      "xsetbv" }, \
+	{ SVM_EXIT_NPF,         "npf" }
+
+#ifdef __KERNEL__
+
 enum {
 	INTERCEPT_INTR,
 	INTERCEPT_NMI,
@@ -264,81 +393,6 @@ struct __attribute__ ((__packed__)) vmcb {
 
 #define SVM_EXITINFO_REG_MASK 0x0F
 
-#define	SVM_EXIT_READ_CR0 	0x000
-#define	SVM_EXIT_READ_CR3 	0x003
-#define	SVM_EXIT_READ_CR4 	0x004
-#define	SVM_EXIT_READ_CR8 	0x008
-#define	SVM_EXIT_WRITE_CR0 	0x010
-#define	SVM_EXIT_WRITE_CR3 	0x013
-#define	SVM_EXIT_WRITE_CR4 	0x014
-#define	SVM_EXIT_WRITE_CR8 	0x018
-#define	SVM_EXIT_READ_DR0 	0x020
-#define	SVM_EXIT_READ_DR1 	0x021
-#define	SVM_EXIT_READ_DR2 	0x022
-#define	SVM_EXIT_READ_DR3 	0x023
-#define	SVM_EXIT_READ_DR4 	0x024
-#define	SVM_EXIT_READ_DR5 	0x025
-#define	SVM_EXIT_READ_DR6 	0x026
-#define	SVM_EXIT_READ_DR7 	0x027
-#define	SVM_EXIT_WRITE_DR0 	0x030
-#define	SVM_EXIT_WRITE_DR1 	0x031
-#define	SVM_EXIT_WRITE_DR2 	0x032
-#define	SVM_EXIT_WRITE_DR3 	0x033
-#define	SVM_EXIT_WRITE_DR4 	0x034
-#define	SVM_EXIT_WRITE_DR5 	0x035
-#define	SVM_EXIT_WRITE_DR6 	0x036
-#define	SVM_EXIT_WRITE_DR7 	0x037
-#define SVM_EXIT_EXCP_BASE      0x040
-#define SVM_EXIT_INTR		0x060
-#define SVM_EXIT_NMI		0x061
-#define SVM_EXIT_SMI		0x062
-#define SVM_EXIT_INIT		0x063
-#define SVM_EXIT_VINTR		0x064
-#define SVM_EXIT_CR0_SEL_WRITE	0x065
-#define SVM_EXIT_IDTR_READ	0x066
-#define SVM_EXIT_GDTR_READ	0x067
-#define SVM_EXIT_LDTR_READ	0x068
-#define SVM_EXIT_TR_READ	0x069
-#define SVM_EXIT_IDTR_WRITE	0x06a
-#define SVM_EXIT_GDTR_WRITE	0x06b
-#define SVM_EXIT_LDTR_WRITE	0x06c
-#define SVM_EXIT_TR_WRITE	0x06d
-#define SVM_EXIT_RDTSC		0x06e
-#define SVM_EXIT_RDPMC		0x06f
-#define SVM_EXIT_PUSHF		0x070
-#define SVM_EXIT_POPF		0x071
-#define SVM_EXIT_CPUID		0x072
-#define SVM_EXIT_RSM		0x073
-#define SVM_EXIT_IRET		0x074
-#define SVM_EXIT_SWINT		0x075
-#define SVM_EXIT_INVD		0x076
-#define SVM_EXIT_PAUSE		0x077
-#define SVM_EXIT_HLT		0x078
-#define SVM_EXIT_INVLPG		0x079
-#define SVM_EXIT_INVLPGA	0x07a
-#define SVM_EXIT_IOIO		0x07b
-#define SVM_EXIT_MSR		0x07c
-#define SVM_EXIT_TASK_SWITCH	0x07d
-#define SVM_EXIT_FERR_FREEZE	0x07e
-#define SVM_EXIT_SHUTDOWN	0x07f
-#define SVM_EXIT_VMRUN		0x080
-#define SVM_EXIT_VMMCALL	0x081
-#define SVM_EXIT_VMLOAD		0x082
-#define SVM_EXIT_VMSAVE		0x083
-#define SVM_EXIT_STGI		0x084
-#define SVM_EXIT_CLGI		0x085
-#define SVM_EXIT_SKINIT		0x086
-#define SVM_EXIT_RDTSCP		0x087
-#define SVM_EXIT_ICEBP		0x088
-#define SVM_EXIT_WBINVD		0x089
-#define SVM_EXIT_MONITOR	0x08a
-#define SVM_EXIT_MWAIT		0x08b
-#define SVM_EXIT_MWAIT_COND	0x08c
-#define SVM_EXIT_XSETBV		0x08d
-#define SVM_EXIT_NPF  		0x400
-
-#define SVM_EXIT_ERR		-1
-
 #define SVM_CR0_SELECTIVE_MASK (X86_CR0_TS | X86_CR0_MP)
 
 #define SVM_VMLOAD ".byte 0x0f, 0x01, 0xda"
@@ -350,3 +404,4 @@ struct __attribute__ ((__packed__)) vmcb {
 
 #endif
 
+#endif
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 74fcb963595..36ec21c36d6 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -25,6 +25,88 @@
  *
  */
 
+#define VMX_EXIT_REASONS_FAILED_VMENTRY         0x80000000
+
+#define EXIT_REASON_EXCEPTION_NMI       0
+#define EXIT_REASON_EXTERNAL_INTERRUPT  1
+#define EXIT_REASON_TRIPLE_FAULT        2
+
+#define EXIT_REASON_PENDING_INTERRUPT   7
+#define EXIT_REASON_NMI_WINDOW          8
+#define EXIT_REASON_TASK_SWITCH         9
+#define EXIT_REASON_CPUID               10
+#define EXIT_REASON_HLT                 12
+#define EXIT_REASON_INVD                13
+#define EXIT_REASON_INVLPG              14
+#define EXIT_REASON_RDPMC               15
+#define EXIT_REASON_RDTSC               16
+#define EXIT_REASON_VMCALL              18
+#define EXIT_REASON_VMCLEAR             19
+#define EXIT_REASON_VMLAUNCH            20
+#define EXIT_REASON_VMPTRLD             21
+#define EXIT_REASON_VMPTRST             22
+#define EXIT_REASON_VMREAD              23
+#define EXIT_REASON_VMRESUME            24
+#define EXIT_REASON_VMWRITE             25
+#define EXIT_REASON_VMOFF               26
+#define EXIT_REASON_VMON                27
+#define EXIT_REASON_CR_ACCESS           28
+#define EXIT_REASON_DR_ACCESS           29
+#define EXIT_REASON_IO_INSTRUCTION      30
+#define EXIT_REASON_MSR_READ            31
+#define EXIT_REASON_MSR_WRITE           32
+#define EXIT_REASON_INVALID_STATE       33
+#define EXIT_REASON_MWAIT_INSTRUCTION   36
+#define EXIT_REASON_MONITOR_INSTRUCTION 39
+#define EXIT_REASON_PAUSE_INSTRUCTION   40
+#define EXIT_REASON_MCE_DURING_VMENTRY  41
+#define EXIT_REASON_TPR_BELOW_THRESHOLD 43
+#define EXIT_REASON_APIC_ACCESS         44
+#define EXIT_REASON_EPT_VIOLATION       48
+#define EXIT_REASON_EPT_MISCONFIG       49
+#define EXIT_REASON_WBINVD              54
+#define EXIT_REASON_XSETBV              55
+#define EXIT_REASON_INVPCID             58
+
+#define VMX_EXIT_REASONS \
+	{ EXIT_REASON_EXCEPTION_NMI,         "EXCEPTION_NMI" }, \
+	{ EXIT_REASON_EXTERNAL_INTERRUPT,    "EXTERNAL_INTERRUPT" }, \
+	{ EXIT_REASON_TRIPLE_FAULT,          "TRIPLE_FAULT" }, \
+	{ EXIT_REASON_PENDING_INTERRUPT,     "PENDING_INTERRUPT" }, \
+	{ EXIT_REASON_NMI_WINDOW,            "NMI_WINDOW" }, \
+	{ EXIT_REASON_TASK_SWITCH,           "TASK_SWITCH" }, \
+	{ EXIT_REASON_CPUID,                 "CPUID" }, \
+	{ EXIT_REASON_HLT,                   "HLT" }, \
+	{ EXIT_REASON_INVLPG,                "INVLPG" }, \
+	{ EXIT_REASON_RDPMC,                 "RDPMC" }, \
+	{ EXIT_REASON_RDTSC,                 "RDTSC" }, \
+	{ EXIT_REASON_VMCALL,                "VMCALL" }, \
+	{ EXIT_REASON_VMCLEAR,               "VMCLEAR" }, \
+	{ EXIT_REASON_VMLAUNCH,              "VMLAUNCH" }, \
+	{ EXIT_REASON_VMPTRLD,               "VMPTRLD" }, \
+	{ EXIT_REASON_VMPTRST,               "VMPTRST" }, \
+	{ EXIT_REASON_VMREAD,                "VMREAD" }, \
+	{ EXIT_REASON_VMRESUME,              "VMRESUME" }, \
+	{ EXIT_REASON_VMWRITE,               "VMWRITE" }, \
+	{ EXIT_REASON_VMOFF,                 "VMOFF" }, \
+	{ EXIT_REASON_VMON,                  "VMON" }, \
+	{ EXIT_REASON_CR_ACCESS,             "CR_ACCESS" }, \
+	{ EXIT_REASON_DR_ACCESS,             "DR_ACCESS" }, \
+	{ EXIT_REASON_IO_INSTRUCTION,        "IO_INSTRUCTION" }, \
+	{ EXIT_REASON_MSR_READ,              "MSR_READ" }, \
+	{ EXIT_REASON_MSR_WRITE,             "MSR_WRITE" }, \
+	{ EXIT_REASON_MWAIT_INSTRUCTION,     "MWAIT_INSTRUCTION" }, \
+	{ EXIT_REASON_MONITOR_INSTRUCTION,   "MONITOR_INSTRUCTION" }, \
+	{ EXIT_REASON_PAUSE_INSTRUCTION,     "PAUSE_INSTRUCTION" }, \
+	{ EXIT_REASON_MCE_DURING_VMENTRY,    "MCE_DURING_VMENTRY" }, \
+	{ EXIT_REASON_TPR_BELOW_THRESHOLD,   "TPR_BELOW_THRESHOLD" }, \
+	{ EXIT_REASON_APIC_ACCESS,           "APIC_ACCESS" }, \
+	{ EXIT_REASON_EPT_VIOLATION,         "EPT_VIOLATION" }, \
+	{ EXIT_REASON_EPT_MISCONFIG,         "EPT_MISCONFIG" }, \
+	{ EXIT_REASON_WBINVD,                "WBINVD" }
+
+#ifdef __KERNEL__
+
 #include <linux/types.h>
 
 /*
@@ -241,49 +323,6 @@ enum vmcs_field {
 	HOST_RIP                        = 0x00006c16,
 };
 
-#define VMX_EXIT_REASONS_FAILED_VMENTRY         0x80000000
-
-#define EXIT_REASON_EXCEPTION_NMI       0
-#define EXIT_REASON_EXTERNAL_INTERRUPT  1
-#define EXIT_REASON_TRIPLE_FAULT        2
-
-#define EXIT_REASON_PENDING_INTERRUPT   7
-#define EXIT_REASON_NMI_WINDOW		8
-#define EXIT_REASON_TASK_SWITCH         9
-#define EXIT_REASON_CPUID               10
-#define EXIT_REASON_HLT                 12
-#define EXIT_REASON_INVD                13
-#define EXIT_REASON_INVLPG              14
-#define EXIT_REASON_RDPMC               15
-#define EXIT_REASON_RDTSC               16
-#define EXIT_REASON_VMCALL              18
-#define EXIT_REASON_VMCLEAR             19
-#define EXIT_REASON_VMLAUNCH            20
-#define EXIT_REASON_VMPTRLD             21
-#define EXIT_REASON_VMPTRST             22
-#define EXIT_REASON_VMREAD              23
-#define EXIT_REASON_VMRESUME            24
-#define EXIT_REASON_VMWRITE             25
-#define EXIT_REASON_VMOFF               26
-#define EXIT_REASON_VMON                27
-#define EXIT_REASON_CR_ACCESS           28
-#define EXIT_REASON_DR_ACCESS           29
-#define EXIT_REASON_IO_INSTRUCTION      30
-#define EXIT_REASON_MSR_READ            31
-#define EXIT_REASON_MSR_WRITE           32
-#define EXIT_REASON_INVALID_STATE	33
-#define EXIT_REASON_MWAIT_INSTRUCTION   36
-#define EXIT_REASON_MONITOR_INSTRUCTION 39
-#define EXIT_REASON_PAUSE_INSTRUCTION   40
-#define EXIT_REASON_MCE_DURING_VMENTRY	 41
-#define EXIT_REASON_TPR_BELOW_THRESHOLD 43
-#define EXIT_REASON_APIC_ACCESS         44
-#define EXIT_REASON_EPT_VIOLATION       48
-#define EXIT_REASON_EPT_MISCONFIG       49
-#define EXIT_REASON_WBINVD		54
-#define EXIT_REASON_XSETBV		55
-#define EXIT_REASON_INVPCID		58
-
 /*
  * Interruption-information format
  */
@@ -488,3 +527,5 @@ enum vm_instruction_error_number {
 };
 
 #endif
+
+#endif
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index a71faf727ff..bca63f04dcc 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -183,95 +183,6 @@ TRACE_EVENT(kvm_apic,
 #define KVM_ISA_VMX   1
 #define KVM_ISA_SVM   2
 
-#define VMX_EXIT_REASONS \
-	{ EXIT_REASON_EXCEPTION_NMI,		"EXCEPTION_NMI" }, \
-	{ EXIT_REASON_EXTERNAL_INTERRUPT,	"EXTERNAL_INTERRUPT" }, \
-	{ EXIT_REASON_TRIPLE_FAULT,		"TRIPLE_FAULT" }, \
-	{ EXIT_REASON_PENDING_INTERRUPT,	"PENDING_INTERRUPT" }, \
-	{ EXIT_REASON_NMI_WINDOW,		"NMI_WINDOW" }, \
-	{ EXIT_REASON_TASK_SWITCH,		"TASK_SWITCH" }, \
-	{ EXIT_REASON_CPUID,			"CPUID" }, \
-	{ EXIT_REASON_HLT,			"HLT" }, \
-	{ EXIT_REASON_INVLPG,			"INVLPG" }, \
-	{ EXIT_REASON_RDPMC,			"RDPMC" }, \
-	{ EXIT_REASON_RDTSC,			"RDTSC" }, \
-	{ EXIT_REASON_VMCALL,			"VMCALL" }, \
-	{ EXIT_REASON_VMCLEAR,			"VMCLEAR" }, \
-	{ EXIT_REASON_VMLAUNCH,			"VMLAUNCH" }, \
-	{ EXIT_REASON_VMPTRLD,			"VMPTRLD" }, \
-	{ EXIT_REASON_VMPTRST,			"VMPTRST" }, \
-	{ EXIT_REASON_VMREAD,			"VMREAD" }, \
-	{ EXIT_REASON_VMRESUME,			"VMRESUME" }, \
-	{ EXIT_REASON_VMWRITE,			"VMWRITE" }, \
-	{ EXIT_REASON_VMOFF,			"VMOFF" }, \
-	{ EXIT_REASON_VMON,			"VMON" }, \
-	{ EXIT_REASON_CR_ACCESS,		"CR_ACCESS" }, \
-	{ EXIT_REASON_DR_ACCESS,		"DR_ACCESS" }, \
-	{ EXIT_REASON_IO_INSTRUCTION,		"IO_INSTRUCTION" }, \
-	{ EXIT_REASON_MSR_READ,			"MSR_READ" }, \
-	{ EXIT_REASON_MSR_WRITE,		"MSR_WRITE" }, \
-	{ EXIT_REASON_MWAIT_INSTRUCTION,	"MWAIT_INSTRUCTION" }, \
-	{ EXIT_REASON_MONITOR_INSTRUCTION,	"MONITOR_INSTRUCTION" }, \
-	{ EXIT_REASON_PAUSE_INSTRUCTION,	"PAUSE_INSTRUCTION" }, \
-	{ EXIT_REASON_MCE_DURING_VMENTRY,	"MCE_DURING_VMENTRY" }, \
-	{ EXIT_REASON_TPR_BELOW_THRESHOLD,	"TPR_BELOW_THRESHOLD" },	\
-	{ EXIT_REASON_APIC_ACCESS,		"APIC_ACCESS" }, \
-	{ EXIT_REASON_EPT_VIOLATION,		"EPT_VIOLATION" }, \
-	{ EXIT_REASON_EPT_MISCONFIG,		"EPT_MISCONFIG" }, \
-	{ EXIT_REASON_WBINVD,			"WBINVD" }
-
-#define SVM_EXIT_REASONS \
-	{ SVM_EXIT_READ_CR0,			"read_cr0" }, \
-	{ SVM_EXIT_READ_CR3,			"read_cr3" }, \
-	{ SVM_EXIT_READ_CR4,			"read_cr4" }, \
-	{ SVM_EXIT_READ_CR8,			"read_cr8" }, \
-	{ SVM_EXIT_WRITE_CR0,			"write_cr0" }, \
-	{ SVM_EXIT_WRITE_CR3,			"write_cr3" }, \
-	{ SVM_EXIT_WRITE_CR4,			"write_cr4" }, \
-	{ SVM_EXIT_WRITE_CR8,			"write_cr8" }, \
-	{ SVM_EXIT_READ_DR0,			"read_dr0" }, \
-	{ SVM_EXIT_READ_DR1,			"read_dr1" }, \
-	{ SVM_EXIT_READ_DR2,			"read_dr2" }, \
-	{ SVM_EXIT_READ_DR3,			"read_dr3" }, \
-	{ SVM_EXIT_WRITE_DR0,			"write_dr0" }, \
-	{ SVM_EXIT_WRITE_DR1,			"write_dr1" }, \
-	{ SVM_EXIT_WRITE_DR2,			"write_dr2" }, \
-	{ SVM_EXIT_WRITE_DR3,			"write_dr3" }, \
-	{ SVM_EXIT_WRITE_DR5,			"write_dr5" }, \
-	{ SVM_EXIT_WRITE_DR7,			"write_dr7" }, \
-	{ SVM_EXIT_EXCP_BASE + DB_VECTOR,	"DB excp" }, \
-	{ SVM_EXIT_EXCP_BASE + BP_VECTOR,	"BP excp" }, \
-	{ SVM_EXIT_EXCP_BASE + UD_VECTOR,	"UD excp" }, \
-	{ SVM_EXIT_EXCP_BASE + PF_VECTOR,	"PF excp" }, \
-	{ SVM_EXIT_EXCP_BASE + NM_VECTOR,	"NM excp" }, \
-	{ SVM_EXIT_EXCP_BASE + MC_VECTOR,	"MC excp" }, \
-	{ SVM_EXIT_INTR,			"interrupt" }, \
-	{ SVM_EXIT_NMI,				"nmi" }, \
-	{ SVM_EXIT_SMI,				"smi" }, \
-	{ SVM_EXIT_INIT,			"init" }, \
-	{ SVM_EXIT_VINTR,			"vintr" }, \
-	{ SVM_EXIT_CPUID,			"cpuid" }, \
-	{ SVM_EXIT_INVD,			"invd" }, \
-	{ SVM_EXIT_HLT,				"hlt" }, \
-	{ SVM_EXIT_INVLPG,			"invlpg" }, \
-	{ SVM_EXIT_INVLPGA,			"invlpga" }, \
-	{ SVM_EXIT_IOIO,			"io" }, \
-	{ SVM_EXIT_MSR,				"msr" }, \
-	{ SVM_EXIT_TASK_SWITCH,			"task_switch" }, \
-	{ SVM_EXIT_SHUTDOWN,			"shutdown" }, \
-	{ SVM_EXIT_VMRUN,			"vmrun" }, \
-	{ SVM_EXIT_VMMCALL,			"hypercall" }, \
-	{ SVM_EXIT_VMLOAD,			"vmload" }, \
-	{ SVM_EXIT_VMSAVE,			"vmsave" }, \
-	{ SVM_EXIT_STGI,			"stgi" }, \
-	{ SVM_EXIT_CLGI,			"clgi" }, \
-	{ SVM_EXIT_SKINIT,			"skinit" }, \
-	{ SVM_EXIT_WBINVD,			"wbinvd" }, \
-	{ SVM_EXIT_MONITOR,			"monitor" }, \
-	{ SVM_EXIT_MWAIT,			"mwait" }, \
-	{ SVM_EXIT_XSETBV,			"xsetbv" }, \
-	{ SVM_EXIT_NPF,				"npf" }
-
 /*
  * Tracepoint for kvm guest exit:
  */
-- 
cgit v1.2.3-70-g09d2


From 8bd753be7a96443dd5111cb07ed5907a3787c978 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@linux.intel.com>
Date: Fri, 21 Sep 2012 12:43:06 -0700
Subject: x86-32, mm: The WP test should be done on a kernel page

PAGE_READONLY includes user permission, but this is a page used
exclusively by the kernel; use PAGE_KERNEL_RO instead.

Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Link: http://lkml.kernel.org/r/1348256595-29119-3-git-send-email-hpa@linux.intel.com
---
 arch/x86/mm/init_32.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 575d86f85ce..e537b351e8c 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -712,7 +712,7 @@ static void __init test_wp_bit(void)
   "Checking if this processor honours the WP bit even in supervisor mode...");
 
 	/* Any page-aligned address will do, the test is non-destructive */
-	__set_fixmap(FIX_WP_TEST, __pa(&swapper_pg_dir), PAGE_READONLY);
+	__set_fixmap(FIX_WP_TEST, __pa(&swapper_pg_dir), PAGE_KERNEL_RO);
 	boot_cpu_data.wp_works_ok = do_test_wp_bit();
 	clear_fixmap(FIX_WP_TEST);
 
-- 
cgit v1.2.3-70-g09d2


From 85fdf05cc395f23384cb0adb22765cbaa9653b54 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@linux.intel.com>
Date: Fri, 21 Sep 2012 12:43:07 -0700
Subject: x86, smap: Add CR4 bit for SMAP

Add X86_CR4_SMAP to <asm/processor-flags.h>.

Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Link: http://lkml.kernel.org/r/1348256595-29119-4-git-send-email-hpa@linux.intel.com
---
 arch/x86/include/asm/processor-flags.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h
index aea1d1d848c..680cf09ed10 100644
--- a/arch/x86/include/asm/processor-flags.h
+++ b/arch/x86/include/asm/processor-flags.h
@@ -65,6 +65,7 @@
 #define X86_CR4_PCIDE	0x00020000 /* enable PCID support */
 #define X86_CR4_OSXSAVE 0x00040000 /* enable xsave and xrestore */
 #define X86_CR4_SMEP	0x00100000 /* enable SMEP support */
+#define X86_CR4_SMAP	0x00200000 /* enable SMAP support */
 
 /*
  * x86-64 Task Priority Register, CR8
-- 
cgit v1.2.3-70-g09d2


From 9cebed423c84a56b871327dd77e555d1d2186a6b Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@linux.intel.com>
Date: Fri, 21 Sep 2012 12:43:08 -0700
Subject: x86, alternative: Use .pushsection/.popsection

.section/.previous doesn't nest.  Use .pushsection/.popsection in
<asm/alternative.h> so that they can be properly nested.

Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Link: http://lkml.kernel.org/r/1348256595-29119-5-git-send-email-hpa@linux.intel.com
---
 arch/x86/include/asm/alternative-asm.h |  4 ++--
 arch/x86/include/asm/alternative.h     | 32 ++++++++++++++++----------------
 2 files changed, 18 insertions(+), 18 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h
index 952bd0100c5..018d29fe634 100644
--- a/arch/x86/include/asm/alternative-asm.h
+++ b/arch/x86/include/asm/alternative-asm.h
@@ -5,10 +5,10 @@
 #ifdef CONFIG_SMP
 	.macro LOCK_PREFIX
 672:	lock
-	.section .smp_locks,"a"
+	.pushsection .smp_locks,"a"
 	.balign 4
 	.long 672b - .
-	.previous
+	.popsection
 	.endm
 #else
 	.macro LOCK_PREFIX
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 70780689599..87bc00d4efe 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -29,10 +29,10 @@
 
 #ifdef CONFIG_SMP
 #define LOCK_PREFIX_HERE \
-		".section .smp_locks,\"a\"\n"	\
-		".balign 4\n"			\
-		".long 671f - .\n" /* offset */	\
-		".previous\n"			\
+		".pushsection .smp_locks,\"a\"\n"	\
+		".balign 4\n"				\
+		".long 671f - .\n" /* offset */		\
+		".popsection\n"				\
 		"671:"
 
 #define LOCK_PREFIX LOCK_PREFIX_HERE "\n\tlock; "
@@ -99,30 +99,30 @@ static inline int alternatives_text_reserved(void *start, void *end)
 /* alternative assembly primitive: */
 #define ALTERNATIVE(oldinstr, newinstr, feature)			\
 	OLDINSTR(oldinstr)						\
-	".section .altinstructions,\"a\"\n"				\
+	".pushsection .altinstructions,\"a\"\n"				\
 	ALTINSTR_ENTRY(feature, 1)					\
-	".previous\n"							\
-	".section .discard,\"aw\",@progbits\n"				\
+	".popsection\n"							\
+	".pushsection .discard,\"aw\",@progbits\n"			\
 	DISCARD_ENTRY(1)						\
-	".previous\n"							\
-	".section .altinstr_replacement, \"ax\"\n"			\
+	".popsection\n"							\
+	".pushsection .altinstr_replacement, \"ax\"\n"			\
 	ALTINSTR_REPLACEMENT(newinstr, feature, 1)			\
-	".previous"
+	".popsection"
 
 #define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\
 	OLDINSTR(oldinstr)						\
-	".section .altinstructions,\"a\"\n"				\
+	".pushsection .altinstructions,\"a\"\n"				\
 	ALTINSTR_ENTRY(feature1, 1)					\
 	ALTINSTR_ENTRY(feature2, 2)					\
-	".previous\n"							\
-	".section .discard,\"aw\",@progbits\n"				\
+	".popsection\n"							\
+	".pushsection .discard,\"aw\",@progbits\n"			\
 	DISCARD_ENTRY(1)						\
 	DISCARD_ENTRY(2)						\
-	".previous\n"							\
-	".section .altinstr_replacement, \"ax\"\n"			\
+	".popsection\n"							\
+	".pushsection .altinstr_replacement, \"ax\"\n"			\
 	ALTINSTR_REPLACEMENT(newinstr1, feature1, 1)			\
 	ALTINSTR_REPLACEMENT(newinstr2, feature2, 2)			\
-	".previous"
+	".popsection"
 
 /*
  * This must be included *after* the definition of ALTERNATIVE due to
-- 
cgit v1.2.3-70-g09d2


From 76f30759f690db21ca567a20665ed2679ad3235b Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@linux.intel.com>
Date: Fri, 21 Sep 2012 12:43:09 -0700
Subject: x86, alternative: Add header guards to <asm/alternative-asm.h>

Add header guards to protect <asm/alternative-asm.h> against multiple
inclusion.

Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Link: http://lkml.kernel.org/r/1348256595-29119-6-git-send-email-hpa@linux.intel.com
---
 arch/x86/include/asm/alternative-asm.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h
index 018d29fe634..372231c22a4 100644
--- a/arch/x86/include/asm/alternative-asm.h
+++ b/arch/x86/include/asm/alternative-asm.h
@@ -1,3 +1,6 @@
+#ifndef _ASM_X86_ALTERNATIVE_ASM_H
+#define _ASM_X86_ALTERNATIVE_ASM_H
+
 #ifdef __ASSEMBLY__
 
 #include <asm/asm.h>
@@ -24,3 +27,5 @@
 .endm
 
 #endif  /*  __ASSEMBLY__  */
+
+#endif /* _ASM_X86_ALTERNATIVE_ASM_H */
-- 
cgit v1.2.3-70-g09d2


From 51ae4a2d775e1ee456282d7c60e49693d0a8555d Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@linux.intel.com>
Date: Fri, 21 Sep 2012 12:43:10 -0700
Subject: x86, smap: Add a header file with macros for STAC/CLAC

The STAC/CLAC instructions are only available with SMAP, but on the
other hand they aren't needed if SMAP is not available, or before we
start to run userspace, so construct them as alternatives which start
out as noops and are enabled by the alternatives mechanism.

Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Link: http://lkml.kernel.org/r/1348256595-29119-7-git-send-email-hpa@linux.intel.com
---
 arch/x86/Kconfig            | 11 ++++++
 arch/x86/include/asm/smap.h | 91 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 102 insertions(+)
 create mode 100644 arch/x86/include/asm/smap.h

(limited to 'arch/x86')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 8ec3a1aa4ab..5ce86941333 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1487,6 +1487,17 @@ config ARCH_RANDOM
 	  If supported, this is a high bandwidth, cryptographically
 	  secure hardware random number generator.
 
+config X86_SMAP
+	def_bool y
+	prompt "Supervisor Mode Access Prevention" if EXPERT
+	---help---
+	  Supervisor Mode Access Prevention (SMAP) is a security
+	  feature in newer Intel processors.  There is a small
+	  performance cost if this enabled and turned on; there is
+	  also a small increase in the kernel size if this is enabled.
+
+	  If unsure, say Y.
+
 config EFI
 	bool "EFI runtime service support"
 	depends on ACPI
diff --git a/arch/x86/include/asm/smap.h b/arch/x86/include/asm/smap.h
new file mode 100644
index 00000000000..3989c2492eb
--- /dev/null
+++ b/arch/x86/include/asm/smap.h
@@ -0,0 +1,91 @@
+/*
+ * Supervisor Mode Access Prevention support
+ *
+ * Copyright (C) 2012 Intel Corporation
+ * Author: H. Peter Anvin <hpa@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#ifndef _ASM_X86_SMAP_H
+#define _ASM_X86_SMAP_H
+
+#include <linux/stringify.h>
+#include <asm/nops.h>
+#include <asm/cpufeature.h>
+
+/* "Raw" instruction opcodes */
+#define __ASM_CLAC	.byte 0x0f,0x01,0xca
+#define __ASM_STAC	.byte 0x0f,0x01,0xcb
+
+#ifdef __ASSEMBLY__
+
+#include <asm/alternative-asm.h>
+
+#ifdef CONFIG_X86_SMAP
+
+#define ASM_CLAC							\
+	661: ASM_NOP3 ;							\
+	.pushsection .altinstr_replacement, "ax" ;			\
+	662: __ASM_CLAC ;						\
+	.popsection ;							\
+	.pushsection .altinstructions, "a" ;				\
+	altinstruction_entry 661b, 662b, X86_FEATURE_SMAP, 3, 3 ;	\
+	.popsection
+
+#define ASM_STAC							\
+	661: ASM_NOP3 ;							\
+	.pushsection .altinstr_replacement, "ax" ;			\
+	662: __ASM_STAC ;						\
+	.popsection ;							\
+	.pushsection .altinstructions, "a" ;				\
+	altinstruction_entry 661b, 662b, X86_FEATURE_SMAP, 3, 3 ;	\
+	.popsection
+
+#else /* CONFIG_X86_SMAP */
+
+#define ASM_CLAC
+#define ASM_STAC
+
+#endif /* CONFIG_X86_SMAP */
+
+#else /* __ASSEMBLY__ */
+
+#include <asm/alternative.h>
+
+#ifdef CONFIG_X86_SMAP
+
+static inline void clac(void)
+{
+	/* Note: a barrier is implicit in alternative() */
+	alternative(ASM_NOP3, __stringify(__ASM_CLAC), X86_FEATURE_SMAP);
+}
+
+static inline void stac(void)
+{
+	/* Note: a barrier is implicit in alternative() */
+	alternative(ASM_NOP3, __stringify(__ASM_STAC), X86_FEATURE_SMAP);
+}
+
+/* These macros can be used in asm() statements */
+#define ASM_CLAC \
+	ALTERNATIVE(ASM_NOP3, __stringify(__ASM_CLAC), X86_FEATURE_SMAP)
+#define ASM_STAC \
+	ALTERNATIVE(ASM_NOP3, __stringify(__ASM_STAC), X86_FEATURE_SMAP)
+
+#else /* CONFIG_X86_SMAP */
+
+static inline void clac(void) { }
+static inline void stac(void) { }
+
+#define ASM_CLAC
+#define ASM_STAC
+
+#endif /* CONFIG_X86_SMAP */
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_X86_SMAP_H */
-- 
cgit v1.2.3-70-g09d2


From a052858fabb376b695f2c125633daa6728e0f284 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@linux.intel.com>
Date: Fri, 21 Sep 2012 12:43:11 -0700
Subject: x86, uaccess: Merge prototypes for clear_user/__clear_user

The prototypes for clear_user() and __clear_user() are identical in
the 32- and 64-bit headers.  No functionality change.

Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Link: http://lkml.kernel.org/r/1348256595-29119-8-git-send-email-hpa@linux.intel.com
---
 arch/x86/include/asm/uaccess.h    | 3 +++
 arch/x86/include/asm/uaccess_32.h | 3 ---
 arch/x86/include/asm/uaccess_64.h | 3 ---
 3 files changed, 3 insertions(+), 6 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index e1f3a17034f..2c7df3d184f 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -569,6 +569,9 @@ strncpy_from_user(char *dst, const char __user *src, long count);
 extern __must_check long strlen_user(const char __user *str);
 extern __must_check long strnlen_user(const char __user *str, long n);
 
+unsigned long __must_check clear_user(void __user *mem, unsigned long len);
+unsigned long __must_check __clear_user(void __user *mem, unsigned long len);
+
 /*
  * movsl can be slow when source and dest are not both 8-byte aligned
  */
diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h
index 576e39bca6a..7f760a9f1f6 100644
--- a/arch/x86/include/asm/uaccess_32.h
+++ b/arch/x86/include/asm/uaccess_32.h
@@ -213,7 +213,4 @@ static inline unsigned long __must_check copy_from_user(void *to,
 	return n;
 }
 
-unsigned long __must_check clear_user(void __user *mem, unsigned long len);
-unsigned long __must_check __clear_user(void __user *mem, unsigned long len);
-
 #endif /* _ASM_X86_UACCESS_32_H */
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index d8def8b3dba..142810c457d 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -217,9 +217,6 @@ int __copy_in_user(void __user *dst, const void __user *src, unsigned size)
 	}
 }
 
-__must_check unsigned long clear_user(void __user *mem, unsigned long len);
-__must_check unsigned long __clear_user(void __user *mem, unsigned long len);
-
 static __must_check __always_inline int
 __copy_from_user_inatomic(void *dst, const void __user *src, unsigned size)
 {
-- 
cgit v1.2.3-70-g09d2


From 63bcff2a307b9bcc712a8251eb27df8b2e117967 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@linux.intel.com>
Date: Fri, 21 Sep 2012 12:43:12 -0700
Subject: x86, smap: Add STAC and CLAC instructions to control user space
 access

When Supervisor Mode Access Prevention (SMAP) is enabled, access to
userspace from the kernel is controlled by the AC flag.  To make the
performance of manipulating that flag acceptable, there are two new
instructions, STAC and CLAC, to set and clear it.

This patch adds those instructions, via alternative(), when the SMAP
feature is enabled.  It also adds X86_EFLAGS_AC unconditionally to the
SYSCALL entry mask; there is simply no reason to make that one
conditional.

Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Link: http://lkml.kernel.org/r/1348256595-29119-9-git-send-email-hpa@linux.intel.com
---
 arch/x86/ia32/ia32entry.S           |  6 ++++++
 arch/x86/include/asm/fpu-internal.h | 10 ++++++----
 arch/x86/include/asm/futex.h        | 19 +++++++++++++------
 arch/x86/include/asm/smap.h         |  4 ++--
 arch/x86/include/asm/uaccess.h      | 31 +++++++++++++++++++------------
 arch/x86/include/asm/xsave.h        | 10 ++++++----
 arch/x86/kernel/cpu/common.c        |  3 ++-
 arch/x86/kernel/entry_64.S          | 11 ++++++++++-
 arch/x86/lib/copy_user_64.S         |  7 +++++++
 arch/x86/lib/copy_user_nocache_64.S |  3 +++
 arch/x86/lib/getuser.S              | 10 ++++++++++
 arch/x86/lib/putuser.S              |  8 +++++++-
 arch/x86/lib/usercopy_32.c          | 13 ++++++++++++-
 arch/x86/lib/usercopy_64.c          |  3 +++
 14 files changed, 106 insertions(+), 32 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 20e5f7ba0e6..9c289504e68 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -14,6 +14,7 @@
 #include <asm/segment.h>
 #include <asm/irqflags.h>
 #include <asm/asm.h>
+#include <asm/smap.h>
 #include <linux/linkage.h>
 #include <linux/err.h>
 
@@ -146,8 +147,10 @@ ENTRY(ia32_sysenter_target)
 	SAVE_ARGS 0,1,0
  	/* no need to do an access_ok check here because rbp has been
  	   32bit zero extended */ 
+	ASM_STAC
 1:	movl	(%rbp),%ebp
 	_ASM_EXTABLE(1b,ia32_badarg)
+	ASM_CLAC
 	orl     $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
 	testl   $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
 	CFI_REMEMBER_STATE
@@ -301,8 +304,10 @@ ENTRY(ia32_cstar_target)
 	/* no need to do an access_ok check here because r8 has been
 	   32bit zero extended */ 
 	/* hardware stack frame is complete now */	
+	ASM_STAC
 1:	movl	(%r8),%r9d
 	_ASM_EXTABLE(1b,ia32_badarg)
+	ASM_CLAC
 	orl     $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
 	testl   $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
 	CFI_REMEMBER_STATE
@@ -365,6 +370,7 @@ cstar_tracesys:
 END(ia32_cstar_target)
 				
 ia32_badarg:
+	ASM_CLAC
 	movq $-EFAULT,%rax
 	jmp ia32_sysret
 	CFI_ENDPROC
diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h
index 75f4c6d6a33..0fe13583a02 100644
--- a/arch/x86/include/asm/fpu-internal.h
+++ b/arch/x86/include/asm/fpu-internal.h
@@ -126,8 +126,9 @@ static inline int fxsave_user(struct i387_fxsave_struct __user *fx)
 
 	/* See comment in fxsave() below. */
 #ifdef CONFIG_AS_FXSAVEQ
-	asm volatile("1:  fxsaveq %[fx]\n\t"
-		     "2:\n"
+	asm volatile(ASM_STAC "\n"
+		     "1:  fxsaveq %[fx]\n\t"
+		     "2: " ASM_CLAC "\n"
 		     ".section .fixup,\"ax\"\n"
 		     "3:  movl $-1,%[err]\n"
 		     "    jmp  2b\n"
@@ -136,8 +137,9 @@ static inline int fxsave_user(struct i387_fxsave_struct __user *fx)
 		     : [err] "=r" (err), [fx] "=m" (*fx)
 		     : "0" (0));
 #else
-	asm volatile("1:  rex64/fxsave (%[fx])\n\t"
-		     "2:\n"
+	asm volatile(ASM_STAC "\n"
+		     "1:  rex64/fxsave (%[fx])\n\t"
+		     "2: " ASM_CLAC "\n"
 		     ".section .fixup,\"ax\"\n"
 		     "3:  movl $-1,%[err]\n"
 		     "    jmp  2b\n"
diff --git a/arch/x86/include/asm/futex.h b/arch/x86/include/asm/futex.h
index 71ecbcba1a4..f373046e63e 100644
--- a/arch/x86/include/asm/futex.h
+++ b/arch/x86/include/asm/futex.h
@@ -9,10 +9,13 @@
 #include <asm/asm.h>
 #include <asm/errno.h>
 #include <asm/processor.h>
+#include <asm/smap.h>
 
 #define __futex_atomic_op1(insn, ret, oldval, uaddr, oparg)	\
-	asm volatile("1:\t" insn "\n"				\
-		     "2:\t.section .fixup,\"ax\"\n"		\
+	asm volatile("\t" ASM_STAC "\n"				\
+		     "1:\t" insn "\n"				\
+		     "2:\t" ASM_CLAC "\n"			\
+		     "\t.section .fixup,\"ax\"\n"		\
 		     "3:\tmov\t%3, %1\n"			\
 		     "\tjmp\t2b\n"				\
 		     "\t.previous\n"				\
@@ -21,12 +24,14 @@
 		     : "i" (-EFAULT), "0" (oparg), "1" (0))
 
 #define __futex_atomic_op2(insn, ret, oldval, uaddr, oparg)	\
-	asm volatile("1:\tmovl	%2, %0\n"			\
+	asm volatile("\t" ASM_STAC "\n"				\
+		     "1:\tmovl	%2, %0\n"			\
 		     "\tmovl\t%0, %3\n"				\
 		     "\t" insn "\n"				\
 		     "2:\t" LOCK_PREFIX "cmpxchgl %3, %2\n"	\
 		     "\tjnz\t1b\n"				\
-		     "3:\t.section .fixup,\"ax\"\n"		\
+		     "3:\t" ASM_CLAC "\n"			\
+		     "\t.section .fixup,\"ax\"\n"		\
 		     "4:\tmov\t%5, %1\n"			\
 		     "\tjmp\t3b\n"				\
 		     "\t.previous\n"				\
@@ -122,8 +127,10 @@ static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
 	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
 		return -EFAULT;
 
-	asm volatile("1:\t" LOCK_PREFIX "cmpxchgl %4, %2\n"
-		     "2:\t.section .fixup, \"ax\"\n"
+	asm volatile("\t" ASM_STAC "\n"
+		     "1:\t" LOCK_PREFIX "cmpxchgl %4, %2\n"
+		     "2:\t" ASM_CLAC "\n"
+		     "\t.section .fixup, \"ax\"\n"
 		     "3:\tmov     %3, %0\n"
 		     "\tjmp     2b\n"
 		     "\t.previous\n"
diff --git a/arch/x86/include/asm/smap.h b/arch/x86/include/asm/smap.h
index 3989c2492eb..8d3120f4e27 100644
--- a/arch/x86/include/asm/smap.h
+++ b/arch/x86/include/asm/smap.h
@@ -58,13 +58,13 @@
 
 #ifdef CONFIG_X86_SMAP
 
-static inline void clac(void)
+static __always_inline void clac(void)
 {
 	/* Note: a barrier is implicit in alternative() */
 	alternative(ASM_NOP3, __stringify(__ASM_CLAC), X86_FEATURE_SMAP);
 }
 
-static inline void stac(void)
+static __always_inline void stac(void)
 {
 	/* Note: a barrier is implicit in alternative() */
 	alternative(ASM_NOP3, __stringify(__ASM_STAC), X86_FEATURE_SMAP);
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 2c7df3d184f..b92ece13c23 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -9,6 +9,7 @@
 #include <linux/string.h>
 #include <asm/asm.h>
 #include <asm/page.h>
+#include <asm/smap.h>
 
 #define VERIFY_READ 0
 #define VERIFY_WRITE 1
@@ -192,9 +193,10 @@ extern int __get_user_bad(void);
 
 #ifdef CONFIG_X86_32
 #define __put_user_asm_u64(x, addr, err, errret)			\
-	asm volatile("1:	movl %%eax,0(%2)\n"			\
+	asm volatile(ASM_STAC "\n"					\
+		     "1:	movl %%eax,0(%2)\n"			\
 		     "2:	movl %%edx,4(%2)\n"			\
-		     "3:\n"						\
+		     "3: " ASM_CLAC "\n"				\
 		     ".section .fixup,\"ax\"\n"				\
 		     "4:	movl %3,%0\n"				\
 		     "	jmp 3b\n"					\
@@ -205,9 +207,10 @@ extern int __get_user_bad(void);
 		     : "A" (x), "r" (addr), "i" (errret), "0" (err))
 
 #define __put_user_asm_ex_u64(x, addr)					\
-	asm volatile("1:	movl %%eax,0(%1)\n"			\
+	asm volatile(ASM_STAC "\n"					\
+		     "1:	movl %%eax,0(%1)\n"			\
 		     "2:	movl %%edx,4(%1)\n"			\
-		     "3:\n"						\
+		     "3: " ASM_CLAC "\n"				\
 		     _ASM_EXTABLE_EX(1b, 2b)				\
 		     _ASM_EXTABLE_EX(2b, 3b)				\
 		     : : "A" (x), "r" (addr))
@@ -379,8 +382,9 @@ do {									\
 } while (0)
 
 #define __get_user_asm(x, addr, err, itype, rtype, ltype, errret)	\
-	asm volatile("1:	mov"itype" %2,%"rtype"1\n"		\
-		     "2:\n"						\
+	asm volatile(ASM_STAC "\n"					\
+		     "1:	mov"itype" %2,%"rtype"1\n"		\
+		     "2: " ASM_CLAC "\n"				\
 		     ".section .fixup,\"ax\"\n"				\
 		     "3:	mov %3,%0\n"				\
 		     "	xor"itype" %"rtype"1,%"rtype"1\n"		\
@@ -412,8 +416,9 @@ do {									\
 } while (0)
 
 #define __get_user_asm_ex(x, addr, itype, rtype, ltype)			\
-	asm volatile("1:	mov"itype" %1,%"rtype"0\n"		\
-		     "2:\n"						\
+	asm volatile(ASM_STAC "\n"					\
+		     "1:	mov"itype" %1,%"rtype"0\n"		\
+		     "2: " ASM_CLAC "\n"				\
 		     _ASM_EXTABLE_EX(1b, 2b)				\
 		     : ltype(x) : "m" (__m(addr)))
 
@@ -443,8 +448,9 @@ struct __large_struct { unsigned long buf[100]; };
  * aliasing issues.
  */
 #define __put_user_asm(x, addr, err, itype, rtype, ltype, errret)	\
-	asm volatile("1:	mov"itype" %"rtype"1,%2\n"		\
-		     "2:\n"						\
+	asm volatile(ASM_STAC "\n"					\
+		     "1:	mov"itype" %"rtype"1,%2\n"		\
+		     "2: " ASM_CLAC "\n"				\
 		     ".section .fixup,\"ax\"\n"				\
 		     "3:	mov %3,%0\n"				\
 		     "	jmp 2b\n"					\
@@ -454,8 +460,9 @@ struct __large_struct { unsigned long buf[100]; };
 		     : ltype(x), "m" (__m(addr)), "i" (errret), "0" (err))
 
 #define __put_user_asm_ex(x, addr, itype, rtype, ltype)			\
-	asm volatile("1:	mov"itype" %"rtype"0,%1\n"		\
-		     "2:\n"						\
+	asm volatile(ASM_STAC "\n"					\
+		     "1:	mov"itype" %"rtype"0,%1\n"		\
+		     "2: " ASM_CLAC "\n"				\
 		     _ASM_EXTABLE_EX(1b, 2b)				\
 		     : : ltype(x), "m" (__m(addr)))
 
diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h
index 8a1b6f9b594..2a923bd5434 100644
--- a/arch/x86/include/asm/xsave.h
+++ b/arch/x86/include/asm/xsave.h
@@ -74,8 +74,9 @@ static inline int xsave_user(struct xsave_struct __user *buf)
 	if (unlikely(err))
 		return -EFAULT;
 
-	__asm__ __volatile__("1: .byte " REX_PREFIX "0x0f,0xae,0x27\n"
-			     "2:\n"
+	__asm__ __volatile__(ASM_STAC "\n"
+			     "1: .byte " REX_PREFIX "0x0f,0xae,0x27\n"
+			     "2: " ASM_CLAC "\n"
 			     ".section .fixup,\"ax\"\n"
 			     "3:  movl $-1,%[err]\n"
 			     "    jmp  2b\n"
@@ -97,8 +98,9 @@ static inline int xrestore_user(struct xsave_struct __user *buf, u64 mask)
 	u32 lmask = mask;
 	u32 hmask = mask >> 32;
 
-	__asm__ __volatile__("1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n"
-			     "2:\n"
+	__asm__ __volatile__(ASM_STAC "\n"
+			     "1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n"
+			     "2: " ASM_CLAC "\n"
 			     ".section .fixup,\"ax\"\n"
 			     "3:  movl $-1,%[err]\n"
 			     "    jmp  2b\n"
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index a5fbc3c5fcc..cd43e525fde 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1113,7 +1113,8 @@ void syscall_init(void)
 
 	/* Flags to clear on syscall */
 	wrmsrl(MSR_SYSCALL_MASK,
-	       X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL);
+	       X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|
+	       X86_EFLAGS_IOPL|X86_EFLAGS_AC);
 }
 
 unsigned long kernel_eflags;
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 69babd8c834..ce87e3d10c6 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -56,6 +56,7 @@
 #include <asm/ftrace.h>
 #include <asm/percpu.h>
 #include <asm/asm.h>
+#include <asm/smap.h>
 #include <linux/err.h>
 
 /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this.  */
@@ -465,7 +466,8 @@ END(ret_from_fork)
  * System call entry. Up to 6 arguments in registers are supported.
  *
  * SYSCALL does not save anything on the stack and does not change the
- * stack pointer.
+ * stack pointer.  However, it does mask the flags register for us, so
+ * CLD and CLAC are not needed.
  */
 
 /*
@@ -884,6 +886,7 @@ END(interrupt)
 	 */
 	.p2align CONFIG_X86_L1_CACHE_SHIFT
 common_interrupt:
+	ASM_CLAC
 	XCPT_FRAME
 	addq $-0x80,(%rsp)		/* Adjust vector to [-256,-1] range */
 	interrupt do_IRQ
@@ -1023,6 +1026,7 @@ END(common_interrupt)
  */
 .macro apicinterrupt num sym do_sym
 ENTRY(\sym)
+	ASM_CLAC
 	INTR_FRAME
 	pushq_cfi $~(\num)
 .Lcommon_\sym:
@@ -1077,6 +1081,7 @@ apicinterrupt IRQ_WORK_VECTOR \
  */
 .macro zeroentry sym do_sym
 ENTRY(\sym)
+	ASM_CLAC
 	INTR_FRAME
 	PARAVIRT_ADJUST_EXCEPTION_FRAME
 	pushq_cfi $-1		/* ORIG_RAX: no syscall to restart */
@@ -1094,6 +1099,7 @@ END(\sym)
 
 .macro paranoidzeroentry sym do_sym
 ENTRY(\sym)
+	ASM_CLAC
 	INTR_FRAME
 	PARAVIRT_ADJUST_EXCEPTION_FRAME
 	pushq_cfi $-1		/* ORIG_RAX: no syscall to restart */
@@ -1112,6 +1118,7 @@ END(\sym)
 #define INIT_TSS_IST(x) PER_CPU_VAR(init_tss) + (TSS_ist + ((x) - 1) * 8)
 .macro paranoidzeroentry_ist sym do_sym ist
 ENTRY(\sym)
+	ASM_CLAC
 	INTR_FRAME
 	PARAVIRT_ADJUST_EXCEPTION_FRAME
 	pushq_cfi $-1		/* ORIG_RAX: no syscall to restart */
@@ -1131,6 +1138,7 @@ END(\sym)
 
 .macro errorentry sym do_sym
 ENTRY(\sym)
+	ASM_CLAC
 	XCPT_FRAME
 	PARAVIRT_ADJUST_EXCEPTION_FRAME
 	subq $ORIG_RAX-R15, %rsp
@@ -1149,6 +1157,7 @@ END(\sym)
 	/* error code is on the stack already */
 .macro paranoiderrorentry sym do_sym
 ENTRY(\sym)
+	ASM_CLAC
 	XCPT_FRAME
 	PARAVIRT_ADJUST_EXCEPTION_FRAME
 	subq $ORIG_RAX-R15, %rsp
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index 5b2995f4557..a30ca15be21 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -17,6 +17,7 @@
 #include <asm/cpufeature.h>
 #include <asm/alternative-asm.h>
 #include <asm/asm.h>
+#include <asm/smap.h>
 
 /*
  * By placing feature2 after feature1 in altinstructions section, we logically
@@ -130,6 +131,7 @@ ENDPROC(bad_from_user)
  */
 ENTRY(copy_user_generic_unrolled)
 	CFI_STARTPROC
+	ASM_STAC
 	cmpl $8,%edx
 	jb 20f		/* less then 8 bytes, go to byte copy loop */
 	ALIGN_DESTINATION
@@ -177,6 +179,7 @@ ENTRY(copy_user_generic_unrolled)
 	decl %ecx
 	jnz 21b
 23:	xor %eax,%eax
+	ASM_CLAC
 	ret
 
 	.section .fixup,"ax"
@@ -232,6 +235,7 @@ ENDPROC(copy_user_generic_unrolled)
  */
 ENTRY(copy_user_generic_string)
 	CFI_STARTPROC
+	ASM_STAC
 	andl %edx,%edx
 	jz 4f
 	cmpl $8,%edx
@@ -246,6 +250,7 @@ ENTRY(copy_user_generic_string)
 3:	rep
 	movsb
 4:	xorl %eax,%eax
+	ASM_CLAC
 	ret
 
 	.section .fixup,"ax"
@@ -273,12 +278,14 @@ ENDPROC(copy_user_generic_string)
  */
 ENTRY(copy_user_enhanced_fast_string)
 	CFI_STARTPROC
+	ASM_STAC
 	andl %edx,%edx
 	jz 2f
 	movl %edx,%ecx
 1:	rep
 	movsb
 2:	xorl %eax,%eax
+	ASM_CLAC
 	ret
 
 	.section .fixup,"ax"
diff --git a/arch/x86/lib/copy_user_nocache_64.S b/arch/x86/lib/copy_user_nocache_64.S
index cacddc7163e..6a4f43c2d9e 100644
--- a/arch/x86/lib/copy_user_nocache_64.S
+++ b/arch/x86/lib/copy_user_nocache_64.S
@@ -15,6 +15,7 @@
 #include <asm/asm-offsets.h>
 #include <asm/thread_info.h>
 #include <asm/asm.h>
+#include <asm/smap.h>
 
 	.macro ALIGN_DESTINATION
 #ifdef FIX_ALIGNMENT
@@ -48,6 +49,7 @@
  */
 ENTRY(__copy_user_nocache)
 	CFI_STARTPROC
+	ASM_STAC
 	cmpl $8,%edx
 	jb 20f		/* less then 8 bytes, go to byte copy loop */
 	ALIGN_DESTINATION
@@ -95,6 +97,7 @@ ENTRY(__copy_user_nocache)
 	decl %ecx
 	jnz 21b
 23:	xorl %eax,%eax
+	ASM_CLAC
 	sfence
 	ret
 
diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S
index b33b1fb1e6d..156b9c80467 100644
--- a/arch/x86/lib/getuser.S
+++ b/arch/x86/lib/getuser.S
@@ -33,6 +33,7 @@
 #include <asm/asm-offsets.h>
 #include <asm/thread_info.h>
 #include <asm/asm.h>
+#include <asm/smap.h>
 
 	.text
 ENTRY(__get_user_1)
@@ -40,8 +41,10 @@ ENTRY(__get_user_1)
 	GET_THREAD_INFO(%_ASM_DX)
 	cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
 	jae bad_get_user
+	ASM_STAC
 1:	movzb (%_ASM_AX),%edx
 	xor %eax,%eax
+	ASM_CLAC
 	ret
 	CFI_ENDPROC
 ENDPROC(__get_user_1)
@@ -53,8 +56,10 @@ ENTRY(__get_user_2)
 	GET_THREAD_INFO(%_ASM_DX)
 	cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
 	jae bad_get_user
+	ASM_STAC
 2:	movzwl -1(%_ASM_AX),%edx
 	xor %eax,%eax
+	ASM_CLAC
 	ret
 	CFI_ENDPROC
 ENDPROC(__get_user_2)
@@ -66,8 +71,10 @@ ENTRY(__get_user_4)
 	GET_THREAD_INFO(%_ASM_DX)
 	cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
 	jae bad_get_user
+	ASM_STAC
 3:	mov -3(%_ASM_AX),%edx
 	xor %eax,%eax
+	ASM_CLAC
 	ret
 	CFI_ENDPROC
 ENDPROC(__get_user_4)
@@ -80,8 +87,10 @@ ENTRY(__get_user_8)
 	GET_THREAD_INFO(%_ASM_DX)
 	cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
 	jae	bad_get_user
+	ASM_STAC
 4:	movq -7(%_ASM_AX),%_ASM_DX
 	xor %eax,%eax
+	ASM_CLAC
 	ret
 	CFI_ENDPROC
 ENDPROC(__get_user_8)
@@ -91,6 +100,7 @@ bad_get_user:
 	CFI_STARTPROC
 	xor %edx,%edx
 	mov $(-EFAULT),%_ASM_AX
+	ASM_CLAC
 	ret
 	CFI_ENDPROC
 END(bad_get_user)
diff --git a/arch/x86/lib/putuser.S b/arch/x86/lib/putuser.S
index 7f951c8f76c..fc6ba17a7ee 100644
--- a/arch/x86/lib/putuser.S
+++ b/arch/x86/lib/putuser.S
@@ -15,6 +15,7 @@
 #include <asm/thread_info.h>
 #include <asm/errno.h>
 #include <asm/asm.h>
+#include <asm/smap.h>
 
 
 /*
@@ -31,7 +32,8 @@
 
 #define ENTER	CFI_STARTPROC ; \
 		GET_THREAD_INFO(%_ASM_BX)
-#define EXIT	ret ; \
+#define EXIT	ASM_CLAC ;	\
+		ret ;		\
 		CFI_ENDPROC
 
 .text
@@ -39,6 +41,7 @@ ENTRY(__put_user_1)
 	ENTER
 	cmp TI_addr_limit(%_ASM_BX),%_ASM_CX
 	jae bad_put_user
+	ASM_STAC
 1:	movb %al,(%_ASM_CX)
 	xor %eax,%eax
 	EXIT
@@ -50,6 +53,7 @@ ENTRY(__put_user_2)
 	sub $1,%_ASM_BX
 	cmp %_ASM_BX,%_ASM_CX
 	jae bad_put_user
+	ASM_STAC
 2:	movw %ax,(%_ASM_CX)
 	xor %eax,%eax
 	EXIT
@@ -61,6 +65,7 @@ ENTRY(__put_user_4)
 	sub $3,%_ASM_BX
 	cmp %_ASM_BX,%_ASM_CX
 	jae bad_put_user
+	ASM_STAC
 3:	movl %eax,(%_ASM_CX)
 	xor %eax,%eax
 	EXIT
@@ -72,6 +77,7 @@ ENTRY(__put_user_8)
 	sub $7,%_ASM_BX
 	cmp %_ASM_BX,%_ASM_CX
 	jae bad_put_user
+	ASM_STAC
 4:	mov %_ASM_AX,(%_ASM_CX)
 #ifdef CONFIG_X86_32
 5:	movl %edx,4(%_ASM_CX)
diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c
index 1781b2f950e..98f6d6b68f5 100644
--- a/arch/x86/lib/usercopy_32.c
+++ b/arch/x86/lib/usercopy_32.c
@@ -42,10 +42,11 @@ do {									\
 	int __d0;							\
 	might_fault();							\
 	__asm__ __volatile__(						\
+		ASM_STAC "\n"						\
 		"0:	rep; stosl\n"					\
 		"	movl %2,%0\n"					\
 		"1:	rep; stosb\n"					\
-		"2:\n"							\
+		"2: " ASM_CLAC "\n"					\
 		".section .fixup,\"ax\"\n"				\
 		"3:	lea 0(%2,%0,4),%0\n"				\
 		"	jmp 2b\n"					\
@@ -626,10 +627,12 @@ survive:
 		return n;
 	}
 #endif
+	stac();
 	if (movsl_is_ok(to, from, n))
 		__copy_user(to, from, n);
 	else
 		n = __copy_user_intel(to, from, n);
+	clac();
 	return n;
 }
 EXPORT_SYMBOL(__copy_to_user_ll);
@@ -637,10 +640,12 @@ EXPORT_SYMBOL(__copy_to_user_ll);
 unsigned long __copy_from_user_ll(void *to, const void __user *from,
 					unsigned long n)
 {
+	stac();
 	if (movsl_is_ok(to, from, n))
 		__copy_user_zeroing(to, from, n);
 	else
 		n = __copy_user_zeroing_intel(to, from, n);
+	clac();
 	return n;
 }
 EXPORT_SYMBOL(__copy_from_user_ll);
@@ -648,11 +653,13 @@ EXPORT_SYMBOL(__copy_from_user_ll);
 unsigned long __copy_from_user_ll_nozero(void *to, const void __user *from,
 					 unsigned long n)
 {
+	stac();
 	if (movsl_is_ok(to, from, n))
 		__copy_user(to, from, n);
 	else
 		n = __copy_user_intel((void __user *)to,
 				      (const void *)from, n);
+	clac();
 	return n;
 }
 EXPORT_SYMBOL(__copy_from_user_ll_nozero);
@@ -660,6 +667,7 @@ EXPORT_SYMBOL(__copy_from_user_ll_nozero);
 unsigned long __copy_from_user_ll_nocache(void *to, const void __user *from,
 					unsigned long n)
 {
+	stac();
 #ifdef CONFIG_X86_INTEL_USERCOPY
 	if (n > 64 && cpu_has_xmm2)
 		n = __copy_user_zeroing_intel_nocache(to, from, n);
@@ -668,6 +676,7 @@ unsigned long __copy_from_user_ll_nocache(void *to, const void __user *from,
 #else
 	__copy_user_zeroing(to, from, n);
 #endif
+	clac();
 	return n;
 }
 EXPORT_SYMBOL(__copy_from_user_ll_nocache);
@@ -675,6 +684,7 @@ EXPORT_SYMBOL(__copy_from_user_ll_nocache);
 unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *from,
 					unsigned long n)
 {
+	stac();
 #ifdef CONFIG_X86_INTEL_USERCOPY
 	if (n > 64 && cpu_has_xmm2)
 		n = __copy_user_intel_nocache(to, from, n);
@@ -683,6 +693,7 @@ unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *fr
 #else
 	__copy_user(to, from, n);
 #endif
+	clac();
 	return n;
 }
 EXPORT_SYMBOL(__copy_from_user_ll_nocache_nozero);
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
index e5b130bc2d0..05928aae911 100644
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -18,6 +18,7 @@ unsigned long __clear_user(void __user *addr, unsigned long size)
 	might_fault();
 	/* no memory constraint because it doesn't change any memory gcc knows
 	   about */
+	stac();
 	asm volatile(
 		"	testq  %[size8],%[size8]\n"
 		"	jz     4f\n"
@@ -40,6 +41,7 @@ unsigned long __clear_user(void __user *addr, unsigned long size)
 		: [size8] "=&c"(size), [dst] "=&D" (__d0)
 		: [size1] "r"(size & 7), "[size8]" (size / 8), "[dst]"(addr),
 		  [zero] "r" (0UL), [eight] "r" (8UL));
+	clac();
 	return size;
 }
 EXPORT_SYMBOL(__clear_user);
@@ -82,5 +84,6 @@ copy_user_handle_tail(char *to, char *from, unsigned len, unsigned zerorest)
 	for (c = 0, zero_len = len; zerorest && zero_len; --zero_len)
 		if (__put_user_nocheck(c, to++, sizeof(char)))
 			break;
+	clac();
 	return len;
 }
-- 
cgit v1.2.3-70-g09d2


From 52b6179ac87d33c2eeaff5292786a10fe98cff64 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@linux.intel.com>
Date: Fri, 21 Sep 2012 12:43:13 -0700
Subject: x86, smap: Turn on Supervisor Mode Access Prevention

If Supervisor Mode Access Prevention is available and not disabled by
the user, turn it on.  Also fix the expansion of SMEP (Supervisor Mode
Execution Prevention.)

Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Link: http://lkml.kernel.org/r/1348256595-29119-10-git-send-email-hpa@linux.intel.com
---
 Documentation/kernel-parameters.txt |  6 +++++-
 arch/x86/kernel/cpu/common.c        | 26 ++++++++++++++++++++++++++
 2 files changed, 31 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index ad7e2e5088c..49c5c41b07e 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1812,8 +1812,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			noexec=on: enable non-executable mappings (default)
 			noexec=off: disable non-executable mappings
 
+	nosmap		[X86]
+			Disable SMAP (Supervisor Mode Access Prevention)
+			even if it is supported by processor.
+
 	nosmep		[X86]
-			Disable SMEP (Supervisor Mode Execution Protection)
+			Disable SMEP (Supervisor Mode Execution Prevention)
 			even if it is supported by processor.
 
 	noexec32	[X86-64]
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index cd43e525fde..7d35d659411 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -278,6 +278,31 @@ static __cpuinit void setup_smep(struct cpuinfo_x86 *c)
 	}
 }
 
+static int disable_smap __cpuinitdata;
+static __init int setup_disable_smap(char *arg)
+{
+	disable_smap = 1;
+	return 1;
+}
+__setup("nosmap", setup_disable_smap);
+
+static __cpuinit void setup_smap(struct cpuinfo_x86 *c)
+{
+	if (cpu_has(c, X86_FEATURE_SMAP)) {
+		if (unlikely(disable_smap)) {
+			setup_clear_cpu_cap(X86_FEATURE_SMAP);
+			clear_in_cr4(X86_CR4_SMAP);
+		} else {
+			set_in_cr4(X86_CR4_SMAP);
+			/*
+			 * Don't use clac() here since alternatives
+			 * haven't run yet...
+			 */
+			asm volatile(__stringify(__ASM_CLAC) ::: "memory");
+		}
+	}
+}
+
 /*
  * Some CPU features depend on higher CPUID levels, which may not always
  * be available due to CPUID level capping or broken virtualization
@@ -713,6 +738,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
 	filter_cpuid_features(c, false);
 
 	setup_smep(c);
+	setup_smap(c);
 
 	if (this_cpu->c_bsp_init)
 		this_cpu->c_bsp_init(c);
-- 
cgit v1.2.3-70-g09d2


From 40d3cd6695014bf3c44e2ca66b610b18acaf923d Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@linux.intel.com>
Date: Fri, 21 Sep 2012 12:43:14 -0700
Subject: x86, smap: A page fault due to SMAP is an oops

If we get a page fault due to SMAP, trigger an oops rather than
spinning forever.

Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Link: http://lkml.kernel.org/r/1348256595-29119-11-git-send-email-hpa@linux.intel.com
---
 arch/x86/mm/fault.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 76dcd9d8e0b..f2fb75d46b9 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -995,6 +995,17 @@ static int fault_in_kernel_space(unsigned long address)
 	return address >= TASK_SIZE_MAX;
 }
 
+static inline bool smap_violation(int error_code, struct pt_regs *regs)
+{
+	if (error_code & PF_USER)
+		return false;
+
+	if (!user_mode_vm(regs) && (regs->flags & X86_EFLAGS_AC))
+		return false;
+
+	return true;
+}
+
 /*
  * This routine handles page faults.  It determines the address,
  * and the problem, and then passes it off to one of the appropriate
@@ -1088,6 +1099,13 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code)
 	if (unlikely(error_code & PF_RSVD))
 		pgtable_bad(regs, error_code, address);
 
+	if (static_cpu_has(X86_FEATURE_SMAP)) {
+		if (unlikely(smap_violation(error_code, regs))) {
+			bad_area_nosemaphore(regs, error_code, address);
+			return;
+		}
+	}
+
 	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
 
 	/*
-- 
cgit v1.2.3-70-g09d2


From 5e88353d8b5f483bc1c873ad24ac2b59a6b66c73 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@linux.intel.com>
Date: Fri, 21 Sep 2012 12:43:15 -0700
Subject: x86, smap: Reduce the SMAP overhead for signal handling

Signal handling contains a bunch of accesses to individual user space
items, which causes an excessive number of STAC and CLAC
instructions.  Instead, let get/put_user_try ... get/put_user_catch()
contain the STAC and CLAC instructions.

This means that get/put_user_try no longer nests, and furthermore that
it is no longer legal to use user space access functions other than
__get/put_user_ex() inside those blocks.  However, these macros are
x86-specific anyway and are only used in the signal-handling paths; a
simple reordering of moving the larger subroutine calls out of the
try...catch blocks resolves that problem.

Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Link: http://lkml.kernel.org/r/1348256595-29119-12-git-send-email-hpa@linux.intel.com
---
 arch/x86/ia32/ia32_signal.c    | 12 +++++++-----
 arch/x86/include/asm/uaccess.h | 14 ++++++--------
 arch/x86/kernel/signal.c       | 24 ++++++++++++++----------
 3 files changed, 27 insertions(+), 23 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index 673ac9b63d6..05e62a312bd 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -250,11 +250,12 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
 
 		get_user_ex(tmp, &sc->fpstate);
 		buf = compat_ptr(tmp);
-		err |= restore_i387_xstate_ia32(buf);
 
 		get_user_ex(*pax, &sc->ax);
 	} get_user_catch(err);
 
+	err |= restore_i387_xstate_ia32(buf);
+
 	return err;
 }
 
@@ -502,7 +503,6 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 		put_user_ex(sig, &frame->sig);
 		put_user_ex(ptr_to_compat(&frame->info), &frame->pinfo);
 		put_user_ex(ptr_to_compat(&frame->uc), &frame->puc);
-		err |= copy_siginfo_to_user32(&frame->info, info);
 
 		/* Create the ucontext.  */
 		if (cpu_has_xsave)
@@ -514,9 +514,6 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 		put_user_ex(sas_ss_flags(regs->sp),
 			    &frame->uc.uc_stack.ss_flags);
 		put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
-		err |= ia32_setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
-					     regs, set->sig[0]);
-		err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
 
 		if (ka->sa.sa_flags & SA_RESTORER)
 			restorer = ka->sa.sa_restorer;
@@ -532,6 +529,11 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 		put_user_ex(*((u64 *)&code), (u64 *)frame->retcode);
 	} put_user_catch(err);
 
+	err |= copy_siginfo_to_user32(&frame->info, info);
+	err |= ia32_setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
+				     regs, set->sig[0]);
+	err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
+
 	if (err)
 		return -EFAULT;
 
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index b92ece13c23..a91acfbb1a9 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -416,9 +416,8 @@ do {									\
 } while (0)
 
 #define __get_user_asm_ex(x, addr, itype, rtype, ltype)			\
-	asm volatile(ASM_STAC "\n"					\
-		     "1:	mov"itype" %1,%"rtype"0\n"		\
-		     "2: " ASM_CLAC "\n"				\
+	asm volatile("1:	mov"itype" %1,%"rtype"0\n"		\
+		     "2:\n"						\
 		     _ASM_EXTABLE_EX(1b, 2b)				\
 		     : ltype(x) : "m" (__m(addr)))
 
@@ -460,9 +459,8 @@ struct __large_struct { unsigned long buf[100]; };
 		     : ltype(x), "m" (__m(addr)), "i" (errret), "0" (err))
 
 #define __put_user_asm_ex(x, addr, itype, rtype, ltype)			\
-	asm volatile(ASM_STAC "\n"					\
-		     "1:	mov"itype" %"rtype"0,%1\n"		\
-		     "2: " ASM_CLAC "\n"				\
+	asm volatile("1:	mov"itype" %"rtype"0,%1\n"		\
+		     "2:\n"						\
 		     _ASM_EXTABLE_EX(1b, 2b)				\
 		     : : ltype(x), "m" (__m(addr)))
 
@@ -470,13 +468,13 @@ struct __large_struct { unsigned long buf[100]; };
  * uaccess_try and catch
  */
 #define uaccess_try	do {						\
-	int prev_err = current_thread_info()->uaccess_err;		\
 	current_thread_info()->uaccess_err = 0;				\
+	stac();								\
 	barrier();
 
 #define uaccess_catch(err)						\
+	clac();								\
 	(err) |= (current_thread_info()->uaccess_err ? -EFAULT : 0);	\
-	current_thread_info()->uaccess_err = prev_err;			\
 } while (0)
 
 /**
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index b280908a376..932612887e9 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -114,11 +114,12 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
 		regs->orig_ax = -1;		/* disable syscall checks */
 
 		get_user_ex(buf, &sc->fpstate);
-		err |= restore_i387_xstate(buf);
 
 		get_user_ex(*pax, &sc->ax);
 	} get_user_catch(err);
 
+	err |= restore_i387_xstate(buf);
+
 	return err;
 }
 
@@ -357,7 +358,6 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 		put_user_ex(sig, &frame->sig);
 		put_user_ex(&frame->info, &frame->pinfo);
 		put_user_ex(&frame->uc, &frame->puc);
-		err |= copy_siginfo_to_user(&frame->info, info);
 
 		/* Create the ucontext.  */
 		if (cpu_has_xsave)
@@ -369,9 +369,6 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 		put_user_ex(sas_ss_flags(regs->sp),
 			    &frame->uc.uc_stack.ss_flags);
 		put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
-		err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
-					regs, set->sig[0]);
-		err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
 
 		/* Set up to return from userspace.  */
 		restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn);
@@ -389,6 +386,11 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 		put_user_ex(*((u64 *)&rt_retcode), (u64 *)frame->retcode);
 	} put_user_catch(err);
 
+	err |= copy_siginfo_to_user(&frame->info, info);
+	err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
+				regs, set->sig[0]);
+	err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
+
 	if (err)
 		return -EFAULT;
 
@@ -436,8 +438,6 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 		put_user_ex(sas_ss_flags(regs->sp),
 			    &frame->uc.uc_stack.ss_flags);
 		put_user_ex(me->sas_ss_size, &frame->uc.uc_stack.ss_size);
-		err |= setup_sigcontext(&frame->uc.uc_mcontext, fp, regs, set->sig[0]);
-		err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
 
 		/* Set up to return from userspace.  If provided, use a stub
 		   already in userspace.  */
@@ -450,6 +450,9 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 		}
 	} put_user_catch(err);
 
+	err |= setup_sigcontext(&frame->uc.uc_mcontext, fp, regs, set->sig[0]);
+	err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
+
 	if (err)
 		return -EFAULT;
 
@@ -855,9 +858,6 @@ static int x32_setup_rt_frame(int sig, struct k_sigaction *ka,
 			    &frame->uc.uc_stack.ss_flags);
 		put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
 		put_user_ex(0, &frame->uc.uc__pad0);
-		err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
-					regs, set->sig[0]);
-		err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
 
 		if (ka->sa.sa_flags & SA_RESTORER) {
 			restorer = ka->sa.sa_restorer;
@@ -869,6 +869,10 @@ static int x32_setup_rt_frame(int sig, struct k_sigaction *ka,
 		put_user_ex(restorer, &frame->pretcode);
 	} put_user_catch(err);
 
+	err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
+				regs, set->sig[0]);
+	err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
+
 	if (err)
 		return -EFAULT;
 
-- 
cgit v1.2.3-70-g09d2


From 4cd8daf05c7071ac80008c8d4368860110fa6466 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Wed, 19 Sep 2012 10:49:00 -0700
Subject: x86/PCI: Clear host bridge aperture struct resource

Use kzalloc() so the struct resource doesn't contain garbage in
fields we don't initialize.

[bhelgaas: changelog]
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Cc: x86@kernel.org
---
 arch/x86/pci/acpi.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index 505acdd6d60..192397c9860 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -305,7 +305,6 @@ setup_resource(struct acpi_resource *acpi_res, void *data)
 	res->flags = flags;
 	res->start = start;
 	res->end = end;
-	res->child = NULL;
 
 	if (!pci_use_crs) {
 		dev_printk(KERN_DEBUG, &info->bridge->dev,
@@ -434,7 +433,7 @@ probe_pci_root_info(struct pci_root_info *info, struct acpi_device *device,
 
 	size = sizeof(*info->res) * info->res_num;
 	info->res_num = 0;
-	info->res = kmalloc(size, GFP_KERNEL);
+	info->res = kzalloc(size, GFP_KERNEL);
 	if (!info->res)
 		return;
 
-- 
cgit v1.2.3-70-g09d2


From e59d1b0a24199db01978e6c1e89859eda93ce683 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@linux.intel.com>
Date: Fri, 21 Sep 2012 13:58:10 -0700
Subject: x86-32, smap: Add STAC/CLAC instructions to 32-bit kernel entry

The changes to entry_32.S got missed in checkin:

63bcff2a x86, smap: Add STAC and CLAC instructions to control user space access

The resulting kernel was largely functional but SMAP protection could
have been bypassed.

Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Link: http://lkml.kernel.org/r/1348256595-29119-9-git-send-email-hpa@linux.intel.com
---
 arch/x86/kernel/entry_32.S | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 623f2883747..9ebbecab6e9 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -57,6 +57,7 @@
 #include <asm/cpufeature.h>
 #include <asm/alternative-asm.h>
 #include <asm/asm.h>
+#include <asm/smap.h>
 
 /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this.  */
 #include <linux/elf-em.h>
@@ -407,7 +408,9 @@ sysenter_past_esp:
  */
 	cmpl $__PAGE_OFFSET-3,%ebp
 	jae syscall_fault
+	ASM_STAC
 1:	movl (%ebp),%ebp
+	ASM_CLAC
 	movl %ebp,PT_EBP(%esp)
 	_ASM_EXTABLE(1b,syscall_fault)
 
@@ -488,6 +491,7 @@ ENDPROC(ia32_sysenter_target)
 	# system call handler stub
 ENTRY(system_call)
 	RING0_INT_FRAME			# can't unwind into user space anyway
+	ASM_CLAC
 	pushl_cfi %eax			# save orig_eax
 	SAVE_ALL
 	GET_THREAD_INFO(%ebp)
@@ -670,6 +674,7 @@ END(syscall_exit_work)
 
 	RING0_INT_FRAME			# can't unwind into user space anyway
 syscall_fault:
+	ASM_CLAC
 	GET_THREAD_INFO(%ebp)
 	movl $-EFAULT,PT_EAX(%esp)
 	jmp resume_userspace
@@ -825,6 +830,7 @@ END(interrupt)
  */
 	.p2align CONFIG_X86_L1_CACHE_SHIFT
 common_interrupt:
+	ASM_CLAC
 	addl $-0x80,(%esp)	/* Adjust vector into the [-256,-1] range */
 	SAVE_ALL
 	TRACE_IRQS_OFF
@@ -841,6 +847,7 @@ ENDPROC(common_interrupt)
 #define BUILD_INTERRUPT3(name, nr, fn)	\
 ENTRY(name)				\
 	RING0_INT_FRAME;		\
+	ASM_CLAC;			\
 	pushl_cfi $~(nr);		\
 	SAVE_ALL;			\
 	TRACE_IRQS_OFF			\
@@ -857,6 +864,7 @@ ENDPROC(name)
 
 ENTRY(coprocessor_error)
 	RING0_INT_FRAME
+	ASM_CLAC
 	pushl_cfi $0
 	pushl_cfi $do_coprocessor_error
 	jmp error_code
@@ -865,6 +873,7 @@ END(coprocessor_error)
 
 ENTRY(simd_coprocessor_error)
 	RING0_INT_FRAME
+	ASM_CLAC
 	pushl_cfi $0
 #ifdef CONFIG_X86_INVD_BUG
 	/* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */
@@ -886,6 +895,7 @@ END(simd_coprocessor_error)
 
 ENTRY(device_not_available)
 	RING0_INT_FRAME
+	ASM_CLAC
 	pushl_cfi $-1			# mark this as an int
 	pushl_cfi $do_device_not_available
 	jmp error_code
@@ -906,6 +916,7 @@ END(native_irq_enable_sysexit)
 
 ENTRY(overflow)
 	RING0_INT_FRAME
+	ASM_CLAC
 	pushl_cfi $0
 	pushl_cfi $do_overflow
 	jmp error_code
@@ -914,6 +925,7 @@ END(overflow)
 
 ENTRY(bounds)
 	RING0_INT_FRAME
+	ASM_CLAC
 	pushl_cfi $0
 	pushl_cfi $do_bounds
 	jmp error_code
@@ -922,6 +934,7 @@ END(bounds)
 
 ENTRY(invalid_op)
 	RING0_INT_FRAME
+	ASM_CLAC
 	pushl_cfi $0
 	pushl_cfi $do_invalid_op
 	jmp error_code
@@ -930,6 +943,7 @@ END(invalid_op)
 
 ENTRY(coprocessor_segment_overrun)
 	RING0_INT_FRAME
+	ASM_CLAC
 	pushl_cfi $0
 	pushl_cfi $do_coprocessor_segment_overrun
 	jmp error_code
@@ -938,6 +952,7 @@ END(coprocessor_segment_overrun)
 
 ENTRY(invalid_TSS)
 	RING0_EC_FRAME
+	ASM_CLAC
 	pushl_cfi $do_invalid_TSS
 	jmp error_code
 	CFI_ENDPROC
@@ -945,6 +960,7 @@ END(invalid_TSS)
 
 ENTRY(segment_not_present)
 	RING0_EC_FRAME
+	ASM_CLAC
 	pushl_cfi $do_segment_not_present
 	jmp error_code
 	CFI_ENDPROC
@@ -952,6 +968,7 @@ END(segment_not_present)
 
 ENTRY(stack_segment)
 	RING0_EC_FRAME
+	ASM_CLAC
 	pushl_cfi $do_stack_segment
 	jmp error_code
 	CFI_ENDPROC
@@ -959,6 +976,7 @@ END(stack_segment)
 
 ENTRY(alignment_check)
 	RING0_EC_FRAME
+	ASM_CLAC
 	pushl_cfi $do_alignment_check
 	jmp error_code
 	CFI_ENDPROC
@@ -966,6 +984,7 @@ END(alignment_check)
 
 ENTRY(divide_error)
 	RING0_INT_FRAME
+	ASM_CLAC
 	pushl_cfi $0			# no error code
 	pushl_cfi $do_divide_error
 	jmp error_code
@@ -975,6 +994,7 @@ END(divide_error)
 #ifdef CONFIG_X86_MCE
 ENTRY(machine_check)
 	RING0_INT_FRAME
+	ASM_CLAC
 	pushl_cfi $0
 	pushl_cfi machine_check_vector
 	jmp error_code
@@ -984,6 +1004,7 @@ END(machine_check)
 
 ENTRY(spurious_interrupt_bug)
 	RING0_INT_FRAME
+	ASM_CLAC
 	pushl_cfi $0
 	pushl_cfi $do_spurious_interrupt_bug
 	jmp error_code
@@ -1207,6 +1228,7 @@ return_to_handler:
 
 ENTRY(page_fault)
 	RING0_EC_FRAME
+	ASM_CLAC
 	pushl_cfi $do_page_fault
 	ALIGN
 error_code:
@@ -1279,6 +1301,7 @@ END(page_fault)
 
 ENTRY(debug)
 	RING0_INT_FRAME
+	ASM_CLAC
 	cmpl $ia32_sysenter_target,(%esp)
 	jne debug_stack_correct
 	FIX_STACK 12, debug_stack_correct, debug_esp_fix_insn
@@ -1303,6 +1326,7 @@ END(debug)
  */
 ENTRY(nmi)
 	RING0_INT_FRAME
+	ASM_CLAC
 	pushl_cfi %eax
 	movl %ss, %eax
 	cmpw $__ESPFIX_SS, %ax
@@ -1373,6 +1397,7 @@ END(nmi)
 
 ENTRY(int3)
 	RING0_INT_FRAME
+	ASM_CLAC
 	pushl_cfi $-1			# mark this as an int
 	SAVE_ALL
 	TRACE_IRQS_OFF
@@ -1393,6 +1418,7 @@ END(general_protection)
 #ifdef CONFIG_KVM_GUEST
 ENTRY(async_page_fault)
 	RING0_EC_FRAME
+	ASM_CLAC
 	pushl_cfi $do_async_page_fault
 	jmp error_code
 	CFI_ENDPROC
-- 
cgit v1.2.3-70-g09d2


From b1a74bf8212367be2b1d6685c11a84e056eaaaf1 Mon Sep 17 00:00:00 2001
From: Suresh Siddha <suresh.b.siddha@intel.com>
Date: Thu, 20 Sep 2012 11:01:49 -0700
Subject: x86, kvm: fix kvm's usage of kernel_fpu_begin/end()

Preemption is disabled between kernel_fpu_begin/end() and as such
it is not a good idea to use these routines in kvm_load/put_guest_fpu()
which can be very far apart.

kvm_load/put_guest_fpu() routines are already called with
preemption disabled and KVM already uses the preempt notifier to save
the guest fpu state using kvm_put_guest_fpu().

So introduce __kernel_fpu_begin/end() routines which don't touch
preemption and use them instead of kernel_fpu_begin/end()
for KVM's use model of saving/restoring guest FPU state.

Also with this change (and with eagerFPU model), fix the host cr0.TS vm-exit
state in the case of VMX. For eagerFPU case, host cr0.TS is always clear.
So no need to worry about it. For the traditional lazyFPU restore case,
change the cr0.TS bit for the host state during vm-exit to be always clear
and cr0.TS bit is set in the __vmx_load_host_state() when the FPU
(guest FPU or the host task's FPU) state is not active. This ensures
that the host/guest FPU state is properly saved, restored
during context-switch and with interrupts (using irq_fpu_usable()) not
stomping on the active FPU state.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Link: http://lkml.kernel.org/r/1348164109.26695.338.camel@sbsiddha-desk.sc.intel.com
Cc: Avi Kivity <avi@redhat.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/include/asm/i387.h | 28 ++++++++++++++++++++++++++--
 arch/x86/kernel/i387.c      | 13 +++++--------
 arch/x86/kvm/vmx.c          | 10 +++++++---
 arch/x86/kvm/x86.c          |  4 ++--
 4 files changed, 40 insertions(+), 15 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index 6c3bd378281..ed8089d6909 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -24,8 +24,32 @@ extern int dump_fpu(struct pt_regs *, struct user_i387_struct *);
 extern void math_state_restore(void);
 
 extern bool irq_fpu_usable(void);
-extern void kernel_fpu_begin(void);
-extern void kernel_fpu_end(void);
+
+/*
+ * Careful: __kernel_fpu_begin/end() must be called with preempt disabled
+ * and they don't touch the preempt state on their own.
+ * If you enable preemption after __kernel_fpu_begin(), preempt notifier
+ * should call the __kernel_fpu_end() to prevent the kernel/user FPU
+ * state from getting corrupted. KVM for example uses this model.
+ *
+ * All other cases use kernel_fpu_begin/end() which disable preemption
+ * during kernel FPU usage.
+ */
+extern void __kernel_fpu_begin(void);
+extern void __kernel_fpu_end(void);
+
+static inline void kernel_fpu_begin(void)
+{
+	WARN_ON_ONCE(!irq_fpu_usable());
+	preempt_disable();
+	__kernel_fpu_begin();
+}
+
+static inline void kernel_fpu_end(void)
+{
+	__kernel_fpu_end();
+	preempt_enable();
+}
 
 /*
  * Some instructions like VIA's padlock instructions generate a spurious
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index 6782e398386..675a0501244 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -73,32 +73,29 @@ bool irq_fpu_usable(void)
 }
 EXPORT_SYMBOL(irq_fpu_usable);
 
-void kernel_fpu_begin(void)
+void __kernel_fpu_begin(void)
 {
 	struct task_struct *me = current;
 
-	WARN_ON_ONCE(!irq_fpu_usable());
-	preempt_disable();
 	if (__thread_has_fpu(me)) {
 		__save_init_fpu(me);
 		__thread_clear_has_fpu(me);
-		/* We do 'stts()' in kernel_fpu_end() */
+		/* We do 'stts()' in __kernel_fpu_end() */
 	} else if (!use_eager_fpu()) {
 		this_cpu_write(fpu_owner_task, NULL);
 		clts();
 	}
 }
-EXPORT_SYMBOL(kernel_fpu_begin);
+EXPORT_SYMBOL(__kernel_fpu_begin);
 
-void kernel_fpu_end(void)
+void __kernel_fpu_end(void)
 {
 	if (use_eager_fpu())
 		math_state_restore();
 	else
 		stts();
-	preempt_enable();
 }
-EXPORT_SYMBOL(kernel_fpu_end);
+EXPORT_SYMBOL(__kernel_fpu_end);
 
 void unlazy_fpu(struct task_struct *tsk)
 {
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index c00f03de1b7..70dfcec3c46 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1493,8 +1493,12 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx)
 #ifdef CONFIG_X86_64
 	wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
 #endif
-	if (user_has_fpu())
-		clts();
+	/*
+	 * If the FPU is not active (through the host task or
+	 * the guest vcpu), then restore the cr0.TS bit.
+	 */
+	if (!user_has_fpu() && !vmx->vcpu.guest_fpu_loaded)
+		stts();
 	load_gdt(&__get_cpu_var(host_gdt));
 }
 
@@ -3730,7 +3734,7 @@ static void vmx_set_constant_host_state(void)
 	unsigned long tmpl;
 	struct desc_ptr dt;
 
-	vmcs_writel(HOST_CR0, read_cr0() | X86_CR0_TS);  /* 22.2.3 */
+	vmcs_writel(HOST_CR0, read_cr0() & ~X86_CR0_TS);  /* 22.2.3 */
 	vmcs_writel(HOST_CR4, read_cr4());  /* 22.2.3, 22.2.5 */
 	vmcs_writel(HOST_CR3, read_cr3());  /* 22.2.3  FIXME: shadow tables */
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index cf637f572bd..02b2cd52069 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5972,7 +5972,7 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
 	 */
 	kvm_put_guest_xcr0(vcpu);
 	vcpu->guest_fpu_loaded = 1;
-	kernel_fpu_begin();
+	__kernel_fpu_begin();
 	fpu_restore_checking(&vcpu->arch.guest_fpu);
 	trace_kvm_fpu(1);
 }
@@ -5986,7 +5986,7 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
 
 	vcpu->guest_fpu_loaded = 0;
 	fpu_save_init(&vcpu->arch.guest_fpu);
-	kernel_fpu_end();
+	__kernel_fpu_end();
 	++vcpu->stat.fpu_reload;
 	kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu);
 	trace_kvm_fpu(0);
-- 
cgit v1.2.3-70-g09d2


From 7a84428af7ca6a847f058c9ff244a18a2664fd1b Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@redhat.com>
Date: Fri, 21 Sep 2012 11:58:03 -0600
Subject: KVM: Add resampling irqfds for level triggered interrupts

To emulate level triggered interrupts, add a resample option to
KVM_IRQFD.  When specified, a new resamplefd is provided that notifies
the user when the irqchip has been resampled by the VM.  This may, for
instance, indicate an EOI.  Also in this mode, posting of an interrupt
through an irqfd only asserts the interrupt.  On resampling, the
interrupt is automatically de-asserted prior to user notification.
This enables level triggered interrupts to be posted and re-enabled
from vfio with no userspace intervention.

All resampling irqfds can make use of a single irq source ID, so we
reserve a new one for this interface.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 Documentation/virtual/kvm/api.txt |  13 ++++
 arch/x86/kvm/x86.c                |   4 +
 include/linux/kvm.h               |  12 ++-
 include/linux/kvm_host.h          |   5 +-
 virt/kvm/eventfd.c                | 150 +++++++++++++++++++++++++++++++++++++-
 virt/kvm/irq_comm.c               |   6 ++
 6 files changed, 184 insertions(+), 6 deletions(-)

(limited to 'arch/x86')

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 36befa775fd..f6ec3a92e62 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -1950,6 +1950,19 @@ the guest using the specified gsi pin.  The irqfd is removed using
 the KVM_IRQFD_FLAG_DEASSIGN flag, specifying both kvm_irqfd.fd
 and kvm_irqfd.gsi.
 
+With KVM_CAP_IRQFD_RESAMPLE, KVM_IRQFD supports a de-assert and notify
+mechanism allowing emulation of level-triggered, irqfd-based
+interrupts.  When KVM_IRQFD_FLAG_RESAMPLE is set the user must pass an
+additional eventfd in the kvm_irqfd.resamplefd field.  When operating
+in resample mode, posting of an interrupt through kvm_irq.fd asserts
+the specified gsi in the irqchip.  When the irqchip is resampled, such
+as from an EOI, the gsi is de-asserted and the user is notifed via
+kvm_irqfd.resamplefd.  It is the user's responsibility to re-queue
+the interrupt if the device making use of it still requires service.
+Note that closing the resamplefd is not sufficient to disable the
+irqfd.  The KVM_IRQFD_FLAG_RESAMPLE is only necessary on assignment
+and need not be specified with KVM_IRQFD_FLAG_DEASSIGN.
+
 4.76 KVM_PPC_ALLOCATE_HTAB
 
 Capability: KVM_CAP_PPC_ALLOC_HTAB
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index fc2a0a132e4..7d44204c604 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2176,6 +2176,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_PCI_2_3:
 	case KVM_CAP_KVMCLOCK_CTRL:
 	case KVM_CAP_READONLY_MEM:
+	case KVM_CAP_IRQFD_RESAMPLE:
 		r = 1;
 		break;
 	case KVM_CAP_COALESCED_MMIO:
@@ -6268,6 +6269,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 
 	/* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
 	set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
+	/* Reserve bit 1 of irq_sources_bitmap for irqfd-resampler */
+	set_bit(KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
+		&kvm->arch.irq_sources_bitmap);
 
 	raw_spin_lock_init(&kvm->arch.tsc_write_lock);
 	mutex_init(&kvm->arch.apic_map_lock);
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index d808694673f..0a6d6ba44c8 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -625,6 +625,7 @@ struct kvm_ppc_smmu_info {
 #ifdef __KVM_HAVE_READONLY_MEM
 #define KVM_CAP_READONLY_MEM 81
 #endif
+#define KVM_CAP_IRQFD_RESAMPLE 82
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -690,12 +691,21 @@ struct kvm_xen_hvm_config {
 #endif
 
 #define KVM_IRQFD_FLAG_DEASSIGN (1 << 0)
+/*
+ * Available with KVM_CAP_IRQFD_RESAMPLE
+ *
+ * KVM_IRQFD_FLAG_RESAMPLE indicates resamplefd is valid and specifies
+ * the irqfd to operate in resampling mode for level triggered interrupt
+ * emlation.  See Documentation/virtual/kvm/api.txt.
+ */
+#define KVM_IRQFD_FLAG_RESAMPLE (1 << 1)
 
 struct kvm_irqfd {
 	__u32 fd;
 	__u32 gsi;
 	__u32 flags;
-	__u8  pad[20];
+	__u32 resamplefd;
+	__u8  pad[16];
 };
 
 struct kvm_clock_data {
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 80bfc880921..2850656e2e9 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -119,7 +119,8 @@ static inline bool is_error_page(struct page *page)
 #define KVM_REQ_PMU               16
 #define KVM_REQ_PMI               17
 
-#define KVM_USERSPACE_IRQ_SOURCE_ID	0
+#define KVM_USERSPACE_IRQ_SOURCE_ID		0
+#define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID	1
 
 struct kvm;
 struct kvm_vcpu;
@@ -343,6 +344,8 @@ struct kvm {
 	struct {
 		spinlock_t        lock;
 		struct list_head  items;
+		struct list_head  resampler_list;
+		struct mutex      resampler_lock;
 	} irqfds;
 	struct list_head ioeventfds;
 #endif
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 7d7e2aaffec..356965c9d10 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -43,6 +43,31 @@
  * --------------------------------------------------------------------
  */
 
+/*
+ * Resampling irqfds are a special variety of irqfds used to emulate
+ * level triggered interrupts.  The interrupt is asserted on eventfd
+ * trigger.  On acknowledgement through the irq ack notifier, the
+ * interrupt is de-asserted and userspace is notified through the
+ * resamplefd.  All resamplers on the same gsi are de-asserted
+ * together, so we don't need to track the state of each individual
+ * user.  We can also therefore share the same irq source ID.
+ */
+struct _irqfd_resampler {
+	struct kvm *kvm;
+	/*
+	 * List of resampling struct _irqfd objects sharing this gsi.
+	 * RCU list modified under kvm->irqfds.resampler_lock
+	 */
+	struct list_head list;
+	struct kvm_irq_ack_notifier notifier;
+	/*
+	 * Entry in list of kvm->irqfd.resampler_list.  Use for sharing
+	 * resamplers among irqfds on the same gsi.
+	 * Accessed and modified under kvm->irqfds.resampler_lock
+	 */
+	struct list_head link;
+};
+
 struct _irqfd {
 	/* Used for MSI fast-path */
 	struct kvm *kvm;
@@ -52,6 +77,12 @@ struct _irqfd {
 	/* Used for level IRQ fast-path */
 	int gsi;
 	struct work_struct inject;
+	/* The resampler used by this irqfd (resampler-only) */
+	struct _irqfd_resampler *resampler;
+	/* Eventfd notified on resample (resampler-only) */
+	struct eventfd_ctx *resamplefd;
+	/* Entry in list of irqfds for a resampler (resampler-only) */
+	struct list_head resampler_link;
 	/* Used for setup/shutdown */
 	struct eventfd_ctx *eventfd;
 	struct list_head list;
@@ -67,8 +98,58 @@ irqfd_inject(struct work_struct *work)
 	struct _irqfd *irqfd = container_of(work, struct _irqfd, inject);
 	struct kvm *kvm = irqfd->kvm;
 
-	kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1);
-	kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0);
+	if (!irqfd->resampler) {
+		kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1);
+		kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0);
+	} else
+		kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
+			    irqfd->gsi, 1);
+}
+
+/*
+ * Since resampler irqfds share an IRQ source ID, we de-assert once
+ * then notify all of the resampler irqfds using this GSI.  We can't
+ * do multiple de-asserts or we risk racing with incoming re-asserts.
+ */
+static void
+irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian)
+{
+	struct _irqfd_resampler *resampler;
+	struct _irqfd *irqfd;
+
+	resampler = container_of(kian, struct _irqfd_resampler, notifier);
+
+	kvm_set_irq(resampler->kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
+		    resampler->notifier.gsi, 0);
+
+	rcu_read_lock();
+
+	list_for_each_entry_rcu(irqfd, &resampler->list, resampler_link)
+		eventfd_signal(irqfd->resamplefd, 1);
+
+	rcu_read_unlock();
+}
+
+static void
+irqfd_resampler_shutdown(struct _irqfd *irqfd)
+{
+	struct _irqfd_resampler *resampler = irqfd->resampler;
+	struct kvm *kvm = resampler->kvm;
+
+	mutex_lock(&kvm->irqfds.resampler_lock);
+
+	list_del_rcu(&irqfd->resampler_link);
+	synchronize_rcu();
+
+	if (list_empty(&resampler->list)) {
+		list_del(&resampler->link);
+		kvm_unregister_irq_ack_notifier(kvm, &resampler->notifier);
+		kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
+			    resampler->notifier.gsi, 0);
+		kfree(resampler);
+	}
+
+	mutex_unlock(&kvm->irqfds.resampler_lock);
 }
 
 /*
@@ -92,6 +173,11 @@ irqfd_shutdown(struct work_struct *work)
 	 */
 	flush_work_sync(&irqfd->inject);
 
+	if (irqfd->resampler) {
+		irqfd_resampler_shutdown(irqfd);
+		eventfd_ctx_put(irqfd->resamplefd);
+	}
+
 	/*
 	 * It is now safe to release the object's resources
 	 */
@@ -203,7 +289,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
 	struct kvm_irq_routing_table *irq_rt;
 	struct _irqfd *irqfd, *tmp;
 	struct file *file = NULL;
-	struct eventfd_ctx *eventfd = NULL;
+	struct eventfd_ctx *eventfd = NULL, *resamplefd = NULL;
 	int ret;
 	unsigned int events;
 
@@ -231,6 +317,54 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
 
 	irqfd->eventfd = eventfd;
 
+	if (args->flags & KVM_IRQFD_FLAG_RESAMPLE) {
+		struct _irqfd_resampler *resampler;
+
+		resamplefd = eventfd_ctx_fdget(args->resamplefd);
+		if (IS_ERR(resamplefd)) {
+			ret = PTR_ERR(resamplefd);
+			goto fail;
+		}
+
+		irqfd->resamplefd = resamplefd;
+		INIT_LIST_HEAD(&irqfd->resampler_link);
+
+		mutex_lock(&kvm->irqfds.resampler_lock);
+
+		list_for_each_entry(resampler,
+				    &kvm->irqfds.resampler_list, list) {
+			if (resampler->notifier.gsi == irqfd->gsi) {
+				irqfd->resampler = resampler;
+				break;
+			}
+		}
+
+		if (!irqfd->resampler) {
+			resampler = kzalloc(sizeof(*resampler), GFP_KERNEL);
+			if (!resampler) {
+				ret = -ENOMEM;
+				mutex_unlock(&kvm->irqfds.resampler_lock);
+				goto fail;
+			}
+
+			resampler->kvm = kvm;
+			INIT_LIST_HEAD(&resampler->list);
+			resampler->notifier.gsi = irqfd->gsi;
+			resampler->notifier.irq_acked = irqfd_resampler_ack;
+			INIT_LIST_HEAD(&resampler->link);
+
+			list_add(&resampler->link, &kvm->irqfds.resampler_list);
+			kvm_register_irq_ack_notifier(kvm,
+						      &resampler->notifier);
+			irqfd->resampler = resampler;
+		}
+
+		list_add_rcu(&irqfd->resampler_link, &irqfd->resampler->list);
+		synchronize_rcu();
+
+		mutex_unlock(&kvm->irqfds.resampler_lock);
+	}
+
 	/*
 	 * Install our own custom wake-up handling so we are notified via
 	 * a callback whenever someone signals the underlying eventfd
@@ -276,6 +410,12 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
 	return 0;
 
 fail:
+	if (irqfd->resampler)
+		irqfd_resampler_shutdown(irqfd);
+
+	if (resamplefd && !IS_ERR(resamplefd))
+		eventfd_ctx_put(resamplefd);
+
 	if (eventfd && !IS_ERR(eventfd))
 		eventfd_ctx_put(eventfd);
 
@@ -291,6 +431,8 @@ kvm_eventfd_init(struct kvm *kvm)
 {
 	spin_lock_init(&kvm->irqfds.lock);
 	INIT_LIST_HEAD(&kvm->irqfds.items);
+	INIT_LIST_HEAD(&kvm->irqfds.resampler_list);
+	mutex_init(&kvm->irqfds.resampler_lock);
 	INIT_LIST_HEAD(&kvm->ioeventfds);
 }
 
@@ -340,7 +482,7 @@ kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args)
 int
 kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args)
 {
-	if (args->flags & ~KVM_IRQFD_FLAG_DEASSIGN)
+	if (args->flags & ~(KVM_IRQFD_FLAG_DEASSIGN | KVM_IRQFD_FLAG_RESAMPLE))
 		return -EINVAL;
 
 	if (args->flags & KVM_IRQFD_FLAG_DEASSIGN)
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 3ca89c451d6..2eb58af7ee9 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -228,6 +228,9 @@ int kvm_request_irq_source_id(struct kvm *kvm)
 	}
 
 	ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID);
+#ifdef CONFIG_X86
+	ASSERT(irq_source_id != KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID);
+#endif
 	set_bit(irq_source_id, bitmap);
 unlock:
 	mutex_unlock(&kvm->irq_lock);
@@ -238,6 +241,9 @@ unlock:
 void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id)
 {
 	ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID);
+#ifdef CONFIG_X86
+	ASSERT(irq_source_id != KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID);
+#endif
 
 	mutex_lock(&kvm->irq_lock);
 	if (irq_source_id < 0 ||
-- 
cgit v1.2.3-70-g09d2


From c863901075a42d50678616d8ee4b96ef13080498 Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@siemens.com>
Date: Fri, 21 Sep 2012 05:42:55 +0200
Subject: KVM: x86: Fix guest debug across vcpu INIT reset

If we reset a vcpu on INIT, we so far overwrote dr7 as provided by
KVM_SET_GUEST_DEBUG, and we also cleared switch_db_regs unconditionally.

Fix this by saving the dr7 used for guest debugging and calculating the
effective register value as well as switch_db_regs on any potential
change. This will change to focus of the set_guest_debug vendor op to
update_dp_bp_intercept.

Found while trying to stop on start_secondary.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  4 ++--
 arch/x86/kvm/svm.c              | 23 ++++-------------------
 arch/x86/kvm/vmx.c              | 14 +-------------
 arch/x86/kvm/x86.c              | 26 +++++++++++++++++---------
 4 files changed, 24 insertions(+), 43 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 0b902c98f27..c9a91368fc5 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -471,6 +471,7 @@ struct kvm_vcpu_arch {
 	unsigned long dr6;
 	unsigned long dr7;
 	unsigned long eff_db[KVM_NR_DB_REGS];
+	unsigned long guest_debug_dr7;
 
 	u64 mcg_cap;
 	u64 mcg_status;
@@ -647,8 +648,7 @@ struct kvm_x86_ops {
 	void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu);
 	void (*vcpu_put)(struct kvm_vcpu *vcpu);
 
-	void (*set_guest_debug)(struct kvm_vcpu *vcpu,
-				struct kvm_guest_debug *dbg);
+	void (*update_db_bp_intercept)(struct kvm_vcpu *vcpu);
 	int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata);
 	int (*set_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 data);
 	u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 818fceb3091..d017df3899e 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1146,7 +1146,6 @@ static void init_vmcb(struct vcpu_svm *svm)
 
 	svm_set_efer(&svm->vcpu, 0);
 	save->dr6 = 0xffff0ff0;
-	save->dr7 = 0x400;
 	kvm_set_rflags(&svm->vcpu, 2);
 	save->rip = 0x0000fff0;
 	svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip;
@@ -1643,7 +1642,7 @@ static void svm_set_segment(struct kvm_vcpu *vcpu,
 	mark_dirty(svm->vmcb, VMCB_SEG);
 }
 
-static void update_db_intercept(struct kvm_vcpu *vcpu)
+static void update_db_bp_intercept(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 
@@ -1663,20 +1662,6 @@ static void update_db_intercept(struct kvm_vcpu *vcpu)
 		vcpu->guest_debug = 0;
 }
 
-static void svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
-{
-	struct vcpu_svm *svm = to_svm(vcpu);
-
-	if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
-		svm->vmcb->save.dr7 = dbg->arch.debugreg[7];
-	else
-		svm->vmcb->save.dr7 = vcpu->arch.dr7;
-
-	mark_dirty(svm->vmcb, VMCB_DR);
-
-	update_db_intercept(vcpu);
-}
-
 static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd)
 {
 	if (sd->next_asid > sd->max_asid) {
@@ -1748,7 +1733,7 @@ static int db_interception(struct vcpu_svm *svm)
 		if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP))
 			svm->vmcb->save.rflags &=
 				~(X86_EFLAGS_TF | X86_EFLAGS_RF);
-		update_db_intercept(&svm->vcpu);
+		update_db_bp_intercept(&svm->vcpu);
 	}
 
 	if (svm->vcpu.guest_debug &
@@ -3659,7 +3644,7 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu)
 	 */
 	svm->nmi_singlestep = true;
 	svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
-	update_db_intercept(vcpu);
+	update_db_bp_intercept(vcpu);
 }
 
 static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
@@ -4253,7 +4238,7 @@ static struct kvm_x86_ops svm_x86_ops = {
 	.vcpu_load = svm_vcpu_load,
 	.vcpu_put = svm_vcpu_put,
 
-	.set_guest_debug = svm_guest_debug,
+	.update_db_bp_intercept = update_db_bp_intercept,
 	.get_msr = svm_get_msr,
 	.set_msr = svm_set_msr,
 	.get_segment_base = svm_get_segment_base,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 30bcb953afe..5d46c905e06 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2288,16 +2288,6 @@ static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
 	}
 }
 
-static void set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
-{
-	if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
-		vmcs_writel(GUEST_DR7, dbg->arch.debugreg[7]);
-	else
-		vmcs_writel(GUEST_DR7, vcpu->arch.dr7);
-
-	update_exception_bitmap(vcpu);
-}
-
 static __init int cpu_has_kvm_support(void)
 {
 	return cpu_has_vmx();
@@ -3960,8 +3950,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
 		kvm_rip_write(vcpu, 0);
 	kvm_register_write(vcpu, VCPU_REGS_RSP, 0);
 
-	vmcs_writel(GUEST_DR7, 0x400);
-
 	vmcs_writel(GUEST_GDTR_BASE, 0);
 	vmcs_write32(GUEST_GDTR_LIMIT, 0xffff);
 
@@ -7237,7 +7225,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
 	.vcpu_load = vmx_vcpu_load,
 	.vcpu_put = vmx_vcpu_put,
 
-	.set_guest_debug = set_guest_debug,
+	.update_db_bp_intercept = update_exception_bitmap,
 	.get_msr = vmx_get_msr,
 	.set_msr = vmx_set_msr,
 	.get_segment_base = vmx_get_segment_base,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7d44204c604..b16d4a5bfa4 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -692,6 +692,18 @@ unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_get_cr8);
 
+static void kvm_update_dr7(struct kvm_vcpu *vcpu)
+{
+	unsigned long dr7;
+
+	if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
+		dr7 = vcpu->arch.guest_debug_dr7;
+	else
+		dr7 = vcpu->arch.dr7;
+	kvm_x86_ops->set_dr7(vcpu, dr7);
+	vcpu->arch.switch_db_regs = (dr7 & DR7_BP_EN_MASK);
+}
+
 static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
 {
 	switch (dr) {
@@ -717,10 +729,7 @@ static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
 		if (val & 0xffffffff00000000ULL)
 			return -1; /* #GP */
 		vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1;
-		if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
-			kvm_x86_ops->set_dr7(vcpu, vcpu->arch.dr7);
-			vcpu->arch.switch_db_regs = (val & DR7_BP_EN_MASK);
-		}
+		kvm_update_dr7(vcpu);
 		break;
 	}
 
@@ -5851,13 +5860,12 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
 	if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
 		for (i = 0; i < KVM_NR_DB_REGS; ++i)
 			vcpu->arch.eff_db[i] = dbg->arch.debugreg[i];
-		vcpu->arch.switch_db_regs =
-			(dbg->arch.debugreg[7] & DR7_BP_EN_MASK);
+		vcpu->arch.guest_debug_dr7 = dbg->arch.debugreg[7];
 	} else {
 		for (i = 0; i < KVM_NR_DB_REGS; i++)
 			vcpu->arch.eff_db[i] = vcpu->arch.db[i];
-		vcpu->arch.switch_db_regs = (vcpu->arch.dr7 & DR7_BP_EN_MASK);
 	}
+	kvm_update_dr7(vcpu);
 
 	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
 		vcpu->arch.singlestep_rip = kvm_rip_read(vcpu) +
@@ -5869,7 +5877,7 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
 	 */
 	kvm_set_rflags(vcpu, rflags);
 
-	kvm_x86_ops->set_guest_debug(vcpu, dbg);
+	kvm_x86_ops->update_db_bp_intercept(vcpu);
 
 	r = 0;
 
@@ -6045,10 +6053,10 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
 	vcpu->arch.nmi_pending = 0;
 	vcpu->arch.nmi_injected = false;
 
-	vcpu->arch.switch_db_regs = 0;
 	memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
 	vcpu->arch.dr6 = DR6_FIXED_1;
 	vcpu->arch.dr7 = DR7_FIXED_1;
+	kvm_update_dr7(vcpu);
 
 	kvm_make_request(KVM_REQ_EVENT, vcpu);
 	vcpu->arch.apf.msr_val = 0;
-- 
cgit v1.2.3-70-g09d2


From a2db672aa305a045404615e5222ba681bab6cf58 Mon Sep 17 00:00:00 2001
From: Silas Boyd-Wickizer <sbw@mit.edu>
Date: Fri, 3 Aug 2012 12:33:27 -0700
Subject: Use get_online_cpus to avoid races involving CPU hotplug

If arch/x86/kernel/msr.c is a module, a CPU might offline or online
between the for_each_online_cpu(i) loop and the call to
register_hotcpu_notifier in msr_init or the call to
unregister_hotcpu_notifier in msr_exit. The potential races can lead
to leaks/duplicates, attempts to destroy non-existant devices, or
random pointer dereferences.

For example, in msr_init if:

        for_each_online_cpu(i) {
                err = msr_device_create(i);
                if (err != 0)
                        goto out_class;
        }
        <----- CPU offlines
        register_hotcpu_notifier(&msr_class_cpu_notifier);

and the CPU never onlines before msr_exit, then the module will never
call msr_device_destroy for the associated CPU.

This fix surrounds for_each_online_cpu and register_hotcpu_notifier or
unregister_hotcpu_notifier with get_online_cpus+put_online_cpus.

Tested on a VM.

Signed-off-by: Silas Boyd-Wickizer <sbw@mit.edu>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 arch/x86/kernel/msr.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index eb113693f04..a7c5661f849 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -257,12 +257,14 @@ static int __init msr_init(void)
 		goto out_chrdev;
 	}
 	msr_class->devnode = msr_devnode;
+	get_online_cpus();
 	for_each_online_cpu(i) {
 		err = msr_device_create(i);
 		if (err != 0)
 			goto out_class;
 	}
 	register_hotcpu_notifier(&msr_class_cpu_notifier);
+	put_online_cpus();
 
 	err = 0;
 	goto out;
@@ -271,6 +273,7 @@ out_class:
 	i = 0;
 	for_each_online_cpu(i)
 		msr_device_destroy(i);
+	put_online_cpus();
 	class_destroy(msr_class);
 out_chrdev:
 	__unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr");
@@ -281,11 +284,13 @@ out:
 static void __exit msr_exit(void)
 {
 	int cpu = 0;
+	get_online_cpus();
 	for_each_online_cpu(cpu)
 		msr_device_destroy(cpu);
 	class_destroy(msr_class);
 	__unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr");
 	unregister_hotcpu_notifier(&msr_class_cpu_notifier);
+	put_online_cpus();
 }
 
 module_init(msr_init);
-- 
cgit v1.2.3-70-g09d2


From 429227bbe55647aa42f8f63cac61e4544e248629 Mon Sep 17 00:00:00 2001
From: Silas Boyd-Wickizer <sbw@mit.edu>
Date: Fri, 3 Aug 2012 12:34:50 -0700
Subject: Use get_online_cpus to avoid races involving CPU hotplug

If arch/x86/kernel/cpuid.c is a module, a CPU might offline or online
between the for_each_online_cpu() loop and the call to
register_hotcpu_notifier in cpuid_init or the call to
unregister_hotcpu_notifier in cpuid_exit.  The potential races can
lead to leaks/duplicates, attempts to destroy non-existant devices, or
random pointer dereferences.

For example, in cpuid_exit if:

        for_each_online_cpu(cpu)
                cpuid_device_destroy(cpu);
        class_destroy(cpuid_class);
        __unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid");
        <----- CPU onlines
        unregister_hotcpu_notifier(&cpuid_class_cpu_notifier);

the hotcpu notifier will attempt to create a device for the
cpuid_class, which the module already destroyed.

This fix surrounds for_each_online_cpu and register_hotcpu_notifier or
unregister_hotcpu_notifier with get_online_cpus+put_online_cpus.

Tested on a VM.

Signed-off-by: Silas Boyd-Wickizer <sbw@mit.edu>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 arch/x86/kernel/cpuid.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index 39472dd2323..60c78917190 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -199,12 +199,14 @@ static int __init cpuid_init(void)
 		goto out_chrdev;
 	}
 	cpuid_class->devnode = cpuid_devnode;
+	get_online_cpus();
 	for_each_online_cpu(i) {
 		err = cpuid_device_create(i);
 		if (err != 0)
 			goto out_class;
 	}
 	register_hotcpu_notifier(&cpuid_class_cpu_notifier);
+	put_online_cpus();
 
 	err = 0;
 	goto out;
@@ -214,6 +216,7 @@ out_class:
 	for_each_online_cpu(i) {
 		cpuid_device_destroy(i);
 	}
+	put_online_cpus();
 	class_destroy(cpuid_class);
 out_chrdev:
 	__unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid");
@@ -225,11 +228,13 @@ static void __exit cpuid_exit(void)
 {
 	int cpu = 0;
 
+	get_online_cpus();
 	for_each_online_cpu(cpu)
 		cpuid_device_destroy(cpu);
 	class_destroy(cpuid_class);
 	__unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid");
 	unregister_hotcpu_notifier(&cpuid_class_cpu_notifier);
+	put_online_cpus();
 }
 
 module_init(cpuid_init);
-- 
cgit v1.2.3-70-g09d2


From 8d54db795dfb1049d45dc34f0dddbc5347ec5642 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Fri, 17 Aug 2012 10:22:37 -0400
Subject: xen/boot: Disable NUMA for PV guests.

The hypervisor is in charge of allocating the proper "NUMA" memory
and dealing with the CPU scheduler to keep them bound to the proper
NUMA node. The PV guests (and PVHVM) have no inkling of where they
run and do not need to know that right now. In the future we will
need to inject NUMA configuration data (if a guest spans two or more
NUMA nodes) so that the kernel can make the right choices. But those
patches are not yet present.

In the meantime, disable the NUMA capability in the PV guest, which
also fixes a bootup issue. Andre says:

"we see Dom0 crashes due to the kernel detecting the NUMA topology not
by ACPI, but directly from the northbridge (CONFIG_AMD_NUMA).

This will detect the actual NUMA config of the physical machine, but
will crash about the mismatch with Dom0's virtual memory. Variation of
the theme: Dom0 sees what it's not supposed to see.

This happens with the said config option enabled and on a machine where
this scanning is still enabled (K8 and Fam10h, not Bulldozer class)

We have this dump then:
NUMA: Warning: node ids are out of bound, from=-1 to=-1 distance=10
Scanning NUMA topology in Northbridge 24
Number of physical nodes 4
Node 0 MemBase 0000000000000000 Limit 0000000040000000
Node 1 MemBase 0000000040000000 Limit 0000000138000000
Node 2 MemBase 0000000138000000 Limit 00000001f8000000
Node 3 MemBase 00000001f8000000 Limit 0000000238000000
Initmem setup node 0 0000000000000000-0000000040000000
  NODE_DATA [000000003ffd9000 - 000000003fffffff]
Initmem setup node 1 0000000040000000-0000000138000000
  NODE_DATA [0000000137fd9000 - 0000000137ffffff]
Initmem setup node 2 0000000138000000-00000001f8000000
  NODE_DATA [00000001f095e000 - 00000001f0984fff]
Initmem setup node 3 00000001f8000000-0000000238000000
Cannot find 159744 bytes in node 3
BUG: unable to handle kernel NULL pointer dereference at (null)
IP: [<ffffffff81d220e6>] __alloc_bootmem_node+0x43/0x96
Pid: 0, comm: swapper Not tainted 3.3.6 #1 AMD Dinar/Dinar
RIP: e030:[<ffffffff81d220e6>]  [<ffffffff81d220e6>] __alloc_bootmem_node+0x43/0x96
.. snip..
  [<ffffffff81d23024>] sparse_early_usemaps_alloc_node+0x64/0x178
  [<ffffffff81d23348>] sparse_init+0xe4/0x25a
  [<ffffffff81d16840>] paging_init+0x13/0x22
  [<ffffffff81d07fbb>] setup_arch+0x9c6/0xa9b
  [<ffffffff81683954>] ? printk+0x3c/0x3e
  [<ffffffff81d01a38>] start_kernel+0xe5/0x468
  [<ffffffff81d012cf>] x86_64_start_reservations+0xba/0xc1
  [<ffffffff81007153>] ? xen_setup_runstate_info+0x2c/0x36
  [<ffffffff81d050ee>] xen_start_kernel+0x565/0x56c
"

so we just disable NUMA scanning by setting numa_off=1.

CC: stable@vger.kernel.org
Reported-and-Tested-by: Andre Przywara <andre.przywara@amd.com>
Acked-by: Andre Przywara <andre.przywara@amd.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/xen/setup.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index d11ca11d14f..e2d62d697b5 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -17,6 +17,7 @@
 #include <asm/e820.h>
 #include <asm/setup.h>
 #include <asm/acpi.h>
+#include <asm/numa.h>
 #include <asm/xen/hypervisor.h>
 #include <asm/xen/hypercall.h>
 
@@ -544,4 +545,7 @@ void __init xen_arch_setup(void)
 	disable_cpufreq();
 	WARN_ON(set_pm_idle_to_default());
 	fiddle_vdso();
+#ifdef CONFIG_NUMA
+	numa_off = 1;
+#endif
 }
-- 
cgit v1.2.3-70-g09d2


From b3c869d35b9b014f63ac0beacd31c57372084d01 Mon Sep 17 00:00:00 2001
From: John Stultz <john.stultz@linaro.org>
Date: Tue, 4 Sep 2012 12:42:27 -0400
Subject: jiffies: Remove compile time assumptions about CLOCK_TICK_RATE

CLOCK_TICK_RATE is used to accurately caclulate exactly how
a tick will be at a given HZ.

This is useful, because while we'd expect NSEC_PER_SEC/HZ,
the underlying hardware will have some granularity limit,
so we won't be able to have exactly HZ ticks per second.

This slight error can cause timekeeping quality problems
when using the jiffies or other jiffies driven clocksources.
Thus we currently use compile time CLOCK_TICK_RATE value to
generate SHIFTED_HZ and NSEC_PER_JIFFIES, which we then use
to adjust the jiffies clocksource to correct this error.

Unfortunately though, since CLOCK_TICK_RATE is a compile
time value, and the jiffies clocksource is registered very
early during boot, there are a number of cases where there
are different possible hardware timers that have different
tick rates. This causes problems in cases like ARM where
there are numerous different types of hardware, each having
their own compile-time CLOCK_TICK_RATE, making it hard to
accurately support different hardware with a single kernel.

For the most part, this doesn't matter all that much, as not
too many systems actually utilize the jiffies or jiffies driven
clocksource. Usually there are other highres clocksources
who's granularity error is negligable.

Even so, we have some complicated calcualtions that we do
everywhere to handle these edge cases.

This patch removes the compile time SHIFTED_HZ value, and
introduces a register_refined_jiffies() function. This results
in the default jiffies clock as being assumed a perfect HZ
freq, and allows archtectures that care about jiffies accuracy
to call register_refined_jiffies() with the tick rate, specified
dynamically at boot.

This allows us, where necessary, to not have a compile time
CLOCK_TICK_RATE constant, simplifies the jiffies code, and
still provides a way to have an accurate jiffies clock.

NOTE: Since this patch does not add register_refinied_jiffies()
calls for every arch, it may cause time quality regressions
in some cases. Its likely these will not be noticable, but
if they are an issue, adding the following to the end of
setup_arch() should resolve the regression:
	register_refinied_jiffies(CLOCK_TICK_RATE)

Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 arch/x86/kernel/setup.c |  3 +++
 include/linux/jiffies.h | 15 ++-------------
 kernel/time/jiffies.c   | 32 +++++++++++++++++++++++++++++++-
 3 files changed, 36 insertions(+), 14 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index f4b9b80e1b9..4062f15bfbd 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -68,6 +68,7 @@
 #include <linux/percpu.h>
 #include <linux/crash_dump.h>
 #include <linux/tboot.h>
+#include <linux/jiffies.h>
 
 #include <video/edid.h>
 
@@ -1034,6 +1035,8 @@ void __init setup_arch(char **cmdline_p)
 	mcheck_init();
 
 	arch_init_ideal_nops();
+
+	register_refined_jiffies(CLOCK_TICK_RATE);
 }
 
 #ifdef CONFIG_X86_32
diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h
index 4a7e3864e80..388edb54bfb 100644
--- a/include/linux/jiffies.h
+++ b/include/linux/jiffies.h
@@ -51,21 +51,10 @@
 #define SH_DIV(NOM,DEN,LSH) (   (((NOM) / (DEN)) << (LSH))              \
                              + ((((NOM) % (DEN)) << (LSH)) + (DEN) / 2) / (DEN))
 
-#ifdef CLOCK_TICK_RATE
-/* LATCH is used in the interval timer and ftape setup. */
-# define LATCH ((CLOCK_TICK_RATE + HZ/2) / HZ)	/* For divider */
-
-/*
- * HZ is the requested value. However the CLOCK_TICK_RATE may not allow
- * for exactly HZ. So SHIFTED_HZ is high res HZ ("<< 8" is for accuracy)
- */
-# define SHIFTED_HZ (SH_DIV(CLOCK_TICK_RATE, LATCH, 8))
-#else
-# define SHIFTED_HZ (HZ << 8)
-#endif
+extern int register_refined_jiffies(long clock_tick_rate);
 
 /* TICK_NSEC is the time between ticks in nsec assuming SHIFTED_HZ */
-#define TICK_NSEC (SH_DIV(1000000UL * 1000, SHIFTED_HZ, 8))
+#define TICK_NSEC ((NSEC_PER_SEC+HZ/2)/HZ)
 
 /* TICK_USEC is the time between ticks in usec assuming fake USER_HZ */
 #define TICK_USEC ((1000000UL + USER_HZ/2) / USER_HZ)
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c
index 46da0537c10..6629bf7b528 100644
--- a/kernel/time/jiffies.c
+++ b/kernel/time/jiffies.c
@@ -37,7 +37,7 @@
  * requested HZ value. It is also not recommended
  * for "tick-less" systems.
  */
-#define NSEC_PER_JIFFY	((u32)((((u64)NSEC_PER_SEC)<<8)/SHIFTED_HZ))
+#define NSEC_PER_JIFFY	((NSEC_PER_SEC+HZ/2)/HZ)
 
 /* Since jiffies uses a simple NSEC_PER_JIFFY multiplier
  * conversion, the .shift value could be zero. However
@@ -95,3 +95,33 @@ struct clocksource * __init __weak clocksource_default_clock(void)
 {
 	return &clocksource_jiffies;
 }
+
+struct clocksource refined_jiffies;
+
+int register_refined_jiffies(long cycles_per_second)
+{
+	u64 nsec_per_tick, shift_hz;
+	long cycles_per_tick;
+
+
+
+	refined_jiffies = clocksource_jiffies;
+	refined_jiffies.name = "refined-jiffies";
+	refined_jiffies.rating++;
+
+	/* Calc cycles per tick */
+	cycles_per_tick = (cycles_per_second + HZ/2)/HZ;
+	/* shift_hz stores hz<<8 for extra accuracy */
+	shift_hz = (u64)cycles_per_second << 8;
+	shift_hz += cycles_per_tick/2;
+	do_div(shift_hz, cycles_per_tick);
+	/* Calculate nsec_per_tick using shift_hz */
+	nsec_per_tick = (u64)NSEC_PER_SEC << 8;
+	nsec_per_tick += (u32)shift_hz/2;
+	do_div(nsec_per_tick, (u32)shift_hz);
+
+	refined_jiffies.mult = ((u32)nsec_per_tick) << JIFFIES_SHIFT;
+
+	clocksource_register(&refined_jiffies);
+	return 0;
+}
-- 
cgit v1.2.3-70-g09d2


From 189374aed657e2228ad6b39ece438c9cdafc8dae Mon Sep 17 00:00:00 2001
From: John Stultz <john.stultz@linaro.org>
Date: Tue, 4 Sep 2012 15:27:48 -0400
Subject: time: Move update_vsyscall definitions to timekeeper_internal.h

Since users will need to include timekeeper_internal.h, move
update_vsyscall definitions to timekeeper_internal.h.

Cc: Tony Luck <tony.luck@intel.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Paul Turner <pjt@google.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 arch/ia64/kernel/time.c             |  2 +-
 arch/powerpc/kernel/time.c          |  2 +-
 arch/s390/kernel/time.c             |  2 +-
 arch/x86/kernel/vsyscall_64.c       |  2 +-
 include/linux/clocksource.h         | 16 ----------------
 include/linux/timekeeper_internal.h | 17 +++++++++++++++++
 kernel/time.c                       |  2 +-
 7 files changed, 22 insertions(+), 21 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index ecc904b33c5..acb688fe235 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -19,7 +19,7 @@
 #include <linux/interrupt.h>
 #include <linux/efi.h>
 #include <linux/timex.h>
-#include <linux/clocksource.h>
+#include <linux/timekeeper_internal.h>
 #include <linux/platform_device.h>
 
 #include <asm/machvec.h>
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index e49e93191b6..613a830d9d5 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -73,7 +73,7 @@
 /* powerpc clocksource/clockevent code */
 
 #include <linux/clockchips.h>
-#include <linux/clocksource.h>
+#include <linux/timekeeper_internal.h>
 
 static cycle_t rtc_read(struct clocksource *);
 static struct clocksource clocksource_rtc = {
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index dcec960fc72..bfb62ad312a 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -34,7 +34,7 @@
 #include <linux/profile.h>
 #include <linux/timex.h>
 #include <linux/notifier.h>
-#include <linux/clocksource.h>
+#include <linux/timekeeper_internal.h>
 #include <linux/clockchips.h>
 #include <linux/gfp.h>
 #include <linux/kprobes.h>
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 8d141b30904..6ec8411a471 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -28,7 +28,7 @@
 #include <linux/jiffies.h>
 #include <linux/sysctl.h>
 #include <linux/topology.h>
-#include <linux/clocksource.h>
+#include <linux/timekeeper_internal.h>
 #include <linux/getcpu.h>
 #include <linux/cpu.h>
 #include <linux/smp.h>
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index fbe89e17124..4dceaf8ae15 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -319,22 +319,6 @@ static inline void __clocksource_updatefreq_khz(struct clocksource *cs, u32 khz)
 	__clocksource_updatefreq_scale(cs, 1000, khz);
 }
 
-#ifdef CONFIG_GENERIC_TIME_VSYSCALL
-extern void
-update_vsyscall(struct timespec *ts, struct timespec *wtm,
-			struct clocksource *c, u32 mult);
-extern void update_vsyscall_tz(void);
-#else
-static inline void
-update_vsyscall(struct timespec *ts, struct timespec *wtm,
-			struct clocksource *c, u32 mult)
-{
-}
-
-static inline void update_vsyscall_tz(void)
-{
-}
-#endif
 
 extern void timekeeping_notify(struct clocksource *clock);
 
diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h
index 8ba43fa8c7e..9c1c2cf413a 100644
--- a/include/linux/timekeeper_internal.h
+++ b/include/linux/timekeeper_internal.h
@@ -65,4 +65,21 @@ struct timekeeper {
 	/* Seqlock for all timekeeper values */
 	seqlock_t		lock;
 };
+
+
+#ifdef CONFIG_GENERIC_TIME_VSYSCALL
+extern void
+update_vsyscall(struct timespec *ts, struct timespec *wtm,
+			struct clocksource *c, u32 mult);
+extern void update_vsyscall_tz(void);
+#else
+static inline void update_vsyscall(struct timespec *ts, struct timespec *wtm,
+					struct clocksource *c, u32 mult)
+{
+}
+static inline void update_vsyscall_tz(void)
+{
+}
+#endif
+
 #endif /* _LINUX_TIMEKEEPER_INTERNAL_H */
diff --git a/kernel/time.c b/kernel/time.c
index ba744cf8069..d226c6a3fd2 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -30,7 +30,7 @@
 #include <linux/export.h>
 #include <linux/timex.h>
 #include <linux/capability.h>
-#include <linux/clocksource.h>
+#include <linux/timekeeper_internal.h>
 #include <linux/errno.h>
 #include <linux/syscalls.h>
 #include <linux/security.h>
-- 
cgit v1.2.3-70-g09d2


From 706394211648117762edfaeffd6fc04bf3b1a75d Mon Sep 17 00:00:00 2001
From: John Stultz <john.stultz@linaro.org>
Date: Tue, 4 Sep 2012 15:34:21 -0400
Subject: time: Convert CONFIG_GENERIC_TIME_VSYSCALL to
 CONFIG_GENERIC_TIME_VSYSCALL_OLD

To help migrate archtectures over to the new update_vsyscall method,
redfine CONFIG_GENERIC_TIME_VSYSCALL as CONFIG_GENERIC_TIME_VSYSCALL_OLD

Cc: Tony Luck <tony.luck@intel.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Paul Turner <pjt@google.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 arch/ia64/Kconfig                   | 2 +-
 arch/ia64/kernel/time.c             | 2 +-
 arch/powerpc/Kconfig                | 2 +-
 arch/powerpc/kernel/time.c          | 2 +-
 arch/s390/Kconfig                   | 2 +-
 arch/s390/kernel/time.c             | 2 +-
 arch/x86/Kconfig                    | 2 +-
 arch/x86/kernel/vsyscall_64.c       | 2 +-
 include/linux/timekeeper_internal.h | 7 ++++---
 kernel/time/Kconfig                 | 2 +-
 kernel/time/timekeeping.c           | 2 +-
 11 files changed, 14 insertions(+), 13 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 310cf5781fa..f9e673c252a 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -38,7 +38,7 @@ config IA64
 	select ARCH_TASK_STRUCT_ALLOCATOR
 	select ARCH_THREAD_INFO_ALLOCATOR
 	select ARCH_CLOCKSOURCE_DATA
-	select GENERIC_TIME_VSYSCALL
+	select GENERIC_TIME_VSYSCALL_OLD
 	default y
 	help
 	  The Itanium Processor Family is Intel's 64-bit successor to
diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index acb688fe235..d2f4e260db4 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -454,7 +454,7 @@ void update_vsyscall_tz(void)
 {
 }
 
-void update_vsyscall(struct timespec *wall, struct timespec *wtm,
+void update_vsyscall_old(struct timespec *wall, struct timespec *wtm,
 			struct clocksource *c, u32 mult)
 {
 	write_seqcount_begin(&fsyscall_gtod_data.seq);
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 352f416269c..0881660c9ef 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -135,7 +135,7 @@ config PPC
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	select GENERIC_SMP_IDLE_THREAD
 	select GENERIC_CMOS_UPDATE
-	select GENERIC_TIME_VSYSCALL
+	select GENERIC_TIME_VSYSCALL_OLD
 	select GENERIC_CLOCKEVENTS
 	select GENERIC_STRNCPY_FROM_USER
 	select GENERIC_STRNLEN_USER
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 613a830d9d5..c825809b92e 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -712,7 +712,7 @@ static cycle_t timebase_read(struct clocksource *cs)
 	return (cycle_t)get_tb();
 }
 
-void update_vsyscall(struct timespec *wall_time, struct timespec *wtm,
+void update_vsyscall_old(struct timespec *wall_time, struct timespec *wtm,
 			struct clocksource *clock, u32 mult)
 {
 	u64 new_tb_to_xs, new_stamp_xsec;
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 107610e01a2..ba488aa500d 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -121,7 +121,7 @@ config S390
 	select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE
 	select ARCH_WANT_IPC_PARSE_VERSION
 	select GENERIC_SMP_IDLE_THREAD
-	select GENERIC_TIME_VSYSCALL
+	select GENERIC_TIME_VSYSCALL_OLD
 	select GENERIC_CLOCKEVENTS
 	select KTIME_SCALAR if 32BIT
 	select HAVE_ARCH_SECCOMP_FILTER
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index bfb62ad312a..c5430bf5b5e 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -219,7 +219,7 @@ struct clocksource * __init clocksource_default_clock(void)
 	return &clocksource_tod;
 }
 
-void update_vsyscall(struct timespec *wall_time, struct timespec *wtm,
+void update_vsyscall_old(struct timespec *wall_time, struct timespec *wtm,
 			struct clocksource *clock, u32 mult)
 {
 	if (clock != &clocksource_tod)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 8ec3a1aa4ab..e3f3c1a5a74 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -93,7 +93,7 @@ config X86
 	select GENERIC_CLOCKEVENTS
 	select ARCH_CLOCKSOURCE_DATA if X86_64
 	select GENERIC_CLOCKEVENTS_BROADCAST if X86_64 || (X86_32 && X86_LOCAL_APIC)
-	select GENERIC_TIME_VSYSCALL if X86_64
+	select GENERIC_TIME_VSYSCALL_OLD if X86_64
 	select KTIME_SCALAR if X86_32
 	select GENERIC_STRNCPY_FROM_USER
 	select GENERIC_STRNLEN_USER
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 6ec8411a471..77dde2953c6 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -82,7 +82,7 @@ void update_vsyscall_tz(void)
 	vsyscall_gtod_data.sys_tz = sys_tz;
 }
 
-void update_vsyscall(struct timespec *wall_time, struct timespec *wtm,
+void update_vsyscall_old(struct timespec *wall_time, struct timespec *wtm,
 			struct clocksource *clock, u32 mult)
 {
 	struct timespec monotonic;
diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h
index 9c1c2cf413a..a904d76a5fa 100644
--- a/include/linux/timekeeper_internal.h
+++ b/include/linux/timekeeper_internal.h
@@ -67,13 +67,14 @@ struct timekeeper {
 };
 
 
-#ifdef CONFIG_GENERIC_TIME_VSYSCALL
+#ifdef CONFIG_GENERIC_TIME_VSYSCALL_OLD
 extern void
-update_vsyscall(struct timespec *ts, struct timespec *wtm,
+update_vsyscall_old(struct timespec *ts, struct timespec *wtm,
 			struct clocksource *c, u32 mult);
 extern void update_vsyscall_tz(void);
 #else
-static inline void update_vsyscall(struct timespec *ts, struct timespec *wtm,
+static inline void
+update_vsyscall_old(struct timespec *ts, struct timespec *wtm,
 					struct clocksource *c, u32 mult)
 {
 }
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig
index fd42bd452b7..489c86154d1 100644
--- a/kernel/time/Kconfig
+++ b/kernel/time/Kconfig
@@ -13,7 +13,7 @@ config ARCH_CLOCKSOURCE_DATA
 	bool
 
 # Timekeeping vsyscall support
-config GENERIC_TIME_VSYSCALL
+config GENERIC_TIME_VSYSCALL_OLD
 	bool
 
 # ktime_t scalar 64bit nsec representation
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 02c19d3d8e0..7c2851384c4 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -199,7 +199,7 @@ static void timekeeping_update(struct timekeeper *tk, bool clearntp)
 		ntp_clear();
 	}
 	xt = tk_xtime(tk);
-	update_vsyscall(&xt, &tk->wall_to_monotonic, tk->clock, tk->mult);
+	update_vsyscall_old(&xt, &tk->wall_to_monotonic, tk->clock, tk->mult);
 }
 
 /**
-- 
cgit v1.2.3-70-g09d2


From 650ea02475106e8d6bdf561896d2ffe0d1c0ebb4 Mon Sep 17 00:00:00 2001
From: John Stultz <john.stultz@linaro.org>
Date: Tue, 4 Sep 2012 16:14:46 -0400
Subject: time: Convert x86_64 to using new update_vsyscall

Switch x86_64 to using sub-ns precise vsyscall

Cc: Tony Luck <tony.luck@intel.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Paul Turner <pjt@google.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 arch/x86/Kconfig               |  2 +-
 arch/x86/include/asm/vgtod.h   |  4 ++--
 arch/x86/kernel/vsyscall_64.c  | 47 +++++++++++++++++++++++++-----------------
 arch/x86/vdso/vclock_gettime.c | 22 +++++++++++++-------
 4 files changed, 45 insertions(+), 30 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index e3f3c1a5a74..8ec3a1aa4ab 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -93,7 +93,7 @@ config X86
 	select GENERIC_CLOCKEVENTS
 	select ARCH_CLOCKSOURCE_DATA if X86_64
 	select GENERIC_CLOCKEVENTS_BROADCAST if X86_64 || (X86_32 && X86_LOCAL_APIC)
-	select GENERIC_TIME_VSYSCALL_OLD if X86_64
+	select GENERIC_TIME_VSYSCALL if X86_64
 	select KTIME_SCALAR if X86_32
 	select GENERIC_STRNCPY_FROM_USER
 	select GENERIC_STRNLEN_USER
diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h
index 8b38be2de9e..46e24d36b7d 100644
--- a/arch/x86/include/asm/vgtod.h
+++ b/arch/x86/include/asm/vgtod.h
@@ -17,8 +17,8 @@ struct vsyscall_gtod_data {
 
 	/* open coded 'struct timespec' */
 	time_t		wall_time_sec;
-	u32		wall_time_nsec;
-	u32		monotonic_time_nsec;
+	u64		wall_time_snsec;
+	u64		monotonic_time_snsec;
 	time_t		monotonic_time_sec;
 
 	struct timezone sys_tz;
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 77dde2953c6..3a3e8c9e280 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -82,32 +82,41 @@ void update_vsyscall_tz(void)
 	vsyscall_gtod_data.sys_tz = sys_tz;
 }
 
-void update_vsyscall_old(struct timespec *wall_time, struct timespec *wtm,
-			struct clocksource *clock, u32 mult)
+void update_vsyscall(struct timekeeper *tk)
 {
-	struct timespec monotonic;
+	struct vsyscall_gtod_data *vdata = &vsyscall_gtod_data;
 
-	write_seqcount_begin(&vsyscall_gtod_data.seq);
+	write_seqcount_begin(&vdata->seq);
 
 	/* copy vsyscall data */
-	vsyscall_gtod_data.clock.vclock_mode	= clock->archdata.vclock_mode;
-	vsyscall_gtod_data.clock.cycle_last	= clock->cycle_last;
-	vsyscall_gtod_data.clock.mask		= clock->mask;
-	vsyscall_gtod_data.clock.mult		= mult;
-	vsyscall_gtod_data.clock.shift		= clock->shift;
-
-	vsyscall_gtod_data.wall_time_sec	= wall_time->tv_sec;
-	vsyscall_gtod_data.wall_time_nsec	= wall_time->tv_nsec;
+	vdata->clock.vclock_mode	= tk->clock->archdata.vclock_mode;
+	vdata->clock.cycle_last		= tk->clock->cycle_last;
+	vdata->clock.mask		= tk->clock->mask;
+	vdata->clock.mult		= tk->mult;
+	vdata->clock.shift		= tk->shift;
+
+	vdata->wall_time_sec		= tk->xtime_sec;
+	vdata->wall_time_snsec		= tk->xtime_nsec;
+
+	vdata->monotonic_time_sec	= tk->xtime_sec
+					+ tk->wall_to_monotonic.tv_sec;
+	vdata->monotonic_time_snsec	= tk->xtime_nsec
+					+ (tk->wall_to_monotonic.tv_nsec
+						<< tk->shift);
+	while (vdata->monotonic_time_snsec >=
+					(((u64)NSEC_PER_SEC) << tk->shift)) {
+		vdata->monotonic_time_snsec -=
+					((u64)NSEC_PER_SEC) << tk->shift;
+		vdata->monotonic_time_sec++;
+	}
 
-	monotonic = timespec_add(*wall_time, *wtm);
-	vsyscall_gtod_data.monotonic_time_sec	= monotonic.tv_sec;
-	vsyscall_gtod_data.monotonic_time_nsec	= monotonic.tv_nsec;
+	vdata->wall_time_coarse.tv_sec	= tk->xtime_sec;
+	vdata->wall_time_coarse.tv_nsec	= (long)(tk->xtime_nsec >> tk->shift);
 
-	vsyscall_gtod_data.wall_time_coarse	= __current_kernel_time();
-	vsyscall_gtod_data.monotonic_time_coarse =
-		timespec_add(vsyscall_gtod_data.wall_time_coarse, *wtm);
+	vdata->monotonic_time_coarse	= timespec_add(vdata->wall_time_coarse,
+							tk->wall_to_monotonic);
 
-	write_seqcount_end(&vsyscall_gtod_data.seq);
+	write_seqcount_end(&vdata->seq);
 }
 
 static void warn_bad_vsyscall(const char *level, struct pt_regs *regs,
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index 885eff49d6a..4df6c373421 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -80,7 +80,7 @@ notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
 }
 
 
-notrace static inline long vgetns(void)
+notrace static inline u64 vgetsns(void)
 {
 	long v;
 	cycles_t cycles;
@@ -91,21 +91,24 @@ notrace static inline long vgetns(void)
 	else
 		return 0;
 	v = (cycles - gtod->clock.cycle_last) & gtod->clock.mask;
-	return (v * gtod->clock.mult) >> gtod->clock.shift;
+	return v * gtod->clock.mult;
 }
 
 /* Code size doesn't matter (vdso is 4k anyway) and this is faster. */
 notrace static int __always_inline do_realtime(struct timespec *ts)
 {
-	unsigned long seq, ns;
+	unsigned long seq;
+	u64 ns;
 	int mode;
 
+	ts->tv_nsec = 0;
 	do {
 		seq = read_seqcount_begin(&gtod->seq);
 		mode = gtod->clock.vclock_mode;
 		ts->tv_sec = gtod->wall_time_sec;
-		ts->tv_nsec = gtod->wall_time_nsec;
-		ns = vgetns();
+		ns = gtod->wall_time_snsec;
+		ns += vgetsns();
+		ns >>= gtod->clock.shift;
 	} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
 
 	timespec_add_ns(ts, ns);
@@ -114,15 +117,18 @@ notrace static int __always_inline do_realtime(struct timespec *ts)
 
 notrace static int do_monotonic(struct timespec *ts)
 {
-	unsigned long seq, ns;
+	unsigned long seq;
+	u64 ns;
 	int mode;
 
+	ts->tv_nsec = 0;
 	do {
 		seq = read_seqcount_begin(&gtod->seq);
 		mode = gtod->clock.vclock_mode;
 		ts->tv_sec = gtod->monotonic_time_sec;
-		ts->tv_nsec = gtod->monotonic_time_nsec;
-		ns = vgetns();
+		ns = gtod->monotonic_time_snsec;
+		ns += vgetsns();
+		ns >>= gtod->clock.shift;
 	} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
 	timespec_add_ns(ts, ns);
 
-- 
cgit v1.2.3-70-g09d2


From 82c93fcc2e1737fede2752520f1bf8f4de6304d8 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dxchgb@gmail.com>
Date: Mon, 24 Sep 2012 07:34:51 +0000
Subject: x86: bpf_jit_comp: add XOR instruction for BPF JIT

This patch is a follow-up for patch "filter: add XOR instruction for use
with X/K" that implements BPF x86 JIT parts for the BPF XOR operation.

Signed-off-by: Daniel Borkmann <daniel.borkmann@tik.ee.ethz.ch>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/x86/net/bpf_jit_comp.c | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 106c5782912..520d2bd0b9c 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -335,9 +335,18 @@ void bpf_jit_compile(struct sk_filter *fp)
 					EMIT1_off32(0x0d, K);	/* or imm32,%eax */
 				break;
 			case BPF_S_ANC_ALU_XOR_X: /* A ^= X; */
+			case BPF_S_ALU_XOR_X:
 				seen |= SEEN_XREG;
 				EMIT2(0x31, 0xd8);		/* xor %ebx,%eax */
 				break;
+			case BPF_S_ALU_XOR_K: /* A ^= K; */
+				if (K == 0)
+					break;
+				if (is_imm8(K))
+					EMIT3(0x83, 0xf0, K);	/* xor imm8,%eax */
+				else
+					EMIT1_off32(0x35, K);	/* xor imm32,%eax */
+				break;
 			case BPF_S_ALU_LSH_X: /* A <<= X; */
 				seen |= SEEN_XREG;
 				EMIT4(0x89, 0xd9, 0xd3, 0xe0);	/* mov %ebx,%ecx; shl %cl,%eax */
-- 
cgit v1.2.3-70-g09d2


From fdf9c356502ae02238efcdf90cefd7b473a63fd4 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Sun, 9 Sep 2012 14:56:31 +0200
Subject: cputime: Make finegrained irqtime accounting generally available

There is no known reason for this option to be unavailable on other
archs than x86. They just need to call enable_sched_clock_irqtime()
if they have a sufficiently finegrained clock to make it working.

Move it to the general option and let the user choose between
it and pure tick based or virtual cputime accounting.

Note that virtual cputime accounting already performs a finegrained
irqtime accounting. CONFIG_IRQ_TIME_ACCOUNTING is a kind of middle ground
between tick and virtual based accounting. So CONFIG_IRQ_TIME_ACCOUNTING
and CONFIG_VIRT_CPU_ACCOUNTING are mutually exclusive choices.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
---
 arch/Kconfig     |  6 ++++++
 arch/x86/Kconfig | 12 +-----------
 init/Kconfig     | 30 +++++++++++++++++++++++++++++-
 3 files changed, 36 insertions(+), 12 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/Kconfig b/arch/Kconfig
index f78de57487a..101c31a4744 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -284,4 +284,10 @@ config SECCOMP_FILTER
 config HAVE_VIRT_CPU_ACCOUNTING
 	bool
 
+config HAVE_IRQ_TIME_ACCOUNTING
+	bool
+	help
+	  Archs need to ensure they use a high enough resolution clock to
+	  support irq time accounting and then call enable_sched_clock_irqtime().
+
 source "kernel/gcov/Kconfig"
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 8ec3a1aa4ab..b86833aed83 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -97,6 +97,7 @@ config X86
 	select KTIME_SCALAR if X86_32
 	select GENERIC_STRNCPY_FROM_USER
 	select GENERIC_STRNLEN_USER
+	select HAVE_IRQ_TIME_ACCOUNTING
 
 config INSTRUCTION_DECODER
 	def_bool (KPROBES || PERF_EVENTS || UPROBES)
@@ -796,17 +797,6 @@ config SCHED_MC
 	  making when dealing with multi-core CPU chips at a cost of slightly
 	  increased overhead in some places. If unsure say N here.
 
-config IRQ_TIME_ACCOUNTING
-	bool "Fine granularity task level IRQ time accounting"
-	default n
-	---help---
-	  Select this option to enable fine granularity task irq time
-	  accounting. This is done by reading a timestamp on each
-	  transitions between softirq and hardirq state, so there can be a
-	  small performance impact.
-
-	  If in doubt, say N here.
-
 source "kernel/Kconfig.preempt"
 
 config X86_UP_APIC
diff --git a/init/Kconfig b/init/Kconfig
index 2c5aa3407d6..1862c689307 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -326,10 +326,25 @@ source "kernel/time/Kconfig"
 
 menu "CPU/Task time and stats accounting"
 
+choice
+	prompt "Cputime accounting"
+	default TICK_CPU_ACCOUNTING if !PPC64
+	default VIRT_CPU_ACCOUNTING if PPC64
+
+# Kind of a stub config for the pure tick based cputime accounting
+config TICK_CPU_ACCOUNTING
+	bool "Simple tick based cputime accounting"
+	depends on !S390
+	help
+	  This is the basic tick based cputime accounting that maintains
+	  statistics about user, system and idle time spent on per jiffies
+	  granularity.
+
+	  If unsure, say Y.
+
 config VIRT_CPU_ACCOUNTING
 	bool "Deterministic task and CPU time accounting"
 	depends on HAVE_VIRT_CPU_ACCOUNTING
-	default y if PPC64
 	help
 	  Select this option to enable more accurate task and CPU time
 	  accounting.  This is done by reading a CPU counter on each
@@ -339,6 +354,19 @@ config VIRT_CPU_ACCOUNTING
 	  this also enables accounting of stolen time on logically-partitioned
 	  systems.
 
+config IRQ_TIME_ACCOUNTING
+	bool "Fine granularity task level IRQ time accounting"
+	depends on HAVE_IRQ_TIME_ACCOUNTING
+	help
+	  Select this option to enable fine granularity task irq time
+	  accounting. This is done by reading a timestamp on each
+	  transitions between softirq and hardirq state, so there can be a
+	  small performance impact.
+
+	  If in doubt, say N here.
+
+endchoice
+
 config BSD_PROCESS_ACCT
 	bool "BSD Process Accounting"
 	help
-- 
cgit v1.2.3-70-g09d2


From e139e95590dfebab81841bf7a3ac46500f51a47c Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Tue, 25 Sep 2012 15:42:18 -0700
Subject: x86, smap: Do not abuse the [f][x]rstor_checking() functions for user
 space

With SMAP, the [f][x]rstor_checking() functions are no longer usable
for user-space pointers by applying a simple __force cast.  Instead,
create new [f][x]rstor_user() functions which do the proper SMAP
magic.

Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Cc: Suresh Siddha <suresh.b.siddha@intel.com>
Link: http://lkml.kernel.org/r/1343171129-2747-3-git-send-email-suresh.b.siddha@intel.com
---
 arch/x86/include/asm/fpu-internal.h | 17 +++++++++++++++++
 arch/x86/kernel/xsave.c             |  6 +++---
 2 files changed, 20 insertions(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h
index 409b9ccf551..831dbb9c6c0 100644
--- a/arch/x86/include/asm/fpu-internal.h
+++ b/arch/x86/include/asm/fpu-internal.h
@@ -181,11 +181,28 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
 			  "m" (*fx));
 }
 
+static inline int fxrstor_user(struct i387_fxsave_struct __user *fx)
+{
+	if (config_enabled(CONFIG_X86_32))
+		return user_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
+	else if (config_enabled(CONFIG_AS_FXSAVEQ))
+		return user_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
+
+	/* See comment in fpu_fxsave() below. */
+	return user_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx),
+			  "m" (*fx));
+}
+
 static inline int frstor_checking(struct i387_fsave_struct *fx)
 {
 	return check_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
 }
 
+static inline int frstor_user(struct i387_fsave_struct __user *fx)
+{
+	return user_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
+}
+
 static inline void fpu_fxsave(struct fpu *fpu)
 {
 	if (config_enabled(CONFIG_X86_32))
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index 4e89b3dd408..ada87a329ed 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -315,7 +315,7 @@ static inline int restore_user_xstate(void __user *buf, u64 xbv, int fx_only)
 		if ((unsigned long)buf % 64 || fx_only) {
 			u64 init_bv = pcntxt_mask & ~XSTATE_FPSSE;
 			xrstor_state(init_xstate_buf, init_bv);
-			return fxrstor_checking((__force void *) buf);
+			return fxrstor_user(buf);
 		} else {
 			u64 init_bv = pcntxt_mask & ~xbv;
 			if (unlikely(init_bv))
@@ -323,9 +323,9 @@ static inline int restore_user_xstate(void __user *buf, u64 xbv, int fx_only)
 			return xrestore_user(buf, xbv);
 		}
 	} else if (use_fxsr()) {
-		return fxrstor_checking((__force void *) buf);
+		return fxrstor_user(buf);
 	} else
-		return frstor_checking((__force void *) buf);
+		return frstor_user(buf);
 }
 
 int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size)
-- 
cgit v1.2.3-70-g09d2


From dec08a837fda146fee498ffc5ecd0d2eeeacd025 Mon Sep 17 00:00:00 2001
From: Michael Wang <wangyun@linux.vnet.ibm.com>
Date: Wed, 19 Sep 2012 13:42:23 +0800
Subject: x86: Remove the useless branch in c_start()

Since 'cpu == -1' in cpumask_next() is legal, no need to handle
'*pos == 0' specially.

About the comments:

	/* just in case, cpu 0 is not the first */

A test with a cpumask in which cpu 0 is not the first has been
done, and it works well.

This patch will remove that useless branch to clean the code.

Signed-off-by: Michael Wang <wangyun@linux.vnet.ibm.com>
Cc: kjwinchester@gmail.com
Cc: borislav.petkov@amd.com
Cc: ak@linux.intel.com
Link: http://lkml.kernel.org/r/1348033343-23658-1-git-send-email-wangyun@linux.vnet.ibm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/proc.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
index 8022c668148..fbd89556229 100644
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -140,10 +140,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 
 static void *c_start(struct seq_file *m, loff_t *pos)
 {
-	if (*pos == 0)	/* just in case, cpu 0 is not the first */
-		*pos = cpumask_first(cpu_online_mask);
-	else
-		*pos = cpumask_next(*pos - 1, cpu_online_mask);
+	*pos = cpumask_next(*pos - 1, cpu_online_mask);
 	if ((*pos) < nr_cpu_ids)
 		return &cpu_data(*pos);
 	return NULL;
-- 
cgit v1.2.3-70-g09d2


From 57c078ce13983d27c83f962b3e9722887209005c Mon Sep 17 00:00:00 2001
From: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Date: Wed, 26 Sep 2012 09:54:17 +0900
Subject: x86/api: Rename mp_register_lapic in a comment

Commit 31d2092eb0c23636b73d2c24c0c11b66470cef58 ("x86: move
mp_register_lapic_address to boot.c") renamed mp_register_lapic
to acpi_register_lapic. But mp_register_lapic remains in a
comment. So the patch rename it.

Signed-off-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Cc: Len Brown <lenb@kernel.org>
Link: http://lkml.kernel.org/r/50625239.3050403@jp.fujitsu.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/acpi/boot.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index b2297e58c6e..e651f7a589a 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -656,7 +656,7 @@ static int __cpuinit _acpi_map_lsapic(acpi_handle handle, int *pcpu)
 	acpi_register_lapic(physid, ACPI_MADT_ENABLED);
 
 	/*
-	 * If mp_register_lapic successfully generates a new logical cpu
+	 * If acpi_register_lapic successfully generates a new logical cpu
 	 * number, then the following will get us exactly what was mapped
 	 */
 	cpumask_andnot(new_map, cpu_present_mask, tmp_map);
-- 
cgit v1.2.3-70-g09d2


From 1b2b23d8573076a587ed2081e0d2b69691079e0e Mon Sep 17 00:00:00 2001
From: Tao Guo <glorioustao@gmail.com>
Date: Wed, 26 Sep 2012 04:28:22 -0400
Subject: x86_64: Work around old GAS bug

GAS in binutils(2.16.91) could not parse parentheses within
macro parameters unless fully parenthesized, and this is a
workaround to make old gas work without generating below errors:

 arch/x86/kernel/entry_64.S: Assembler messages:
 arch/x86/kernel/entry_64.S:387: Error: too many positional arguments
 arch/x86/kernel/entry_64.S:389: Error: too many positional arguments
 [...]

Signed-off-by: Tao Guo <glorioustao@gmail.com>
Reluctantly-Acked-by: Jan Beulich <jbeulich@novell.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Link: http://lkml.kernel.org/r/1348648102-12653-1-git-send-email-glorioustao@gmail.com
[ Jan argues that these old GAS versions are fragile - which is so, but lets give them a chance. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/calling.h | 48 ++++++++++++++++++++----------------------
 arch/x86/kernel/entry_64.S     | 20 +++++++++---------
 2 files changed, 33 insertions(+), 35 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/calling.h b/arch/x86/include/asm/calling.h
index a9e3a740f69..7f8422a28a4 100644
--- a/arch/x86/include/asm/calling.h
+++ b/arch/x86/include/asm/calling.h
@@ -49,38 +49,36 @@ For 32-bit we have the following conventions - kernel is built with
 #include "dwarf2.h"
 
 /*
- * 64-bit system call stack frame layout defines and helpers, for
- * assembly code (note that the seemingly unnecessary parentheses
- * are to prevent cpp from inserting spaces in expressions that get
- * passed to macros):
+ * 64-bit system call stack frame layout defines and helpers,
+ * for assembly code:
  */
 
-#define R15		  (0)
-#define R14		  (8)
-#define R13		 (16)
-#define R12		 (24)
-#define RBP		 (32)
-#define RBX		 (40)
+#define R15		  0
+#define R14		  8
+#define R13		 16
+#define R12		 24
+#define RBP		 32
+#define RBX		 40
 
 /* arguments: interrupts/non tracing syscalls only save up to here: */
-#define R11		 (48)
-#define R10		 (56)
-#define R9		 (64)
-#define R8		 (72)
-#define RAX		 (80)
-#define RCX		 (88)
-#define RDX		 (96)
-#define RSI		(104)
-#define RDI		(112)
-#define ORIG_RAX	(120)       /* + error_code */
+#define R11		 48
+#define R10		 56
+#define R9		 64
+#define R8		 72
+#define RAX		 80
+#define RCX		 88
+#define RDX		 96
+#define RSI		104
+#define RDI		112
+#define ORIG_RAX	120       /* + error_code */
 /* end of arguments */
 
 /* cpu exception frame or undefined in case of fast syscall: */
-#define RIP		(128)
-#define CS		(136)
-#define EFLAGS		(144)
-#define RSP		(152)
-#define SS		(160)
+#define RIP		128
+#define CS		136
+#define EFLAGS		144
+#define RSP		152
+#define SS		160
 
 #define ARGOFFSET	R11
 #define SWFRAME		ORIG_RAX
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index b1dac12dc5e..2c6706167c8 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -342,15 +342,15 @@ ENDPROC(native_usergs_sysret64)
 	.macro SAVE_ARGS_IRQ
 	cld
 	/* start from rbp in pt_regs and jump over */
-	movq_cfi rdi, RDI-RBP
-	movq_cfi rsi, RSI-RBP
-	movq_cfi rdx, RDX-RBP
-	movq_cfi rcx, RCX-RBP
-	movq_cfi rax, RAX-RBP
-	movq_cfi  r8,  R8-RBP
-	movq_cfi  r9,  R9-RBP
-	movq_cfi r10, R10-RBP
-	movq_cfi r11, R11-RBP
+	movq_cfi rdi, (RDI-RBP)
+	movq_cfi rsi, (RSI-RBP)
+	movq_cfi rdx, (RDX-RBP)
+	movq_cfi rcx, (RCX-RBP)
+	movq_cfi rax, (RAX-RBP)
+	movq_cfi  r8,  (R8-RBP)
+	movq_cfi  r9,  (R9-RBP)
+	movq_cfi r10, (R10-RBP)
+	movq_cfi r11, (R11-RBP)
 
 	/* Save rbp so that we can unwind from get_irq_regs() */
 	movq_cfi rbp, 0
@@ -384,7 +384,7 @@ ENDPROC(native_usergs_sysret64)
 	.endm
 
 ENTRY(save_rest)
-	PARTIAL_FRAME 1 REST_SKIP+8
+	PARTIAL_FRAME 1 (REST_SKIP+8)
 	movq 5*8+16(%rsp), %r11	/* save return address */
 	movq_cfi rbx, RBX+16
 	movq_cfi rbp, RBP+16
-- 
cgit v1.2.3-70-g09d2


From c416ddf5b909736f5b57d348f5de159693e699ad Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Tue, 25 Sep 2012 14:51:19 +0200
Subject: x86: Unspaghettize do_trap()

Cleanup the label maze in this function. Having a
seperate function to first handle the traps that don't
generate a signal makes it easier to convert into
more readable conditional paths.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1348577479-2564-1-git-send-email-fweisbec@gmail.com
[ Fixed 32-bit build failure. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/traps.c | 60 ++++++++++++++++++++++++-------------------------
 1 file changed, 29 insertions(+), 31 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index b481341c936..6ff771559af 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -107,30 +107,45 @@ static inline void preempt_conditional_cli(struct pt_regs *regs)
 	dec_preempt_count();
 }
 
-static void __kprobes
-do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
-	long error_code, siginfo_t *info)
+static int __kprobes
+do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str,
+		  struct pt_regs *regs,	long error_code)
 {
-	struct task_struct *tsk = current;
-
 #ifdef CONFIG_X86_32
 	if (regs->flags & X86_VM_MASK) {
 		/*
-		 * traps 0, 1, 3, 4, and 5 should be forwarded to vm86.
+		 * Traps 0, 1, 3, 4, and 5 should be forwarded to vm86.
 		 * On nmi (interrupt 2), do_trap should not be called.
 		 */
-		if (trapnr < X86_TRAP_UD)
-			goto vm86_trap;
-		goto trap_signal;
+		if (trapnr < X86_TRAP_UD) {
+			if (!handle_vm86_trap((struct kernel_vm86_regs *) regs,
+						error_code, trapnr))
+				return 0;
+		}
+		return -1;
 	}
 #endif
+	if (!user_mode(regs)) {
+		if (!fixup_exception(regs)) {
+			tsk->thread.error_code = error_code;
+			tsk->thread.trap_nr = trapnr;
+			die(str, regs, error_code);
+		}
+		return 0;
+	}
 
-	if (!user_mode(regs))
-		goto kernel_trap;
+	return -1;
+}
 
-#ifdef CONFIG_X86_32
-trap_signal:
-#endif
+static void __kprobes
+do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
+	long error_code, siginfo_t *info)
+{
+	struct task_struct *tsk = current;
+
+
+	if (!do_trap_no_signal(tsk, trapnr, str, regs, error_code))
+		return;
 	/*
 	 * We want error_code and trap_nr set for userspace faults and
 	 * kernelspace faults which result in die(), but not
@@ -158,23 +173,6 @@ trap_signal:
 		force_sig_info(signr, info, tsk);
 	else
 		force_sig(signr, tsk);
-	return;
-
-kernel_trap:
-	if (!fixup_exception(regs)) {
-		tsk->thread.error_code = error_code;
-		tsk->thread.trap_nr = trapnr;
-		die(str, regs, error_code);
-	}
-	return;
-
-#ifdef CONFIG_X86_32
-vm86_trap:
-	if (handle_vm86_trap((struct kernel_vm86_regs *) regs,
-						error_code, trapnr))
-		goto trap_signal;
-	return;
-#endif
 }
 
 #define DO_ERROR(trapnr, signr, str, name)				\
-- 
cgit v1.2.3-70-g09d2


From bf5a3c13b939813d28ce26c01425054c740d6731 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Wed, 11 Jul 2012 20:26:34 +0200
Subject: x86: Syscall hooks for userspace RCU extended QS

Add syscall slow path hooks to notify syscall entry
and exit on CPUs that want to support userspace RCU
extended quiescent state.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Alessio Igor Bogani <abogani@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Avi Kivity <avi@redhat.com>
Cc: Chris Metcalf <cmetcalf@tilera.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Geoff Levand <geoff@infradead.org>
Cc: Gilad Ben Yossef <gilad@benyossef.com>
Cc: Hakan Akkan <hakanakkan@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Josh Triplett <josh@joshtriplett.org>
Cc: Kevin Hilman <khilman@ti.com>
Cc: Max Krasnyansky <maxk@qualcomm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephen Hemminger <shemminger@vyatta.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Sven-Thorsten Dietrich <thebigcorporation@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 arch/x86/include/asm/thread_info.h | 10 +++++++---
 arch/x86/kernel/ptrace.c           |  5 +++++
 2 files changed, 12 insertions(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 89f794f007e..c535d847e3b 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -89,6 +89,7 @@ struct thread_info {
 #define TIF_NOTSC		16	/* TSC is not accessible in userland */
 #define TIF_IA32		17	/* IA32 compatibility process */
 #define TIF_FORK		18	/* ret_from_fork */
+#define TIF_NOHZ		19	/* in adaptive nohz mode */
 #define TIF_MEMDIE		20	/* is terminating due to OOM killer */
 #define TIF_DEBUG		21	/* uses debug registers */
 #define TIF_IO_BITMAP		22	/* uses I/O bitmap */
@@ -114,6 +115,7 @@ struct thread_info {
 #define _TIF_NOTSC		(1 << TIF_NOTSC)
 #define _TIF_IA32		(1 << TIF_IA32)
 #define _TIF_FORK		(1 << TIF_FORK)
+#define _TIF_NOHZ		(1 << TIF_NOHZ)
 #define _TIF_DEBUG		(1 << TIF_DEBUG)
 #define _TIF_IO_BITMAP		(1 << TIF_IO_BITMAP)
 #define _TIF_FORCED_TF		(1 << TIF_FORCED_TF)
@@ -126,12 +128,13 @@ struct thread_info {
 /* work to do in syscall_trace_enter() */
 #define _TIF_WORK_SYSCALL_ENTRY	\
 	(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | _TIF_SYSCALL_AUDIT |	\
-	 _TIF_SECCOMP | _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT)
+	 _TIF_SECCOMP | _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT |	\
+	 _TIF_NOHZ)
 
 /* work to do in syscall_trace_leave() */
 #define _TIF_WORK_SYSCALL_EXIT	\
 	(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SINGLESTEP |	\
-	 _TIF_SYSCALL_TRACEPOINT)
+	 _TIF_SYSCALL_TRACEPOINT | _TIF_NOHZ)
 
 /* work to do on interrupt/exception return */
 #define _TIF_WORK_MASK							\
@@ -141,7 +144,8 @@ struct thread_info {
 
 /* work to do on any return to user space */
 #define _TIF_ALLWORK_MASK						\
-	((0x0000FFFF & ~_TIF_SECCOMP) | _TIF_SYSCALL_TRACEPOINT)
+	((0x0000FFFF & ~_TIF_SECCOMP) | _TIF_SYSCALL_TRACEPOINT |	\
+	_TIF_NOHZ)
 
 /* Only used for 64 bit */
 #define _TIF_DO_NOTIFY_MASK						\
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index c4c6a5c2bf0..9f94f8ec26e 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -21,6 +21,7 @@
 #include <linux/signal.h>
 #include <linux/perf_event.h>
 #include <linux/hw_breakpoint.h>
+#include <linux/rcupdate.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -1463,6 +1464,8 @@ long syscall_trace_enter(struct pt_regs *regs)
 {
 	long ret = 0;
 
+	rcu_user_exit();
+
 	/*
 	 * If we stepped into a sysenter/syscall insn, it trapped in
 	 * kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP.
@@ -1526,4 +1529,6 @@ void syscall_trace_leave(struct pt_regs *regs)
 			!test_thread_flag(TIF_SYSCALL_EMU);
 	if (step || test_thread_flag(TIF_SYSCALL_TRACE))
 		tracehook_report_syscall_exit(regs, step);
+
+	rcu_user_enter();
 }
-- 
cgit v1.2.3-70-g09d2


From ef3f628872c838933a279d0d7e63e707783c9710 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Mon, 24 Sep 2012 21:05:52 +0200
Subject: x86: Unspaghettize do_general_protection()

There is some unnatural label based layout in this function.
Convert the unnecessary goto to readable conditional blocks.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/traps.c | 38 ++++++++++++++++----------------------
 1 file changed, 16 insertions(+), 22 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index b481341c936..74850e55944 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -258,13 +258,25 @@ do_general_protection(struct pt_regs *regs, long error_code)
 	conditional_sti(regs);
 
 #ifdef CONFIG_X86_32
-	if (regs->flags & X86_VM_MASK)
-		goto gp_in_vm86;
+	if (regs->flags & X86_VM_MASK) {
+		local_irq_enable();
+		handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code);
+		return;
+	}
 #endif
 
 	tsk = current;
-	if (!user_mode(regs))
-		goto gp_in_kernel;
+	if (!user_mode(regs)) {
+		if (fixup_exception(regs))
+			return;
+
+		tsk->thread.error_code = error_code;
+		tsk->thread.trap_nr = X86_TRAP_GP;
+		if (!notify_die(DIE_GPF, "general protection fault", regs, error_code,
+			       X86_TRAP_GP, SIGSEGV) == NOTIFY_STOP)
+			die("general protection fault", regs, error_code);
+		return;
+	}
 
 	tsk->thread.error_code = error_code;
 	tsk->thread.trap_nr = X86_TRAP_GP;
@@ -280,24 +292,6 @@ do_general_protection(struct pt_regs *regs, long error_code)
 
 	force_sig(SIGSEGV, tsk);
 	return;
-
-#ifdef CONFIG_X86_32
-gp_in_vm86:
-	local_irq_enable();
-	handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code);
-	return;
-#endif
-
-gp_in_kernel:
-	if (fixup_exception(regs))
-		return;
-
-	tsk->thread.error_code = error_code;
-	tsk->thread.trap_nr = X86_TRAP_GP;
-	if (notify_die(DIE_GPF, "general protection fault", regs, error_code,
-			X86_TRAP_GP, SIGSEGV) == NOTIFY_STOP)
-		return;
-	die("general protection fault", regs, error_code);
 }
 
 /* May run on IST stack. */
-- 
cgit v1.2.3-70-g09d2


From 6ba3c97a38803883c2eee489505796cb0a727122 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Wed, 11 Jul 2012 20:26:35 +0200
Subject: x86: Exception hooks for userspace RCU extended QS

Add necessary hooks to x86 exception for userspace
RCU extended quiescent state support.

This includes traps, page fault, debug exceptions, etc...

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Alessio Igor Bogani <abogani@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Avi Kivity <avi@redhat.com>
Cc: Chris Metcalf <cmetcalf@tilera.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Geoff Levand <geoff@infradead.org>
Cc: Gilad Ben Yossef <gilad@benyossef.com>
Cc: Hakan Akkan <hakanakkan@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Josh Triplett <josh@joshtriplett.org>
Cc: Kevin Hilman <khilman@ti.com>
Cc: Max Krasnyansky <maxk@qualcomm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephen Hemminger <shemminger@vyatta.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Sven-Thorsten Dietrich <thebigcorporation@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 arch/x86/include/asm/rcu.h | 20 ++++++++++++
 arch/x86/kernel/traps.c    | 81 +++++++++++++++++++++++++++++++---------------
 arch/x86/mm/fault.c        | 13 ++++++--
 3 files changed, 86 insertions(+), 28 deletions(-)
 create mode 100644 arch/x86/include/asm/rcu.h

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/rcu.h b/arch/x86/include/asm/rcu.h
new file mode 100644
index 00000000000..439815b35ce
--- /dev/null
+++ b/arch/x86/include/asm/rcu.h
@@ -0,0 +1,20 @@
+#ifndef _ASM_X86_RCU_H
+#define _ASM_X86_RCU_H
+
+#include <linux/rcupdate.h>
+#include <asm/ptrace.h>
+
+static inline void exception_enter(struct pt_regs *regs)
+{
+	rcu_user_exit();
+}
+
+static inline void exception_exit(struct pt_regs *regs)
+{
+#ifdef CONFIG_RCU_USER_QS
+	if (user_mode(regs))
+		rcu_user_enter();
+#endif
+}
+
+#endif
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 74850e55944..378967578f2 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -55,6 +55,7 @@
 #include <asm/i387.h>
 #include <asm/fpu-internal.h>
 #include <asm/mce.h>
+#include <asm/rcu.h>
 
 #include <asm/mach_traps.h>
 
@@ -180,11 +181,15 @@ vm86_trap:
 #define DO_ERROR(trapnr, signr, str, name)				\
 dotraplinkage void do_##name(struct pt_regs *regs, long error_code)	\
 {									\
-	if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr)	\
-							== NOTIFY_STOP)	\
+	exception_enter(regs);						\
+	if (notify_die(DIE_TRAP, str, regs, error_code,			\
+			trapnr, signr) == NOTIFY_STOP) {		\
+		exception_exit(regs);					\
 		return;							\
+	}								\
 	conditional_sti(regs);						\
 	do_trap(trapnr, signr, str, regs, error_code, NULL);		\
+	exception_exit(regs);						\
 }
 
 #define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr)		\
@@ -195,11 +200,15 @@ dotraplinkage void do_##name(struct pt_regs *regs, long error_code)	\
 	info.si_errno = 0;						\
 	info.si_code = sicode;						\
 	info.si_addr = (void __user *)siaddr;				\
-	if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr)	\
-							== NOTIFY_STOP)	\
+	exception_enter(regs);						\
+	if (notify_die(DIE_TRAP, str, regs, error_code,			\
+			trapnr, signr) == NOTIFY_STOP) {		\
+		exception_exit(regs);					\
 		return;							\
+	}								\
 	conditional_sti(regs);						\
 	do_trap(trapnr, signr, str, regs, error_code, &info);		\
+	exception_exit(regs);						\
 }
 
 DO_ERROR_INFO(X86_TRAP_DE, SIGFPE, "divide error", divide_error, FPE_INTDIV,
@@ -222,12 +231,14 @@ DO_ERROR_INFO(X86_TRAP_AC, SIGBUS, "alignment check", alignment_check,
 /* Runs on IST stack */
 dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code)
 {
+	exception_enter(regs);
 	if (notify_die(DIE_TRAP, "stack segment", regs, error_code,
-			X86_TRAP_SS, SIGBUS) == NOTIFY_STOP)
-		return;
-	preempt_conditional_sti(regs);
-	do_trap(X86_TRAP_SS, SIGBUS, "stack segment", regs, error_code, NULL);
-	preempt_conditional_cli(regs);
+		       X86_TRAP_SS, SIGBUS) != NOTIFY_STOP) {
+		preempt_conditional_sti(regs);
+		do_trap(X86_TRAP_SS, SIGBUS, "stack segment", regs, error_code, NULL);
+		preempt_conditional_cli(regs);
+	}
+	exception_exit(regs);
 }
 
 dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
@@ -235,6 +246,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
 	static const char str[] = "double fault";
 	struct task_struct *tsk = current;
 
+	exception_enter(regs);
 	/* Return not checked because double check cannot be ignored */
 	notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV);
 
@@ -255,27 +267,28 @@ do_general_protection(struct pt_regs *regs, long error_code)
 {
 	struct task_struct *tsk;
 
+	exception_enter(regs);
 	conditional_sti(regs);
 
 #ifdef CONFIG_X86_32
 	if (regs->flags & X86_VM_MASK) {
 		local_irq_enable();
 		handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code);
-		return;
+		goto exit;
 	}
 #endif
 
 	tsk = current;
 	if (!user_mode(regs)) {
 		if (fixup_exception(regs))
-			return;
+			goto exit;
 
 		tsk->thread.error_code = error_code;
 		tsk->thread.trap_nr = X86_TRAP_GP;
-		if (!notify_die(DIE_GPF, "general protection fault", regs, error_code,
-			       X86_TRAP_GP, SIGSEGV) == NOTIFY_STOP)
+		if (notify_die(DIE_GPF, "general protection fault", regs, error_code,
+			       X86_TRAP_GP, SIGSEGV) != NOTIFY_STOP)
 			die("general protection fault", regs, error_code);
-		return;
+		goto exit;
 	}
 
 	tsk->thread.error_code = error_code;
@@ -291,7 +304,8 @@ do_general_protection(struct pt_regs *regs, long error_code)
 	}
 
 	force_sig(SIGSEGV, tsk);
-	return;
+exit:
+	exception_exit(regs);
 }
 
 /* May run on IST stack. */
@@ -306,15 +320,16 @@ dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_co
 	    ftrace_int3_handler(regs))
 		return;
 #endif
+	exception_enter(regs);
 #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP
 	if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP,
 				SIGTRAP) == NOTIFY_STOP)
-		return;
+		goto exit;
 #endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */
 
 	if (notify_die(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP,
 			SIGTRAP) == NOTIFY_STOP)
-		return;
+		goto exit;
 
 	/*
 	 * Let others (NMI) know that the debug stack is in use
@@ -325,6 +340,8 @@ dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_co
 	do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, error_code, NULL);
 	preempt_conditional_cli(regs);
 	debug_stack_usage_dec();
+exit:
+	exception_exit(regs);
 }
 
 #ifdef CONFIG_X86_64
@@ -385,6 +402,8 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
 	unsigned long dr6;
 	int si_code;
 
+	exception_enter(regs);
+
 	get_debugreg(dr6, 6);
 
 	/* Filter out all the reserved bits which are preset to 1 */
@@ -400,7 +419,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
 
 	/* Catch kmemcheck conditions first of all! */
 	if ((dr6 & DR_STEP) && kmemcheck_trap(regs))
-		return;
+		goto exit;
 
 	/* DR6 may or may not be cleared by the CPU */
 	set_debugreg(0, 6);
@@ -415,7 +434,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
 
 	if (notify_die(DIE_DEBUG, "debug", regs, PTR_ERR(&dr6), error_code,
 							SIGTRAP) == NOTIFY_STOP)
-		return;
+		goto exit;
 
 	/*
 	 * Let others (NMI) know that the debug stack is in use
@@ -431,7 +450,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
 					X86_TRAP_DB);
 		preempt_conditional_cli(regs);
 		debug_stack_usage_dec();
-		return;
+		goto exit;
 	}
 
 	/*
@@ -452,7 +471,8 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
 	preempt_conditional_cli(regs);
 	debug_stack_usage_dec();
 
-	return;
+exit:
+	exception_exit(regs);
 }
 
 /*
@@ -549,14 +569,17 @@ dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code)
 #ifdef CONFIG_X86_32
 	ignore_fpu_irq = 1;
 #endif
-
+	exception_enter(regs);
 	math_error(regs, error_code, X86_TRAP_MF);
+	exception_exit(regs);
 }
 
 dotraplinkage void
 do_simd_coprocessor_error(struct pt_regs *regs, long error_code)
 {
+	exception_enter(regs);
 	math_error(regs, error_code, X86_TRAP_XF);
+	exception_exit(regs);
 }
 
 dotraplinkage void
@@ -623,6 +646,7 @@ EXPORT_SYMBOL_GPL(math_state_restore);
 dotraplinkage void __kprobes
 do_device_not_available(struct pt_regs *regs, long error_code)
 {
+	exception_enter(regs);
 #ifdef CONFIG_MATH_EMULATION
 	if (read_cr0() & X86_CR0_EM) {
 		struct math_emu_info info = { };
@@ -631,6 +655,7 @@ do_device_not_available(struct pt_regs *regs, long error_code)
 
 		info.regs = regs;
 		math_emulate(&info);
+		exception_exit(regs);
 		return;
 	}
 #endif
@@ -638,12 +663,15 @@ do_device_not_available(struct pt_regs *regs, long error_code)
 #ifdef CONFIG_X86_32
 	conditional_sti(regs);
 #endif
+	exception_exit(regs);
 }
 
 #ifdef CONFIG_X86_32
 dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
 {
 	siginfo_t info;
+
+	exception_enter(regs);
 	local_irq_enable();
 
 	info.si_signo = SIGILL;
@@ -651,10 +679,11 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
 	info.si_code = ILL_BADSTK;
 	info.si_addr = NULL;
 	if (notify_die(DIE_TRAP, "iret exception", regs, error_code,
-			X86_TRAP_IRET, SIGILL) == NOTIFY_STOP)
-		return;
-	do_trap(X86_TRAP_IRET, SIGILL, "iret exception", regs, error_code,
-		&info);
+			X86_TRAP_IRET, SIGILL) != NOTIFY_STOP) {
+		do_trap(X86_TRAP_IRET, SIGILL, "iret exception", regs, error_code,
+			&info);
+	}
+	exception_exit(regs);
 }
 #endif
 
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 76dcd9d8e0b..7dde46d68a2 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -18,6 +18,7 @@
 #include <asm/pgalloc.h>		/* pgd_*(), ...			*/
 #include <asm/kmemcheck.h>		/* kmemcheck_*(), ...		*/
 #include <asm/fixmap.h>			/* VSYSCALL_START		*/
+#include <asm/rcu.h>			/* exception_enter(), ...	*/
 
 /*
  * Page fault error code bits:
@@ -1000,8 +1001,8 @@ static int fault_in_kernel_space(unsigned long address)
  * and the problem, and then passes it off to one of the appropriate
  * routines.
  */
-dotraplinkage void __kprobes
-do_page_fault(struct pt_regs *regs, unsigned long error_code)
+static void __kprobes
+__do_page_fault(struct pt_regs *regs, unsigned long error_code)
 {
 	struct vm_area_struct *vma;
 	struct task_struct *tsk;
@@ -1209,3 +1210,11 @@ good_area:
 
 	up_read(&mm->mmap_sem);
 }
+
+dotraplinkage void __kprobes
+do_page_fault(struct pt_regs *regs, unsigned long error_code)
+{
+	exception_enter(regs);
+	__do_page_fault(regs, error_code);
+	exception_exit(regs);
+}
-- 
cgit v1.2.3-70-g09d2


From 0430499ce9d78691f3985962021b16bf8f8a8048 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Wed, 11 Jul 2012 20:26:38 +0200
Subject: x86: Use the new schedule_user API on userspace preemption

This way we can exit the RCU extended quiescent state before
we schedule a new task from irq/exception exit.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Alessio Igor Bogani <abogani@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Avi Kivity <avi@redhat.com>
Cc: Chris Metcalf <cmetcalf@tilera.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Geoff Levand <geoff@infradead.org>
Cc: Gilad Ben Yossef <gilad@benyossef.com>
Cc: Hakan Akkan <hakanakkan@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Josh Triplett <josh@joshtriplett.org>
Cc: Kevin Hilman <khilman@ti.com>
Cc: Max Krasnyansky <maxk@qualcomm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephen Hemminger <shemminger@vyatta.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Sven-Thorsten Dietrich <thebigcorporation@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 arch/x86/include/asm/rcu.h | 12 ++++++++++++
 arch/x86/kernel/entry_64.S |  9 +++++----
 2 files changed, 17 insertions(+), 4 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/rcu.h b/arch/x86/include/asm/rcu.h
index 439815b35ce..d1ac07a2397 100644
--- a/arch/x86/include/asm/rcu.h
+++ b/arch/x86/include/asm/rcu.h
@@ -1,6 +1,8 @@
 #ifndef _ASM_X86_RCU_H
 #define _ASM_X86_RCU_H
 
+#ifndef __ASSEMBLY__
+
 #include <linux/rcupdate.h>
 #include <asm/ptrace.h>
 
@@ -17,4 +19,14 @@ static inline void exception_exit(struct pt_regs *regs)
 #endif
 }
 
+#else /* __ASSEMBLY__ */
+
+#ifdef CONFIG_RCU_USER_QS
+# define SCHEDULE_USER call schedule_user
+#else
+# define SCHEDULE_USER call schedule
+#endif
+
+#endif /* !__ASSEMBLY__ */
+
 #endif
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 69babd8c834..1a8f3cbb6ee 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -56,6 +56,7 @@
 #include <asm/ftrace.h>
 #include <asm/percpu.h>
 #include <asm/asm.h>
+#include <asm/rcu.h>
 #include <linux/err.h>
 
 /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this.  */
@@ -565,7 +566,7 @@ sysret_careful:
 	TRACE_IRQS_ON
 	ENABLE_INTERRUPTS(CLBR_NONE)
 	pushq_cfi %rdi
-	call schedule
+	SCHEDULE_USER
 	popq_cfi %rdi
 	jmp sysret_check
 
@@ -678,7 +679,7 @@ int_careful:
 	TRACE_IRQS_ON
 	ENABLE_INTERRUPTS(CLBR_NONE)
 	pushq_cfi %rdi
-	call schedule
+	SCHEDULE_USER
 	popq_cfi %rdi
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
@@ -974,7 +975,7 @@ retint_careful:
 	TRACE_IRQS_ON
 	ENABLE_INTERRUPTS(CLBR_NONE)
 	pushq_cfi %rdi
-	call  schedule
+	SCHEDULE_USER
 	popq_cfi %rdi
 	GET_THREAD_INFO(%rcx)
 	DISABLE_INTERRUPTS(CLBR_NONE)
@@ -1449,7 +1450,7 @@ paranoid_userspace:
 paranoid_schedule:
 	TRACE_IRQS_ON
 	ENABLE_INTERRUPTS(CLBR_ANY)
-	call schedule
+	SCHEDULE_USER
 	DISABLE_INTERRUPTS(CLBR_ANY)
 	TRACE_IRQS_OFF
 	jmp paranoid_userspace
-- 
cgit v1.2.3-70-g09d2


From edf55fda35c7dc7f2d9241c3abaddaf759b457c6 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Wed, 11 Jul 2012 20:26:39 +0200
Subject: x86: Exit RCU extended QS on notify resume

do_notify_resume() may be called on irq or exception
exit. But at that time the exception has already called
rcu_user_enter() and the irq has already called rcu_irq_exit().

Since it can use RCU read side critical section, we must call
rcu_user_exit() before doing anything there. Then we must call
back rcu_user_enter() after this function because we know we are
going to userspace from there.

This complete support for userspace RCU extended quiescent state
in x86-64.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Alessio Igor Bogani <abogani@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Avi Kivity <avi@redhat.com>
Cc: Chris Metcalf <cmetcalf@tilera.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Geoff Levand <geoff@infradead.org>
Cc: Gilad Ben Yossef <gilad@benyossef.com>
Cc: Hakan Akkan <hakanakkan@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Josh Triplett <josh@joshtriplett.org>
Cc: Kevin Hilman <khilman@ti.com>
Cc: Max Krasnyansky <maxk@qualcomm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephen Hemminger <shemminger@vyatta.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Sven-Thorsten Dietrich <thebigcorporation@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 arch/x86/Kconfig         | 1 +
 arch/x86/kernel/signal.c | 4 ++++
 2 files changed, 5 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 50a1d1f9b6d..20c49b8450b 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -97,6 +97,7 @@ config X86
 	select KTIME_SCALAR if X86_32
 	select GENERIC_STRNCPY_FROM_USER
 	select GENERIC_STRNLEN_USER
+	select HAVE_RCU_USER_QS if X86_64
 
 config INSTRUCTION_DECODER
 	def_bool (KPROBES || PERF_EVENTS || UPROBES)
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index b280908a376..bca0ab903e5 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -779,6 +779,8 @@ static void do_signal(struct pt_regs *regs)
 void
 do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
 {
+	rcu_user_exit();
+
 #ifdef CONFIG_X86_MCE
 	/* notify userspace of pending MCEs */
 	if (thread_info_flags & _TIF_MCE_NOTIFY)
@@ -804,6 +806,8 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
 #ifdef CONFIG_X86_32
 	clear_thread_flag(TIF_IRET);
 #endif /* CONFIG_X86_32 */
+
+	rcu_user_enter();
 }
 
 void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
-- 
cgit v1.2.3-70-g09d2


From 5a5a51db78ef24aa61a4cb2ae36f07f6fa37356d Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@linux.intel.com>
Date: Mon, 24 Sep 2012 16:05:48 -0700
Subject: x86-32: Start out eflags and cr4 clean

%cr4 is supposed to reflect a set of features into which the operating
system is opting in.  If the BIOS or bootloader leaks bits here, this
is not desirable.  Consider a bootloader passing in %cr4.pae set to a
legacy paging kernel, for example -- it will not have any immediate
effect, but the kernel would crash when turning paging on.

A similar argument applies to %eflags, and since we have to look for
%eflags.id being settable we can use a sequence which clears %eflags
as a side effect.

Note that we already do this for x86-64.

Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Link: http://lkml.kernel.org/r/1348529239-17943-1-git-send-email-hpa@linux.intel.com
---
 arch/x86/kernel/head_32.S | 31 ++++++++++++++++---------------
 1 file changed, 16 insertions(+), 15 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index d42ab17b739..957a47aec64 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -287,27 +287,28 @@ ENTRY(startup_32_smp)
 	leal -__PAGE_OFFSET(%ecx),%esp
 
 default_entry:
-
 /*
  *	New page tables may be in 4Mbyte page mode and may
  *	be using the global pages. 
  *
  *	NOTE! If we are on a 486 we may have no cr4 at all!
- *	So we do not try to touch it unless we really have
- *	some bits in it to set.  This won't work if the BSP
- *	implements cr4 but this AP does not -- very unlikely
- *	but be warned!  The same applies to the pse feature
- *	if not equally supported. --macro
- *
- *	NOTE! We have to correct for the fact that we're
- *	not yet offset PAGE_OFFSET..
+ *	Specifically, cr4 exists if and only if CPUID exists,
+ *	which in turn exists if and only if EFLAGS.ID exists.
  */
-#define cr4_bits pa(mmu_cr4_features)
-	movl cr4_bits,%edx
-	andl %edx,%edx
-	jz 6f
-	movl %cr4,%eax		# Turn on paging options (PSE,PAE,..)
-	orl %edx,%eax
+	movl $X86_EFLAGS_ID,%ecx
+	pushl %ecx
+	popfl
+	pushfl
+	popl %eax
+	pushl $0
+	popfl
+	pushfl
+	popl %edx
+	xorl %edx,%eax
+	testl %ecx,%eax
+	jz 6f			# No ID flag = no CPUID = no CR4
+
+	movl pa(mmu_cr4_features),%eax
 	movl %eax,%cr4
 
 	testb $X86_CR4_PAE, %al		# check if PAE is enabled
-- 
cgit v1.2.3-70-g09d2


From 73201dbec64aebf6b0dca855b523f437972dc7bb Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@linux.intel.com>
Date: Wed, 26 Sep 2012 15:02:34 -0700
Subject: x86, suspend: On wakeup always initialize cr4 and EFER

We already have a flag word to indicate the existence of MISC_ENABLES,
so use the same flag word to indicate existence of cr4 and EFER, and
always restore them if they exist.  That way if something passes a
nonzero value when the value *should* be zero, we will still
initialize it.

Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Cc: Rafael J. Wysocki <rjw@sisk.pl>
Link: http://lkml.kernel.org/r/1348529239-17943-1-git-send-email-hpa@linux.intel.com
---
 arch/x86/kernel/acpi/sleep.c      | 15 ++++++++++-----
 arch/x86/realmode/rm/wakeup.h     |  2 ++
 arch/x86/realmode/rm/wakeup_asm.S | 29 +++++++++++++++++++----------
 3 files changed, 31 insertions(+), 15 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 1b8e5a03d94..11676cf65ae 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -43,17 +43,22 @@ int acpi_suspend_lowlevel(void)
 
 	header->video_mode = saved_video_mode;
 
+	header->pmode_behavior = 0;
+
 #ifndef CONFIG_64BIT
 	store_gdt((struct desc_ptr *)&header->pmode_gdt);
 
-	if (rdmsr_safe(MSR_EFER, &header->pmode_efer_low,
-		       &header->pmode_efer_high))
-		header->pmode_efer_low = header->pmode_efer_high = 0;
+	if (!rdmsr_safe(MSR_EFER,
+			&header->pmode_efer_low,
+			&header->pmode_efer_high))
+		header->pmode_behavior |= (1 << WAKEUP_BEHAVIOR_RESTORE_EFER);
 #endif /* !CONFIG_64BIT */
 
 	header->pmode_cr0 = read_cr0();
-	header->pmode_cr4 = read_cr4_safe();
-	header->pmode_behavior = 0;
+	if (__this_cpu_read(cpu_info.cpuid_level) >= 0) {
+		header->pmode_cr4 = read_cr4();
+		header->pmode_behavior |= (1 << WAKEUP_BEHAVIOR_RESTORE_CR4);
+	}
 	if (!rdmsr_safe(MSR_IA32_MISC_ENABLE,
 			&header->pmode_misc_en_low,
 			&header->pmode_misc_en_high))
diff --git a/arch/x86/realmode/rm/wakeup.h b/arch/x86/realmode/rm/wakeup.h
index 9317e0042f2..7dd86a419f5 100644
--- a/arch/x86/realmode/rm/wakeup.h
+++ b/arch/x86/realmode/rm/wakeup.h
@@ -36,5 +36,7 @@ extern struct wakeup_header wakeup_header;
 
 /* Wakeup behavior bits */
 #define WAKEUP_BEHAVIOR_RESTORE_MISC_ENABLE     0
+#define WAKEUP_BEHAVIOR_RESTORE_CR4		1
+#define WAKEUP_BEHAVIOR_RESTORE_EFER		2
 
 #endif /* ARCH_X86_KERNEL_ACPI_RM_WAKEUP_H */
diff --git a/arch/x86/realmode/rm/wakeup_asm.S b/arch/x86/realmode/rm/wakeup_asm.S
index 8905166b0bb..e56479e5805 100644
--- a/arch/x86/realmode/rm/wakeup_asm.S
+++ b/arch/x86/realmode/rm/wakeup_asm.S
@@ -74,9 +74,18 @@ ENTRY(wakeup_start)
 
 	lidtl	wakeup_idt
 
-	/* Clear the EFLAGS */
-	pushl	$0
+	/* Clear the EFLAGS but remember if we have EFLAGS.ID */
+	movl $X86_EFLAGS_ID, %ecx
+	pushl %ecx
 	popfl
+	pushfl
+	popl %edi
+	pushl $0
+	popfl
+	pushfl
+	popl %edx
+	xorl %edx, %edi
+	andl %ecx, %edi		/* %edi is zero iff CPUID & %cr4 are missing */
 
 	/* Check header signature... */
 	movl	signature, %eax
@@ -93,8 +102,8 @@ ENTRY(wakeup_start)
 
 	/* Restore MISC_ENABLE before entering protected mode, in case
 	   BIOS decided to clear XD_DISABLE during S3. */
-	movl	pmode_behavior, %eax
-	btl	$WAKEUP_BEHAVIOR_RESTORE_MISC_ENABLE, %eax
+	movl	pmode_behavior, %edi
+	btl	$WAKEUP_BEHAVIOR_RESTORE_MISC_ENABLE, %edi
 	jnc	1f
 
 	movl	pmode_misc_en, %eax
@@ -110,15 +119,15 @@ ENTRY(wakeup_start)
 	movl	pmode_cr3, %eax
 	movl	%eax, %cr3
 
-	movl	pmode_cr4, %ecx
-	jecxz	1f
-	movl	%ecx, %cr4
+	btl	$WAKEUP_BEHAVIOR_RESTORE_CR4, %edi
+	jz	1f
+	movl	pmode_cr4, %eax
+	movl	%eax, %cr4
 1:
+	btl	$WAKEUP_BEHAVIOR_RESTORE_EFER, %edi
+	jz	1f
 	movl	pmode_efer, %eax
 	movl	pmode_efer + 4, %edx
-	movl	%eax, %ecx
-	orl	%edx, %ecx
-	jz	1f
 	movl	$MSR_EFER, %ecx
 	wrmsr
 1:
-- 
cgit v1.2.3-70-g09d2


From 402537fd9f5499d1a50a6f20cdd9031d24ee2e47 Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zheng.z.yan@intel.com>
Date: Thu, 27 Sep 2012 09:33:26 +0800
Subject: perf/x86: Fix typo in uncore_pmu_to_box

The variable box should not be declared as static.

This could probably cause crashes with sufficiently parallel
uncore PMU use.

Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
Cc: eranian@google.com
Cc: a.p.zijlstra@chello.nl
Link: http://lkml.kernel.org/r/1348709606-2759-1-git-send-email-zheng.z.yan@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event_intel_uncore.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index 38e4894165b..c7c55e74ac6 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -1950,7 +1950,7 @@ struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, int cp
 static struct intel_uncore_box *
 uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu)
 {
-	static struct intel_uncore_box *box;
+	struct intel_uncore_box *box;
 
 	box = *per_cpu_ptr(pmu->box, cpu);
 	if (box)
-- 
cgit v1.2.3-70-g09d2


From 200429cc63399e99dd2abcdca5088559a911ef2b Mon Sep 17 00:00:00 2001
From: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
Date: Wed, 19 Sep 2012 14:24:57 +0300
Subject: crypto: cast5/avx - fix storing of new IV in CBC encryption

cast5/avx incorrectly XORs new IV over old IV at end of CBC encryption
function when it should store. This causes CBC encryption to give
incorrect output on multi-page encryption requests.

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/x86/crypto/cast5_avx_glue.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/crypto/cast5_avx_glue.c b/arch/x86/crypto/cast5_avx_glue.c
index 445aab06387..e0ea14f9547 100644
--- a/arch/x86/crypto/cast5_avx_glue.c
+++ b/arch/x86/crypto/cast5_avx_glue.c
@@ -165,7 +165,7 @@ static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
 		nbytes -= bsize;
 	} while (nbytes >= bsize);
 
-	*(u64 *)walk->iv ^= *iv;
+	*(u64 *)walk->iv = *iv;
 	return nbytes;
 }
 
-- 
cgit v1.2.3-70-g09d2


From fe04ddf7c2910362f3817c8156e41cbd6c0ee35d Mon Sep 17 00:00:00 2001
From: Michal Marek <mmarek@suse.cz>
Date: Tue, 25 Sep 2012 16:03:03 +0200
Subject: kbuild: Do not package /boot and /lib in make tar-pkg

There were reports of users destroying their Fedora installs by a kernel
tarball that replaces the /lib -> /usr/lib symlink. Let's remove the
toplevel directories from the tarball to prevent this from happening.

Reported-by: Andi Kleen <andi@firstfloor.org>
Suggested-by: Ben Hutchings <ben@decadent.org.uk>
Cc: <stable@kernel.org>
Signed-off-by: Michal Marek <mmarek@suse.cz>
---
 arch/x86/Makefile        | 2 +-
 scripts/Makefile.fwinst  | 4 ++--
 scripts/package/buildtar | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index c098ca4671d..b0c5276861e 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -138,7 +138,7 @@ KBUILD_CFLAGS += $(call cc-option,-mno-avx,)
 KBUILD_CFLAGS += $(mflags-y)
 KBUILD_AFLAGS += $(mflags-y)
 
-archscripts: scripts_basic
+archscripts:
 	$(Q)$(MAKE) $(build)=arch/x86/tools relocs
 
 ###
diff --git a/scripts/Makefile.fwinst b/scripts/Makefile.fwinst
index 4d908d16c03..c3f69ae275d 100644
--- a/scripts/Makefile.fwinst
+++ b/scripts/Makefile.fwinst
@@ -27,7 +27,7 @@ endif
 installed-mod-fw := $(addprefix $(INSTALL_FW_PATH)/,$(mod-fw))
 
 installed-fw := $(addprefix $(INSTALL_FW_PATH)/,$(fw-shipped-all))
-installed-fw-dirs := $(sort $(dir $(installed-fw))) $(INSTALL_FW_PATH)/./
+installed-fw-dirs := $(sort $(dir $(installed-fw))) $(INSTALL_FW_PATH)/.
 
 # Workaround for make < 3.81, where .SECONDEXPANSION doesn't work.
 PHONY += $(INSTALL_FW_PATH)/$$(%) install-all-dirs
@@ -42,7 +42,7 @@ quiet_cmd_install = INSTALL $(subst $(srctree)/,,$@)
 $(installed-fw-dirs):
 	$(call cmd,mkdir)
 
-$(installed-fw): $(INSTALL_FW_PATH)/%: $(obj)/% | $(INSTALL_FW_PATH)/$$(dir %)
+$(installed-fw): $(INSTALL_FW_PATH)/%: $(obj)/% | $$(dir $(INSTALL_FW_PATH)/%)
 	$(call cmd,install)
 
 PHONY +=  __fw_install __fw_modinst FORCE
diff --git a/scripts/package/buildtar b/scripts/package/buildtar
index 8a7b15598ea..d0d748e7291 100644
--- a/scripts/package/buildtar
+++ b/scripts/package/buildtar
@@ -109,7 +109,7 @@ esac
 	if tar --owner=root --group=root --help >/dev/null 2>&1; then
 		opts="--owner=root --group=root"
 	fi
-	tar cf - . $opts | ${compress} > "${tarball}${file_ext}"
+	tar cf - boot/* lib/* $opts | ${compress} > "${tarball}${file_ext}"
 )
 
 echo "Tarball successfully created in ${tarball}${file_ext}"
-- 
cgit v1.2.3-70-g09d2


From f9a38eace4498a5e9f6d2cdfc879d5444edc3a5f Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 6 Sep 2012 13:39:47 -0400
Subject: um: let signal_delivered() do SIGTRAP on singlestepping into handler

... rather than duplicating that in sigframe setup code (and doing that
inconsistently, at that)

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/um/kernel/signal.c | 6 +++++-
 arch/x86/um/signal.c    | 6 ------
 2 files changed, 5 insertions(+), 7 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/um/kernel/signal.c b/arch/um/kernel/signal.c
index 7362d58efc2..cc9c2350e41 100644
--- a/arch/um/kernel/signal.c
+++ b/arch/um/kernel/signal.c
@@ -22,9 +22,13 @@ static void handle_signal(struct pt_regs *regs, unsigned long signr,
 			 struct k_sigaction *ka, siginfo_t *info)
 {
 	sigset_t *oldset = sigmask_to_save();
+	int singlestep = 0;
 	unsigned long sp;
 	int err;
 
+	if ((current->ptrace & PT_DTRACE) && (current->ptrace & PT_PTRACED))
+		singlestep = 1;
+
 	/* Did we come from a system call? */
 	if (PT_REGS_SYSCALL_NR(regs) >= 0) {
 		/* If so, check system call restarting.. */
@@ -61,7 +65,7 @@ static void handle_signal(struct pt_regs *regs, unsigned long signr,
 	if (err)
 		force_sigsegv(signr, current);
 	else
-		signal_delivered(signr, info, ka, regs, 0);
+		signal_delivered(signr, info, ka, regs, singlestep);
 }
 
 static int kern_do_signal(struct pt_regs *regs)
diff --git a/arch/x86/um/signal.c b/arch/x86/um/signal.c
index a508cea1350..ba7363ecf89 100644
--- a/arch/x86/um/signal.c
+++ b/arch/x86/um/signal.c
@@ -416,9 +416,6 @@ int setup_signal_stack_sc(unsigned long stack_top, int sig,
 	PT_REGS_AX(regs) = (unsigned long) sig;
 	PT_REGS_DX(regs) = (unsigned long) 0;
 	PT_REGS_CX(regs) = (unsigned long) 0;
-
-	if ((current->ptrace & PT_DTRACE) && (current->ptrace & PT_PTRACED))
-		ptrace_notify(SIGTRAP);
 	return 0;
 }
 
@@ -466,9 +463,6 @@ int setup_signal_stack_si(unsigned long stack_top, int sig,
 	PT_REGS_AX(regs) = (unsigned long) sig;
 	PT_REGS_DX(regs) = (unsigned long) &frame->info;
 	PT_REGS_CX(regs) = (unsigned long) &frame->uc;
-
-	if ((current->ptrace & PT_DTRACE) && (current->ptrace & PT_PTRACED))
-		ptrace_notify(SIGTRAP);
 	return 0;
 }
 
-- 
cgit v1.2.3-70-g09d2


From d2ce4e92fa4f79a5fdb4cc912b411280afe21697 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 20 Sep 2012 09:28:25 -0400
Subject: um: kill thread->forking

we only use that to tell copy_thread() done by syscall from that
done by kernel_thread().  However, it's easier to do simply by
checking PF_KTHREAD in thread flags.

Merge sys_clone() guts for 32bit and 64bit, while we are at it...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/um/include/asm/processor-generic.h |  9 ---------
 arch/um/kernel/process.c                |  8 ++++----
 arch/um/kernel/syscall.c                | 24 ++++++++++++------------
 arch/x86/um/shared/sysdep/syscalls.h    |  2 ++
 arch/x86/um/sys_call_table_32.c         |  2 +-
 arch/x86/um/syscalls_32.c               | 27 +++++++--------------------
 arch/x86/um/syscalls_64.c               | 23 +++--------------------
 7 files changed, 29 insertions(+), 66 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h
index 69f1c57a8d0..33a6a2423bd 100644
--- a/arch/um/include/asm/processor-generic.h
+++ b/arch/um/include/asm/processor-generic.h
@@ -20,14 +20,6 @@ struct mm_struct;
 
 struct thread_struct {
 	struct task_struct *saved_task;
-	/*
-	 * This flag is set to 1 before calling do_fork (and analyzed in
-	 * copy_thread) to mark that we are begin called from userspace (fork /
-	 * vfork / clone), and reset to 0 after. It is left to 0 when called
-	 * from kernelspace (i.e. kernel_thread() or fork_idle(),
-	 * as of 2.6.11).
-	 */
-	int forking;
 	struct pt_regs regs;
 	int singlestep_syscall;
 	void *fault_addr;
@@ -58,7 +50,6 @@ struct thread_struct {
 
 #define INIT_THREAD \
 { \
-	.forking		= 0, \
 	.regs		   	= EMPTY_REGS,	\
 	.fault_addr		= NULL, \
 	.prev_sched		= NULL, \
diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c
index 57fc7028714..c5f5afa5074 100644
--- a/arch/um/kernel/process.c
+++ b/arch/um/kernel/process.c
@@ -181,11 +181,12 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
 		struct pt_regs *regs)
 {
 	void (*handler)(void);
+	int kthread = current->flags & PF_KTHREAD;
 	int ret = 0;
 
 	p->thread = (struct thread_struct) INIT_THREAD;
 
-	if (current->thread.forking) {
+	if (!kthread) {
 	  	memcpy(&p->thread.regs.regs, &regs->regs,
 		       sizeof(p->thread.regs.regs));
 		PT_REGS_SET_SYSCALL_RETURN(&p->thread.regs, 0);
@@ -195,8 +196,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
 		handler = fork_handler;
 
 		arch_copy_thread(&current->thread.arch, &p->thread.arch);
-	}
-	else {
+	} else {
 		get_safe_registers(p->thread.regs.regs.gp, p->thread.regs.regs.fp);
 		p->thread.request.u.thread = current->thread.request.u.thread;
 		handler = new_thread_handler;
@@ -204,7 +204,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
 
 	new_thread(task_stack_page(p), &p->thread.switch_buf, handler);
 
-	if (current->thread.forking) {
+	if (!kthread) {
 		clear_flushed_tls(p);
 
 		/*
diff --git a/arch/um/kernel/syscall.c b/arch/um/kernel/syscall.c
index f958cb876ee..a4c6d8eee74 100644
--- a/arch/um/kernel/syscall.c
+++ b/arch/um/kernel/syscall.c
@@ -17,25 +17,25 @@
 
 long sys_fork(void)
 {
-	long ret;
-
-	current->thread.forking = 1;
-	ret = do_fork(SIGCHLD, UPT_SP(&current->thread.regs.regs),
+	return do_fork(SIGCHLD, UPT_SP(&current->thread.regs.regs),
 		      &current->thread.regs, 0, NULL, NULL);
-	current->thread.forking = 0;
-	return ret;
 }
 
 long sys_vfork(void)
 {
-	long ret;
-
-	current->thread.forking = 1;
-	ret = do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD,
+	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD,
 		      UPT_SP(&current->thread.regs.regs),
 		      &current->thread.regs, 0, NULL, NULL);
-	current->thread.forking = 0;
-	return ret;
+}
+
+long sys_clone(unsigned long clone_flags, unsigned long newsp,
+	       void __user *parent_tid, void __user *child_tid)
+{
+	if (!newsp)
+		newsp = UPT_SP(&current->thread.regs.regs);
+
+	return do_fork(clone_flags, newsp, &current->thread.regs, 0, parent_tid,
+		      child_tid);
 }
 
 long old_mmap(unsigned long addr, unsigned long len,
diff --git a/arch/x86/um/shared/sysdep/syscalls.h b/arch/x86/um/shared/sysdep/syscalls.h
index bd9a89b67e4..ca255a805ed 100644
--- a/arch/x86/um/shared/sysdep/syscalls.h
+++ b/arch/x86/um/shared/sysdep/syscalls.h
@@ -1,3 +1,5 @@
+extern long sys_clone(unsigned long clone_flags, unsigned long newsp,
+	       void __user *parent_tid, void __user *child_tid);
 #ifdef __i386__
 #include "syscalls_32.h"
 #else
diff --git a/arch/x86/um/sys_call_table_32.c b/arch/x86/um/sys_call_table_32.c
index 68d1dc91b37..b5408cecac6 100644
--- a/arch/x86/um/sys_call_table_32.c
+++ b/arch/x86/um/sys_call_table_32.c
@@ -28,7 +28,7 @@
 #define ptregs_execve sys_execve
 #define ptregs_iopl sys_iopl
 #define ptregs_vm86old sys_vm86old
-#define ptregs_clone sys_clone
+#define ptregs_clone i386_clone
 #define ptregs_vm86 sys_vm86
 #define ptregs_sigaltstack sys_sigaltstack
 #define ptregs_vfork sys_vfork
diff --git a/arch/x86/um/syscalls_32.c b/arch/x86/um/syscalls_32.c
index b853e8600b9..db444c7218f 100644
--- a/arch/x86/um/syscalls_32.c
+++ b/arch/x86/um/syscalls_32.c
@@ -3,37 +3,24 @@
  * Licensed under the GPL
  */
 
-#include "linux/sched.h"
-#include "linux/shm.h"
-#include "linux/ipc.h"
-#include "linux/syscalls.h"
-#include "asm/mman.h"
-#include "asm/uaccess.h"
-#include "asm/unistd.h"
+#include <linux/syscalls.h>
+#include <sysdep/syscalls.h>
 
 /*
  * The prototype on i386 is:
  *
- *     int clone(int flags, void * child_stack, int * parent_tidptr, struct user_desc * newtls, int * child_tidptr)
+ *     int clone(int flags, void * child_stack, int * parent_tidptr, struct user_desc * newtls
  *
  * and the "newtls" arg. on i386 is read by copy_thread directly from the
  * register saved on the stack.
  */
-long sys_clone(unsigned long clone_flags, unsigned long newsp,
-	       int __user *parent_tid, void *newtls, int __user *child_tid)
+long i386_clone(unsigned long clone_flags, unsigned long newsp,
+		int __user *parent_tid, void *newtls, int __user *child_tid)
 {
-	long ret;
-
-	if (!newsp)
-		newsp = UPT_SP(&current->thread.regs.regs);
-
-	current->thread.forking = 1;
-	ret = do_fork(clone_flags, newsp, &current->thread.regs, 0, parent_tid,
-		      child_tid);
-	current->thread.forking = 0;
-	return ret;
+	return sys_clone(clone_flags, newsp, parent_tid, child_tid);
 }
 
+
 long sys_sigaction(int sig, const struct old_sigaction __user *act,
 			 struct old_sigaction __user *oact)
 {
diff --git a/arch/x86/um/syscalls_64.c b/arch/x86/um/syscalls_64.c
index f3d82bb6e15..adb08eb5c22 100644
--- a/arch/x86/um/syscalls_64.c
+++ b/arch/x86/um/syscalls_64.c
@@ -5,12 +5,9 @@
  * Licensed under the GPL
  */
 
-#include "linux/linkage.h"
-#include "linux/personality.h"
-#include "linux/utsname.h"
-#include "asm/prctl.h" /* XXX This should get the constants from libc */
-#include "asm/uaccess.h"
-#include "os.h"
+#include <linux/sched.h>
+#include <asm/prctl.h> /* XXX This should get the constants from libc */
+#include <os.h>
 
 long arch_prctl(struct task_struct *task, int code, unsigned long __user *addr)
 {
@@ -79,20 +76,6 @@ long sys_arch_prctl(int code, unsigned long addr)
 	return arch_prctl(current, code, (unsigned long __user *) addr);
 }
 
-long sys_clone(unsigned long clone_flags, unsigned long newsp,
-	       void __user *parent_tid, void __user *child_tid)
-{
-	long ret;
-
-	if (!newsp)
-		newsp = UPT_SP(&current->thread.regs.regs);
-	current->thread.forking = 1;
-	ret = do_fork(clone_flags, newsp, &current->thread.regs, 0, parent_tid,
-		      child_tid);
-	current->thread.forking = 0;
-	return ret;
-}
-
 void arch_switch_to(struct task_struct *to)
 {
 	if ((to->thread.arch.fs == 0) || (to->mm == NULL))
-- 
cgit v1.2.3-70-g09d2


From b2cc2a074de75671bbed5e2dda67a9252ef353ea Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@linux.intel.com>
Date: Wed, 26 Sep 2012 18:02:28 -0700
Subject: x86, smep, smap: Make the switching functions one-way

There is no fundamental reason why we should switch SMEP and SMAP on
during early cpu initialization just to switch them off again.  Now
with %eflags and %cr4 forced to be initialized to a clean state, we
only need the one-way enable.  Also, make the functions inline to make
them (somewhat) harder to abuse.

This does mean that SMEP and SMAP do not get initialized anywhere near
as early.  Even using early_param() instead of __setup() doesn't give
us control early enough to do this during the early cpu initialization
phase.  This seems reasonable to me, because SMEP and SMAP should not
matter until we have userspace to protect ourselves from, but it does
potentially make it possible for a bug involving a "leak of
permissions to userspace" to get uncaught.

Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/kernel/cpu/common.c | 49 ++++++++++++++++----------------------------
 1 file changed, 18 insertions(+), 31 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 44aec5d4dfa..fefd9b7e93e 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -259,48 +259,36 @@ static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
 }
 #endif
 
-static int disable_smep __cpuinitdata;
 static __init int setup_disable_smep(char *arg)
 {
-	disable_smep = 1;
+	setup_clear_cpu_cap(X86_FEATURE_SMEP);
 	return 1;
 }
 __setup("nosmep", setup_disable_smep);
 
-static __cpuinit void setup_smep(struct cpuinfo_x86 *c)
+static __always_inline void setup_smep(struct cpuinfo_x86 *c)
 {
-	if (cpu_has(c, X86_FEATURE_SMEP)) {
-		if (unlikely(disable_smep)) {
-			setup_clear_cpu_cap(X86_FEATURE_SMEP);
-			clear_in_cr4(X86_CR4_SMEP);
-		} else
-			set_in_cr4(X86_CR4_SMEP);
-	}
+	if (cpu_has(c, X86_FEATURE_SMEP))
+		set_in_cr4(X86_CR4_SMEP);
 }
 
-static int disable_smap __cpuinitdata;
 static __init int setup_disable_smap(char *arg)
 {
-	disable_smap = 1;
+	setup_clear_cpu_cap(X86_FEATURE_SMAP);
 	return 1;
 }
 __setup("nosmap", setup_disable_smap);
 
-static __cpuinit void setup_smap(struct cpuinfo_x86 *c)
+static __always_inline void setup_smap(struct cpuinfo_x86 *c)
 {
-	if (cpu_has(c, X86_FEATURE_SMAP)) {
-		if (unlikely(disable_smap)) {
-			setup_clear_cpu_cap(X86_FEATURE_SMAP);
-			clear_in_cr4(X86_CR4_SMAP);
-		} else {
-			set_in_cr4(X86_CR4_SMAP);
-			/*
-			 * Don't use clac() here since alternatives
-			 * haven't run yet...
-			 */
-			asm volatile(__stringify(__ASM_CLAC) ::: "memory");
-		}
-	}
+	unsigned long eflags;
+
+	/* This should have been cleared long ago */
+	raw_local_save_flags(eflags);
+	BUG_ON(eflags & X86_EFLAGS_AC);
+
+	if (cpu_has(c, X86_FEATURE_SMAP))
+		set_in_cr4(X86_CR4_SMAP);
 }
 
 /*
@@ -737,9 +725,6 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
 	c->cpu_index = 0;
 	filter_cpuid_features(c, false);
 
-	setup_smep(c);
-	setup_smap(c);
-
 	if (this_cpu->c_bsp_init)
 		this_cpu->c_bsp_init(c);
 }
@@ -824,8 +809,6 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c)
 		c->phys_proc_id = c->initial_apicid;
 	}
 
-	setup_smep(c);
-
 	get_model_name(c); /* Default name */
 
 	detect_nopl(c);
@@ -890,6 +873,10 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
 	/* Disable the PN if appropriate */
 	squash_the_stupid_serial_number(c);
 
+	/* Set up SMEP/SMAP */
+	setup_smep(c);
+	setup_smap(c);
+
 	/*
 	 * The vendor-specific functions might have changed features.
 	 * Now we do "generic changes."
-- 
cgit v1.2.3-70-g09d2


From 450cc201038f31bd496e1b3a44a49790b8827a06 Mon Sep 17 00:00:00 2001
From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com>
Date: Thu, 27 Sep 2012 10:08:00 -0700
Subject: x86/mce: Provide boot argument to honour bios-set CMCI threshold

The ACPI spec doesn't provide for a way for the bios to pass down
recommended thresholds to the OS on a _per-bank_ basis. This patch adds
a new boot option, which if passed, tells Linux to use CMCI thresholds
set by the bios.

As fail-safe, we initialize threshold to 1 if some banks have not been
initialized by the bios and warn the user.

Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 Documentation/x86/x86_64/boot-options.txt |  7 +++++++
 arch/x86/include/asm/mce.h                |  1 +
 arch/x86/kernel/cpu/mcheck/mce.c          | 10 +++++++++
 arch/x86/kernel/cpu/mcheck/mce_intel.c    | 35 ++++++++++++++++++++++++++++---
 4 files changed, 50 insertions(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/Documentation/x86/x86_64/boot-options.txt b/Documentation/x86/x86_64/boot-options.txt
index c54b4f503e2..de38429beb7 100644
--- a/Documentation/x86/x86_64/boot-options.txt
+++ b/Documentation/x86/x86_64/boot-options.txt
@@ -50,6 +50,13 @@ Machine check
 		monarchtimeout:
 		Sets the time in us to wait for other CPUs on machine checks. 0
 		to disable.
+   mce=bios_cmci_threshold
+		Don't overwrite the bios-set CMCI threshold. This boot option
+		prevents Linux from overwriting the CMCI threshold set by the
+		bios. Without this option, Linux always sets the CMCI
+		threshold to 1. Enabling this may make memory predictive failure
+		analysis less effective if the bios sets thresholds for memory
+		errors since we will not see details for all errors.
 
    nomce (for compatibility with i386): same as mce=off
 
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index ccaf7c581c8..54d73b1f00a 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -161,6 +161,7 @@ DECLARE_PER_CPU(struct device *, mce_device);
 #ifdef CONFIG_X86_MCE_INTEL
 extern int mce_cmci_disabled;
 extern int mce_ignore_ce;
+extern int mce_bios_cmci_threshold;
 void mce_intel_feature_init(struct cpuinfo_x86 *c);
 void cmci_clear(void);
 void cmci_reenable(void);
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index c311122ea83..29e87d3b284 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -83,6 +83,7 @@ static int			mce_dont_log_ce		__read_mostly;
 int				mce_cmci_disabled	__read_mostly;
 int				mce_ignore_ce		__read_mostly;
 int				mce_ser			__read_mostly;
+int				mce_bios_cmci_threshold	__read_mostly;
 
 struct mce_bank                *mce_banks		__read_mostly;
 
@@ -1946,6 +1947,7 @@ static struct miscdevice mce_chrdev_device = {
  *	check, or 0 to not wait
  * mce=bootlog Log MCEs from before booting. Disabled by default on AMD.
  * mce=nobootlog Don't log MCEs from before booting.
+ * mce=bios_cmci_threshold Don't program the CMCI threshold
  */
 static int __init mcheck_enable(char *str)
 {
@@ -1965,6 +1967,8 @@ static int __init mcheck_enable(char *str)
 		mce_ignore_ce = 1;
 	else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog"))
 		mce_bootlog = (str[0] == 'b');
+	else if (!strcmp(str, "bios_cmci_threshold"))
+		mce_bios_cmci_threshold = 1;
 	else if (isdigit(str[0])) {
 		get_option(&str, &tolerant);
 		if (*str == ',') {
@@ -2205,6 +2209,11 @@ static struct dev_ext_attribute dev_attr_cmci_disabled = {
 	&mce_cmci_disabled
 };
 
+static struct dev_ext_attribute dev_attr_bios_cmci_threshold = {
+	__ATTR(bios_cmci_threshold, 0444, device_show_int, NULL),
+	&mce_bios_cmci_threshold
+};
+
 static struct device_attribute *mce_device_attrs[] = {
 	&dev_attr_tolerant.attr,
 	&dev_attr_check_interval.attr,
@@ -2213,6 +2222,7 @@ static struct device_attribute *mce_device_attrs[] = {
 	&dev_attr_dont_log_ce.attr,
 	&dev_attr_ignore_ce.attr,
 	&dev_attr_cmci_disabled.attr,
+	&dev_attr_bios_cmci_threshold.attr,
 	NULL
 };
 
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index 098386fed48..5f88abf07e9 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -181,10 +181,12 @@ static void cmci_discover(int banks)
 	unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned);
 	unsigned long flags;
 	int i;
+	int bios_wrong_thresh = 0;
 
 	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
 	for (i = 0; i < banks; i++) {
 		u64 val;
+		int bios_zero_thresh = 0;
 
 		if (test_bit(i, owned))
 			continue;
@@ -198,8 +200,20 @@ static void cmci_discover(int banks)
 			continue;
 		}
 
-		val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK;
-		val |= MCI_CTL2_CMCI_EN | CMCI_THRESHOLD;
+		if (!mce_bios_cmci_threshold) {
+			val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK;
+			val |= CMCI_THRESHOLD;
+		} else if (!(val & MCI_CTL2_CMCI_THRESHOLD_MASK)) {
+			/*
+			 * If bios_cmci_threshold boot option was specified
+			 * but the threshold is zero, we'll try to initialize
+			 * it to 1.
+			 */
+			bios_zero_thresh = 1;
+			val |= CMCI_THRESHOLD;
+		}
+
+		val |= MCI_CTL2_CMCI_EN;
 		wrmsrl(MSR_IA32_MCx_CTL2(i), val);
 		rdmsrl(MSR_IA32_MCx_CTL2(i), val);
 
@@ -207,11 +221,26 @@ static void cmci_discover(int banks)
 		if (val & MCI_CTL2_CMCI_EN) {
 			set_bit(i, owned);
 			__clear_bit(i, __get_cpu_var(mce_poll_banks));
+			/*
+			 * We are able to set thresholds for some banks that
+			 * had a threshold of 0. This means the BIOS has not
+			 * set the thresholds properly or does not work with
+			 * this boot option. Note down now and report later.
+			 */
+			if (mce_bios_cmci_threshold && bios_zero_thresh &&
+					(val & MCI_CTL2_CMCI_THRESHOLD_MASK))
+				bios_wrong_thresh = 1;
 		} else {
 			WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks)));
 		}
 	}
 	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
+	if (mce_bios_cmci_threshold && bios_wrong_thresh) {
+		pr_info_once(
+			"bios_cmci_threshold: Some banks do not have valid thresholds set\n");
+		pr_info_once(
+			"bios_cmci_threshold: Make sure your BIOS supports this boot option\n");
+	}
 }
 
 /*
@@ -249,7 +278,7 @@ void cmci_clear(void)
 			continue;
 		/* Disable CMCI */
 		rdmsrl(MSR_IA32_MCx_CTL2(i), val);
-		val &= ~(MCI_CTL2_CMCI_EN|MCI_CTL2_CMCI_THRESHOLD_MASK);
+		val &= ~MCI_CTL2_CMCI_EN;
 		wrmsrl(MSR_IA32_MCx_CTL2(i), val);
 		__clear_bit(i, __get_cpu_var(mce_banks_owned));
 	}
-- 
cgit v1.2.3-70-g09d2


From bbb35efcda41d589cfff5e2b08c5fb457791117c Mon Sep 17 00:00:00 2001
From: Richard Weinberger <richard@nod.at>
Date: Thu, 27 Sep 2012 20:10:57 +0200
Subject: um: Fix IPC on um
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit c1d7e01d (ipc: use Kconfig options for __ARCH_WANT_[COMPAT_]IPC_PARSE_VERSION)
forgot UML and broke IPC on it.
Also UML has to select ARCH_WANT_IPC_PARSE_VERSION usin Kconfig.

Reported-and-tested-by: <Toralf Förster toralf.foerster@gmx.de>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 arch/x86/um/Kconfig | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/x86')

diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig
index 9926e11a772..aeaff8bef2f 100644
--- a/arch/x86/um/Kconfig
+++ b/arch/x86/um/Kconfig
@@ -21,6 +21,7 @@ config 64BIT
 config X86_32
 	def_bool !64BIT
 	select HAVE_AOUT
+	select ARCH_WANT_IPC_PARSE_VERSION
 
 config X86_64
 	def_bool 64BIT
-- 
cgit v1.2.3-70-g09d2


From 9429ec96c2718c0d1e3317cf60a87a0405223814 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Thu, 16 Aug 2012 20:15:05 +0200
Subject: um: Preinclude include/linux/kern_levels.h

The userspace part of UML uses the asm-offsets.h generator mechanism to
create definitions for UM_KERN_<LEVEL> that match the in-kernel
KERN_<LEVEL> constant definitions.

As of commit 04d2c8c83d0e3ac5f78aeede51babb3236200112 ("printk: convert
the format for KERN_<LEVEL> to a 2 byte pattern"), KERN_<LEVEL> is no
longer expanded to the literal '"<LEVEL>"', but to '"\001" "LEVEL"', i.e.
it contains two parts.

However, the combo of DEFINE_STR() in
arch/x86/um/shared/sysdep/kernel-offsets.h and sed-y in Kbuild doesn't
support string literals consisting of multiple parts. Hence for all
UM_KERN_<LEVEL> definitions, only the SOH character is retained in the actual
definition, while the remainder ends up in the comment. E.g. in
include/generated/asm-offsets.h we get

    #define UM_KERN_INFO "\001" /* "6" KERN_INFO */

instead of

    #define UM_KERN_INFO "\001" "6" /* KERN_INFO */

This causes spurious '^A' output in some kernel messages:

    Calibrating delay loop... 4640.76 BogoMIPS (lpj=23203840)
    pid_max: default: 32768 minimum: 301
    Mount-cache hash table entries: 256
    ^AChecking that host ptys support output SIGIO...Yes
    ^AChecking that host ptys support SIGIO on close...No, enabling workaround
    ^AUsing 2.6 host AIO
    NET: Registered protocol family 16
    bio: create slab <bio-0> at 0
    Switching to clocksource itimer

To fix this:
  - Move the mapping from UM_KERN_<LEVEL> to KERN_<LEVEL> from
    arch/um/include/shared/common-offsets.h to
    arch/um/include/shared/user.h, which is preincluded for all userspace
    parts,
  - Preinclude include/linux/kern_levels.h for all userspace parts, to
    obtain the in-kernel KERN_<LEVEL> constant definitions. This doesn't
    violate the kernel/userspace separation, as include/linux/kern_levels.h
    is self-contained and doesn't expose any other kernel internals.
  - Remove the now unused STR() and DEFINE_STR() macros.

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 arch/um/include/shared/common-offsets.h    | 10 ----------
 arch/um/include/shared/user.h              | 11 +++++++++++
 arch/um/scripts/Makefile.rules             |  2 +-
 arch/x86/um/shared/sysdep/kernel-offsets.h |  3 ---
 4 files changed, 12 insertions(+), 14 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/um/include/shared/common-offsets.h b/arch/um/include/shared/common-offsets.h
index 40db8f71dea..2df313b6a58 100644
--- a/arch/um/include/shared/common-offsets.h
+++ b/arch/um/include/shared/common-offsets.h
@@ -7,16 +7,6 @@ DEFINE(UM_KERN_PAGE_MASK, PAGE_MASK);
 DEFINE(UM_KERN_PAGE_SHIFT, PAGE_SHIFT);
 DEFINE(UM_NSEC_PER_SEC, NSEC_PER_SEC);
 
-DEFINE_STR(UM_KERN_EMERG, KERN_EMERG);
-DEFINE_STR(UM_KERN_ALERT, KERN_ALERT);
-DEFINE_STR(UM_KERN_CRIT, KERN_CRIT);
-DEFINE_STR(UM_KERN_ERR, KERN_ERR);
-DEFINE_STR(UM_KERN_WARNING, KERN_WARNING);
-DEFINE_STR(UM_KERN_NOTICE, KERN_NOTICE);
-DEFINE_STR(UM_KERN_INFO, KERN_INFO);
-DEFINE_STR(UM_KERN_DEBUG, KERN_DEBUG);
-DEFINE_STR(UM_KERN_CONT, KERN_CONT);
-
 DEFINE(UM_ELF_CLASS, ELF_CLASS);
 DEFINE(UM_ELFCLASS32, ELFCLASS32);
 DEFINE(UM_ELFCLASS64, ELFCLASS64);
diff --git a/arch/um/include/shared/user.h b/arch/um/include/shared/user.h
index 4fa82c055aa..cef06856333 100644
--- a/arch/um/include/shared/user.h
+++ b/arch/um/include/shared/user.h
@@ -26,6 +26,17 @@
 extern void panic(const char *fmt, ...)
 	__attribute__ ((format (printf, 1, 2)));
 
+/* Requires preincluding include/linux/kern_levels.h */
+#define UM_KERN_EMERG	KERN_EMERG
+#define UM_KERN_ALERT	KERN_ALERT
+#define UM_KERN_CRIT	KERN_CRIT
+#define UM_KERN_ERR	KERN_ERR
+#define UM_KERN_WARNING	KERN_WARNING
+#define UM_KERN_NOTICE	KERN_NOTICE
+#define UM_KERN_INFO	KERN_INFO
+#define UM_KERN_DEBUG	KERN_DEBUG
+#define UM_KERN_CONT	KERN_CONT
+
 #ifdef UML_CONFIG_PRINTK
 extern int printk(const char *fmt, ...)
 	__attribute__ ((format (printf, 1, 2)));
diff --git a/arch/um/scripts/Makefile.rules b/arch/um/scripts/Makefile.rules
index d50270d26b4..15889df9b46 100644
--- a/arch/um/scripts/Makefile.rules
+++ b/arch/um/scripts/Makefile.rules
@@ -8,7 +8,7 @@ USER_OBJS += $(filter %_user.o,$(obj-y) $(obj-m)  $(USER_SINGLE_OBJS))
 USER_OBJS := $(foreach file,$(USER_OBJS),$(obj)/$(file))
 
 $(USER_OBJS:.o=.%): \
-	c_flags = -Wp,-MD,$(depfile) $(USER_CFLAGS) -include user.h $(CFLAGS_$(basetarget).o)
+	c_flags = -Wp,-MD,$(depfile) $(USER_CFLAGS) -include $(srctree)/include/linux/kern_levels.h -include user.h $(CFLAGS_$(basetarget).o)
 
 # These are like USER_OBJS but filter USER_CFLAGS through unprofile instead of
 # using it directly.
diff --git a/arch/x86/um/shared/sysdep/kernel-offsets.h b/arch/x86/um/shared/sysdep/kernel-offsets.h
index 5868526b5ee..46a9df99f3c 100644
--- a/arch/x86/um/shared/sysdep/kernel-offsets.h
+++ b/arch/x86/um/shared/sysdep/kernel-offsets.h
@@ -7,9 +7,6 @@
 #define DEFINE(sym, val) \
 	asm volatile("\n->" #sym " %0 " #val : : "i" (val))
 
-#define STR(x) #x
-#define DEFINE_STR(sym, val) asm volatile("\n->" #sym " " STR(val) " " #val: : )
-
 #define BLANK() asm volatile("\n->" : : )
 
 #define OFFSET(sym, str, mem) \
-- 
cgit v1.2.3-70-g09d2


From 786d35d45cc40b2a51a18f73e14e135d47fdced7 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 28 Sep 2012 14:31:03 +0930
Subject: Make most arch asm/module.h files use asm-generic/module.h

Use the mapping of Elf_[SPE]hdr, Elf_Addr, Elf_Sym, Elf_Dyn, Elf_Rel/Rela,
ELF_R_TYPE() and ELF_R_SYM() to either the 32-bit version or the 64-bit version
into asm-generic/module.h for all arches bar MIPS.

Also, use the generic definition mod_arch_specific where possible.

To this end, I've defined three new config bools:

 (*) HAVE_MOD_ARCH_SPECIFIC

     Arches define this if they don't want to use the empty generic
     mod_arch_specific struct.

 (*) MODULES_USE_ELF_RELA

     Arches define this if their modules can contain RELA records.  This causes
     the Elf_Rela mapping to be emitted and allows apply_relocate_add() to be
     defined by the arch rather than have the core emit an error message.

 (*) MODULES_USE_ELF_REL

     Arches define this if their modules can contain REL records.  This causes
     the Elf_Rel mapping to be emitted and allows apply_relocate() to be
     defined by the arch rather than have the core emit an error message.

Note that it is possible to allow both REL and RELA records: m68k and mips are
two arches that do this.

With this, some arch asm/module.h files can be deleted entirely and replaced
with a generic-y marker in the arch Kbuild file.

Additionally, I have removed the bits from m32r and score that handle the
unsupported type of relocation record as that's now handled centrally.

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 arch/Kconfig                       | 19 ++++++++++++++++++
 arch/alpha/Kconfig                 |  2 ++
 arch/alpha/include/asm/module.h    | 10 ++--------
 arch/arm/Kconfig                   |  2 ++
 arch/arm/include/asm/module.h      |  8 ++------
 arch/avr32/Kconfig                 |  2 ++
 arch/avr32/include/asm/module.h    |  6 ++----
 arch/blackfin/Kconfig              |  2 ++
 arch/blackfin/include/asm/module.h |  4 +---
 arch/c6x/Kconfig                   |  1 +
 arch/c6x/include/asm/module.h      | 12 +-----------
 arch/cris/Kconfig                  |  1 +
 arch/cris/include/asm/Kbuild       |  2 ++
 arch/cris/include/asm/module.h     |  9 ---------
 arch/frv/include/asm/module.h      |  8 +-------
 arch/h8300/Kconfig                 |  1 +
 arch/h8300/include/asm/Kbuild      |  2 ++
 arch/h8300/include/asm/module.h    | 11 -----------
 arch/hexagon/Kconfig               |  1 +
 arch/ia64/Kconfig                  |  2 ++
 arch/ia64/include/asm/module.h     |  6 ++----
 arch/m32r/Kconfig                  |  1 +
 arch/m32r/include/asm/Kbuild       |  2 ++
 arch/m32r/include/asm/module.h     | 10 ----------
 arch/m32r/kernel/module.c          | 15 --------------
 arch/m68k/Kconfig                  |  3 +++
 arch/m68k/include/asm/module.h     |  6 ++----
 arch/microblaze/Kconfig            |  1 +
 arch/mips/Kconfig                  |  3 +++
 arch/mips/include/asm/module.h     | 10 ++++++++--
 arch/mips/kernel/Makefile          |  2 +-
 arch/mn10300/Kconfig               |  1 +
 arch/mn10300/include/asm/module.h  |  7 +------
 arch/openrisc/Kconfig              |  1 +
 arch/parisc/Kconfig                |  2 ++
 arch/parisc/include/asm/module.h   | 16 +++------------
 arch/powerpc/Kconfig               |  2 ++
 arch/powerpc/include/asm/module.h  |  7 +------
 arch/s390/Kconfig                  |  2 ++
 arch/s390/include/asm/module.h     | 18 +++--------------
 arch/score/Kconfig                 |  2 ++
 arch/score/include/asm/module.h    |  6 +-----
 arch/score/kernel/module.c         | 10 ----------
 arch/sh/Kconfig                    |  2 ++
 arch/sh/include/asm/module.h       | 14 +++----------
 arch/sparc/Kconfig                 |  1 +
 arch/sparc/include/asm/Kbuild      |  1 +
 arch/sparc/include/asm/module.h    | 24 -----------------------
 arch/tile/Kconfig                  |  1 +
 arch/unicore32/Kconfig             |  1 +
 arch/x86/Kconfig                   |  2 ++
 arch/x86/um/Kconfig                |  2 ++
 arch/xtensa/Kconfig                |  1 +
 arch/xtensa/include/asm/module.h   |  9 +--------
 include/asm-generic/module.h       | 40 +++++++++++++++++++++++++++++++-------
 include/linux/moduleloader.h       | 36 ++++++++++++++++++++++++++++++----
 kernel/module.c                    | 20 -------------------
 57 files changed, 168 insertions(+), 224 deletions(-)
 delete mode 100644 arch/cris/include/asm/module.h
 delete mode 100644 arch/h8300/include/asm/module.h
 delete mode 100644 arch/m32r/include/asm/module.h
 delete mode 100644 arch/sparc/include/asm/module.h

(limited to 'arch/x86')

diff --git a/arch/Kconfig b/arch/Kconfig
index 72f2fa189cc..3450115c643 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -281,4 +281,23 @@ config SECCOMP_FILTER
 
 	  See Documentation/prctl/seccomp_filter.txt for details.
 
+config HAVE_MOD_ARCH_SPECIFIC
+	bool
+	help
+	  The arch uses struct mod_arch_specific to store data.  Many arches
+	  just need a simple module loader without arch specific data - those
+	  should not enable this.
+
+config MODULES_USE_ELF_RELA
+	bool
+	help
+	  Modules only use ELF RELA relocations.  Modules with ELF REL
+	  relocations will give an error.
+
+config MODULES_USE_ELF_REL
+	bool
+	help
+	  Modules only use ELF REL relocations.  Modules with ELF RELA
+	  relocations will give an error.
+
 source "kernel/gcov/Kconfig"
diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index 9944dedee5b..7e3710c0cce 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -20,6 +20,8 @@ config ALPHA
 	select GENERIC_CMOS_UPDATE
 	select GENERIC_STRNCPY_FROM_USER
 	select GENERIC_STRNLEN_USER
+	select HAVE_MOD_ARCH_SPECIFIC
+	select MODULES_USE_ELF_RELA
 	help
 	  The Alpha is a 64-bit general-purpose processor designed and
 	  marketed by the Digital Equipment Corporation of blessed memory,
diff --git a/arch/alpha/include/asm/module.h b/arch/alpha/include/asm/module.h
index 7b63743c534..9cd13b55155 100644
--- a/arch/alpha/include/asm/module.h
+++ b/arch/alpha/include/asm/module.h
@@ -1,19 +1,13 @@
 #ifndef _ALPHA_MODULE_H
 #define _ALPHA_MODULE_H
 
+#include <asm-generic/module.h>
+
 struct mod_arch_specific
 {
 	unsigned int gotsecindex;
 };
 
-#define Elf_Sym Elf64_Sym
-#define Elf_Shdr Elf64_Shdr
-#define Elf_Ehdr Elf64_Ehdr
-#define Elf_Phdr Elf64_Phdr
-#define Elf_Dyn Elf64_Dyn
-#define Elf_Rel Elf64_Rel
-#define Elf_Rela Elf64_Rela
-
 #define ARCH_SHF_SMALL SHF_ALPHA_GPREL
 
 #ifdef MODULE
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 2f88d8d9770..7a08b3a71c0 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -49,6 +49,8 @@ config ARM
 	select GENERIC_STRNCPY_FROM_USER
 	select GENERIC_STRNLEN_USER
 	select DCACHE_WORD_ACCESS if (CPU_V6 || CPU_V6K || CPU_V7) && !CPU_BIG_ENDIAN
+	select HAVE_MOD_ARCH_SPECIFIC if ARM_UNWIND
+	select MODULES_USE_ELF_REL
 	help
 	  The ARM series is a line of low-power-consumption RISC chip designs
 	  licensed by ARM Ltd and targeted at embedded applications and
diff --git a/arch/arm/include/asm/module.h b/arch/arm/include/asm/module.h
index 6c6809f982f..0d3a28dbc8e 100644
--- a/arch/arm/include/asm/module.h
+++ b/arch/arm/include/asm/module.h
@@ -1,9 +1,7 @@
 #ifndef _ASM_ARM_MODULE_H
 #define _ASM_ARM_MODULE_H
 
-#define Elf_Shdr	Elf32_Shdr
-#define Elf_Sym		Elf32_Sym
-#define Elf_Ehdr	Elf32_Ehdr
+#include <asm-generic/module.h>
 
 struct unwind_table;
 
@@ -16,13 +14,11 @@ enum {
 	ARM_SEC_DEVEXIT,
 	ARM_SEC_MAX,
 };
-#endif
 
 struct mod_arch_specific {
-#ifdef CONFIG_ARM_UNWIND
 	struct unwind_table *unwind[ARM_SEC_MAX];
-#endif
 };
+#endif
 
 /*
  * Add the ARM architecture version to the version magic string
diff --git a/arch/avr32/Kconfig b/arch/avr32/Kconfig
index 5ade51c8a87..06e73bf665e 100644
--- a/arch/avr32/Kconfig
+++ b/arch/avr32/Kconfig
@@ -15,6 +15,8 @@ config AVR32
 	select ARCH_WANT_IPC_PARSE_VERSION
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	select GENERIC_CLOCKEVENTS
+	select HAVE_MOD_ARCH_SPECIFIC
+	select MODULES_USE_ELF_RELA
 	help
 	  AVR32 is a high-performance 32-bit RISC microprocessor core,
 	  designed for cost-sensitive embedded applications, with particular
diff --git a/arch/avr32/include/asm/module.h b/arch/avr32/include/asm/module.h
index 451444538a1..3f083d385a6 100644
--- a/arch/avr32/include/asm/module.h
+++ b/arch/avr32/include/asm/module.h
@@ -1,6 +1,8 @@
 #ifndef __ASM_AVR32_MODULE_H
 #define __ASM_AVR32_MODULE_H
 
+#include <asm-generic/module.h>
+
 struct mod_arch_syminfo {
 	unsigned long got_offset;
 	int got_initialized;
@@ -17,10 +19,6 @@ struct mod_arch_specific {
 	struct mod_arch_syminfo *syminfo;
 };
 
-#define Elf_Shdr		Elf32_Shdr
-#define Elf_Sym			Elf32_Sym
-#define Elf_Ehdr		Elf32_Ehdr
-
 #define MODULE_PROC_FAMILY "AVR32v1"
 
 #define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY
diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig
index c7092e6057c..8e82e267b89 100644
--- a/arch/blackfin/Kconfig
+++ b/arch/blackfin/Kconfig
@@ -42,6 +42,8 @@ config BLACKFIN
 	select HAVE_NMI_WATCHDOG if NMI_WATCHDOG
 	select GENERIC_SMP_IDLE_THREAD
 	select ARCH_USES_GETTIMEOFFSET if !GENERIC_CLOCKEVENTS
+	select HAVE_MOD_ARCH_SPECIFIC
+	select MODULES_USE_ELF_RELA
 
 config GENERIC_CSUM
 	def_bool y
diff --git a/arch/blackfin/include/asm/module.h b/arch/blackfin/include/asm/module.h
index ed5689b82c9..231a149b3f7 100644
--- a/arch/blackfin/include/asm/module.h
+++ b/arch/blackfin/include/asm/module.h
@@ -7,9 +7,7 @@
 #ifndef _ASM_BFIN_MODULE_H
 #define _ASM_BFIN_MODULE_H
 
-#define Elf_Shdr        Elf32_Shdr
-#define Elf_Sym         Elf32_Sym
-#define Elf_Ehdr        Elf32_Ehdr
+#include <asm-generic/module.h>
 
 struct mod_arch_specific {
 	Elf_Shdr	*text_l1;
diff --git a/arch/c6x/Kconfig b/arch/c6x/Kconfig
index 983c859e40b..f6a3648f5ec 100644
--- a/arch/c6x/Kconfig
+++ b/arch/c6x/Kconfig
@@ -17,6 +17,7 @@ config C6X
 	select OF
 	select OF_EARLY_FLATTREE
 	select GENERIC_CLOCKEVENTS
+	select MODULES_USE_ELF_RELA
 
 config MMU
 	def_bool n
diff --git a/arch/c6x/include/asm/module.h b/arch/c6x/include/asm/module.h
index a453f9744f4..5c7269c7ef7 100644
--- a/arch/c6x/include/asm/module.h
+++ b/arch/c6x/include/asm/module.h
@@ -13,17 +13,7 @@
 #ifndef _ASM_C6X_MODULE_H
 #define _ASM_C6X_MODULE_H
 
-#define Elf_Shdr	Elf32_Shdr
-#define Elf_Sym		Elf32_Sym
-#define Elf_Ehdr	Elf32_Ehdr
-#define Elf_Addr	Elf32_Addr
-#define Elf_Word	Elf32_Word
-
-/*
- * This file contains the C6x architecture specific module code.
- */
-struct mod_arch_specific {
-};
+#include <asm-generic/module.h>
 
 struct loaded_sections {
 	unsigned int new_vaddr;
diff --git a/arch/cris/Kconfig b/arch/cris/Kconfig
index e92215428a3..7bb8cf90e6a 100644
--- a/arch/cris/Kconfig
+++ b/arch/cris/Kconfig
@@ -47,6 +47,7 @@ config CRIS
 	select GENERIC_IOMAP
 	select GENERIC_SMP_IDLE_THREAD if ETRAX_ARCH_V32
 	select GENERIC_CMOS_UPDATE
+	select MODULES_USE_ELF_RELA
 
 config HZ
 	int
diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild
index 04d02a51c5e..28b690de797 100644
--- a/arch/cris/include/asm/Kbuild
+++ b/arch/cris/include/asm/Kbuild
@@ -7,3 +7,5 @@ header-y += ethernet.h
 header-y += etraxgpio.h
 header-y += rs485.h
 header-y += sync_serial.h
+
+generic-y += module.h
diff --git a/arch/cris/include/asm/module.h b/arch/cris/include/asm/module.h
deleted file mode 100644
index 7ee72311bd7..00000000000
--- a/arch/cris/include/asm/module.h
+++ /dev/null
@@ -1,9 +0,0 @@
-#ifndef _ASM_CRIS_MODULE_H
-#define _ASM_CRIS_MODULE_H
-/* cris is simple */
-struct mod_arch_specific { };
-
-#define Elf_Shdr Elf32_Shdr
-#define Elf_Sym Elf32_Sym
-#define Elf_Ehdr Elf32_Ehdr
-#endif /* _ASM_CRIS_MODULE_H */
diff --git a/arch/frv/include/asm/module.h b/arch/frv/include/asm/module.h
index 3d5c6360289..a8848f09a21 100644
--- a/arch/frv/include/asm/module.h
+++ b/arch/frv/include/asm/module.h
@@ -11,13 +11,7 @@
 #ifndef _ASM_MODULE_H
 #define _ASM_MODULE_H
 
-struct mod_arch_specific
-{
-};
-
-#define Elf_Shdr	Elf32_Shdr
-#define Elf_Sym		Elf32_Sym
-#define Elf_Ehdr	Elf32_Ehdr
+#include <asm-generic/module.h>
 
 /*
  * Include the architecture version.
diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig
index 5e8a0d9a09c..c149d3b29eb 100644
--- a/arch/h8300/Kconfig
+++ b/arch/h8300/Kconfig
@@ -6,6 +6,7 @@ config H8300
 	select ARCH_WANT_IPC_PARSE_VERSION
 	select GENERIC_IRQ_SHOW
 	select GENERIC_CPU_DEVICES
+	select MODULES_USE_ELF_RELA
 
 config SYMBOL_PREFIX
 	string
diff --git a/arch/h8300/include/asm/Kbuild b/arch/h8300/include/asm/Kbuild
index c68e1680da0..871382d239f 100644
--- a/arch/h8300/include/asm/Kbuild
+++ b/arch/h8300/include/asm/Kbuild
@@ -1 +1,3 @@
 include include/asm-generic/Kbuild.asm
+
+generic-y	+= module.h
diff --git a/arch/h8300/include/asm/module.h b/arch/h8300/include/asm/module.h
deleted file mode 100644
index 8e46724b7c0..00000000000
--- a/arch/h8300/include/asm/module.h
+++ /dev/null
@@ -1,11 +0,0 @@
-#ifndef _ASM_H8300_MODULE_H
-#define _ASM_H8300_MODULE_H
-/*
- * This file contains the H8/300 architecture specific module code.
- */
-struct mod_arch_specific { };
-#define Elf_Shdr Elf32_Shdr
-#define Elf_Sym Elf32_Sym
-#define Elf_Ehdr Elf32_Ehdr
-
-#endif /* _ASM_H8/300_MODULE_H */
diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig
index b2fdfb700f5..0744f7d7b1f 100644
--- a/arch/hexagon/Kconfig
+++ b/arch/hexagon/Kconfig
@@ -30,6 +30,7 @@ config HEXAGON
 	select KTIME_SCALAR
 	select GENERIC_CLOCKEVENTS
 	select GENERIC_CLOCKEVENTS_BROADCAST
+	select MODULES_USE_ELF_RELA
 	---help---
 	  Qualcomm Hexagon is a processor architecture designed for high
 	  performance and low power across a wide variety of applications.
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 310cf5781fa..688146466d0 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -39,6 +39,8 @@ config IA64
 	select ARCH_THREAD_INFO_ALLOCATOR
 	select ARCH_CLOCKSOURCE_DATA
 	select GENERIC_TIME_VSYSCALL
+	select HAVE_MOD_ARCH_SPECIFIC
+	select MODULES_USE_ELF_RELA
 	default y
 	help
 	  The Itanium Processor Family is Intel's 64-bit successor to
diff --git a/arch/ia64/include/asm/module.h b/arch/ia64/include/asm/module.h
index 908eaef42a0..dfba22a872c 100644
--- a/arch/ia64/include/asm/module.h
+++ b/arch/ia64/include/asm/module.h
@@ -1,6 +1,8 @@
 #ifndef _ASM_IA64_MODULE_H
 #define _ASM_IA64_MODULE_H
 
+#include <asm-generic/module.h>
+
 /*
  * IA-64-specific support for kernel module loader.
  *
@@ -29,10 +31,6 @@ struct mod_arch_specific {
 	unsigned int next_got_entry;	/* index of next available got entry */
 };
 
-#define Elf_Shdr	Elf64_Shdr
-#define Elf_Sym		Elf64_Sym
-#define Elf_Ehdr	Elf64_Ehdr
-
 #define MODULE_PROC_FAMILY	"ia64"
 #define MODULE_ARCH_VERMAGIC	MODULE_PROC_FAMILY \
 	"gcc-" __stringify(__GNUC__) "." __stringify(__GNUC_MINOR__)
diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig
index 49498bbb961..fc6153361bf 100644
--- a/arch/m32r/Kconfig
+++ b/arch/m32r/Kconfig
@@ -13,6 +13,7 @@ config M32R
 	select GENERIC_IRQ_SHOW
 	select GENERIC_ATOMIC64
 	select ARCH_USES_GETTIMEOFFSET
+	select MODULES_USE_ELF_RELA
 
 config SBUS
 	bool
diff --git a/arch/m32r/include/asm/Kbuild b/arch/m32r/include/asm/Kbuild
index c68e1680da0..871382d239f 100644
--- a/arch/m32r/include/asm/Kbuild
+++ b/arch/m32r/include/asm/Kbuild
@@ -1 +1,3 @@
 include include/asm-generic/Kbuild.asm
+
+generic-y	+= module.h
diff --git a/arch/m32r/include/asm/module.h b/arch/m32r/include/asm/module.h
deleted file mode 100644
index eb73ee01121..00000000000
--- a/arch/m32r/include/asm/module.h
+++ /dev/null
@@ -1,10 +0,0 @@
-#ifndef _ASM_M32R_MODULE_H
-#define _ASM_M32R_MODULE_H
-
-struct mod_arch_specific { };
-
-#define Elf_Shdr	Elf32_Shdr
-#define Elf_Sym		Elf32_Sym
-#define Elf_Ehdr	Elf32_Ehdr
-
-#endif /* _ASM_M32R_MODULE_H */
diff --git a/arch/m32r/kernel/module.c b/arch/m32r/kernel/module.c
index 3071fe83ffc..38233b6596b 100644
--- a/arch/m32r/kernel/module.c
+++ b/arch/m32r/kernel/module.c
@@ -201,18 +201,3 @@ int apply_relocate_add(Elf32_Shdr *sechdrs,
 	}
 	return 0;
 }
-
-int apply_relocate(Elf32_Shdr *sechdrs,
-		       const char *strtab,
-		       unsigned int symindex,
-		       unsigned int relsec,
-		       struct module *me)
-{
-#if 0
-	printk(KERN_ERR "module %s: REL RELOCATION unsupported\n",
-	       me->name);
-	return -ENOEXEC;
-#endif
-	return 0;
-
-}
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index b22df9410dc..0df07cee3fa 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -13,6 +13,9 @@ config M68K
 	select FPU if MMU
 	select ARCH_WANT_IPC_PARSE_VERSION
 	select ARCH_USES_GETTIMEOFFSET if MMU && !COLDFIRE
+	select HAVE_MOD_ARCH_SPECIFIC
+	select MODULES_USE_ELF_REL
+	select MODULES_USE_ELF_RELA
 
 config RWSEM_GENERIC_SPINLOCK
 	bool
diff --git a/arch/m68k/include/asm/module.h b/arch/m68k/include/asm/module.h
index edffe66b7f4..8b58fce843d 100644
--- a/arch/m68k/include/asm/module.h
+++ b/arch/m68k/include/asm/module.h
@@ -1,6 +1,8 @@
 #ifndef _ASM_M68K_MODULE_H
 #define _ASM_M68K_MODULE_H
 
+#include <asm-generic/module.h>
+
 enum m68k_fixup_type {
 	m68k_fixup_memoffset,
 	m68k_fixup_vnode_shift,
@@ -36,8 +38,4 @@ struct module;
 extern void module_fixup(struct module *mod, struct m68k_fixup_info *start,
 			 struct m68k_fixup_info *end);
 
-#define Elf_Shdr Elf32_Shdr
-#define Elf_Sym Elf32_Sym
-#define Elf_Ehdr Elf32_Ehdr
-
 #endif /* _ASM_M68K_MODULE_H */
diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig
index ab9afcaa7f6..b4f409f942a 100644
--- a/arch/microblaze/Kconfig
+++ b/arch/microblaze/Kconfig
@@ -24,6 +24,7 @@ config MICROBLAZE
 	select GENERIC_CPU_DEVICES
 	select GENERIC_ATOMIC64
 	select GENERIC_CLOCKEVENTS
+	select MODULES_USE_ELF_RELA
 
 config SWAP
 	def_bool n
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index faf65286574..dccdfcd9e18 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -36,6 +36,9 @@ config MIPS
 	select BUILDTIME_EXTABLE_SORT
 	select GENERIC_CLOCKEVENTS
 	select GENERIC_CMOS_UPDATE
+	select HAVE_MOD_ARCH_SPECIFIC
+	select MODULES_USE_ELF_REL
+	select MODULES_USE_ELF_RELA if 64BIT
 
 menu "Machine selection"
 
diff --git a/arch/mips/include/asm/module.h b/arch/mips/include/asm/module.h
index dca8bce8c7a..26137da1c71 100644
--- a/arch/mips/include/asm/module.h
+++ b/arch/mips/include/asm/module.h
@@ -35,11 +35,14 @@ typedef struct {
 } Elf64_Mips_Rela;
 
 #ifdef CONFIG_32BIT
-
 #define Elf_Shdr	Elf32_Shdr
 #define Elf_Sym		Elf32_Sym
 #define Elf_Ehdr	Elf32_Ehdr
 #define Elf_Addr	Elf32_Addr
+#define Elf_Rel		Elf32_Rel
+#define Elf_Rela	Elf32_Rela
+#define ELF_R_TYPE(X)	ELF32_R_TYPE(X)
+#define ELF_R_SYM(X)	ELF32_R_SYM(X)
 
 #define Elf_Mips_Rel	Elf32_Rel
 #define Elf_Mips_Rela	Elf32_Rela
@@ -50,11 +53,14 @@ typedef struct {
 #endif
 
 #ifdef CONFIG_64BIT
-
 #define Elf_Shdr	Elf64_Shdr
 #define Elf_Sym		Elf64_Sym
 #define Elf_Ehdr	Elf64_Ehdr
 #define Elf_Addr	Elf64_Addr
+#define Elf_Rel		Elf64_Rel
+#define Elf_Rela	Elf64_Rela
+#define ELF_R_TYPE(X)	ELF64_R_TYPE(X)
+#define ELF_R_SYM(X)	ELF64_R_SYM(X)
 
 #define Elf_Mips_Rel	Elf64_Mips_Rel
 #define Elf_Mips_Rela	Elf64_Mips_Rela
diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile
index e2c14999839..cd1e6c2421b 100644
--- a/arch/mips/kernel/Makefile
+++ b/arch/mips/kernel/Makefile
@@ -31,7 +31,7 @@ obj-$(CONFIG_SYNC_R4K)		+= sync-r4k.o
 
 obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
 obj-$(CONFIG_MODULES)		+= mips_ksyms.o module.o
-obj-$(CONFIG_MODULES)		+= module-rela.o
+obj-$(CONFIG_MODULES_USE_ELF_RELA) += module-rela.o
 
 obj-$(CONFIG_FUNCTION_TRACER)	+= mcount.o ftrace.o
 
diff --git a/arch/mn10300/Kconfig b/arch/mn10300/Kconfig
index 5cfb086b390..aa03f2e1338 100644
--- a/arch/mn10300/Kconfig
+++ b/arch/mn10300/Kconfig
@@ -8,6 +8,7 @@ config MN10300
 	select HAVE_ARCH_KGDB
 	select HAVE_NMI_WATCHDOG if MN10300_WD_TIMER
 	select GENERIC_CLOCKEVENTS
+	select MODULES_USE_ELF_RELA
 
 config AM33_2
 	def_bool n
diff --git a/arch/mn10300/include/asm/module.h b/arch/mn10300/include/asm/module.h
index 5d7057d0149..6571103b051 100644
--- a/arch/mn10300/include/asm/module.h
+++ b/arch/mn10300/include/asm/module.h
@@ -12,12 +12,7 @@
 #ifndef _ASM_MODULE_H
 #define _ASM_MODULE_H
 
-struct mod_arch_specific {
-};
-
-#define Elf_Shdr	Elf32_Shdr
-#define Elf_Sym		Elf32_Sym
-#define Elf_Ehdr	Elf32_Ehdr
+#include <asm-generic/module.h>
 
 /*
  * Include the MN10300 architecture version.
diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig
index 49765b53f63..05f2ba41ff1 100644
--- a/arch/openrisc/Kconfig
+++ b/arch/openrisc/Kconfig
@@ -21,6 +21,7 @@ config OPENRISC
 	select GENERIC_CLOCKEVENTS
 	select GENERIC_STRNCPY_FROM_USER
 	select GENERIC_STRNLEN_USER
+	select MODULES_USE_ELF_RELA
 
 config MMU
 	def_bool y
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index 3ff21b536f2..166d9911bc8 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -19,6 +19,8 @@ config PARISC
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	select GENERIC_SMP_IDLE_THREAD
 	select GENERIC_STRNCPY_FROM_USER
+	select HAVE_MOD_ARCH_SPECIFIC
+	select MODULES_USE_ELF_RELA
 
 	help
 	  The PA-RISC microprocessor is designed by Hewlett-Packard and used
diff --git a/arch/parisc/include/asm/module.h b/arch/parisc/include/asm/module.h
index 1f4123427ea..bab37e99168 100644
--- a/arch/parisc/include/asm/module.h
+++ b/arch/parisc/include/asm/module.h
@@ -1,21 +1,11 @@
 #ifndef _ASM_PARISC_MODULE_H
 #define _ASM_PARISC_MODULE_H
+
+#include <asm-generic/module.h>
+
 /*
  * This file contains the parisc architecture specific module code.
  */
-#ifdef CONFIG_64BIT
-#define Elf_Shdr Elf64_Shdr
-#define Elf_Sym Elf64_Sym
-#define Elf_Ehdr Elf64_Ehdr
-#define Elf_Addr Elf64_Addr
-#define Elf_Rela Elf64_Rela
-#else
-#define Elf_Shdr Elf32_Shdr
-#define Elf_Sym Elf32_Sym
-#define Elf_Ehdr Elf32_Ehdr
-#define Elf_Addr Elf32_Addr
-#define Elf_Rela Elf32_Rela
-#endif
 
 struct unwind_table;
 
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 352f416269c..74f84781b48 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -139,6 +139,8 @@ config PPC
 	select GENERIC_CLOCKEVENTS
 	select GENERIC_STRNCPY_FROM_USER
 	select GENERIC_STRNLEN_USER
+	select HAVE_MOD_ARCH_SPECIFIC
+	select MODULES_USE_ELF_RELA
 
 config EARLY_PRINTK
 	bool
diff --git a/arch/powerpc/include/asm/module.h b/arch/powerpc/include/asm/module.h
index 0192a4ee2bc..c1df590ec44 100644
--- a/arch/powerpc/include/asm/module.h
+++ b/arch/powerpc/include/asm/module.h
@@ -11,6 +11,7 @@
 
 #include <linux/list.h>
 #include <asm/bug.h>
+#include <asm-generic/module.h>
 
 
 #ifndef __powerpc64__
@@ -60,16 +61,10 @@ struct mod_arch_specific {
  */
 
 #ifdef __powerpc64__
-#    define Elf_Shdr	Elf64_Shdr
-#    define Elf_Sym	Elf64_Sym
-#    define Elf_Ehdr	Elf64_Ehdr
 #    ifdef MODULE
 	asm(".section .stubs,\"ax\",@nobits; .align 3; .previous");
 #    endif
 #else
-#    define Elf_Shdr	Elf32_Shdr
-#    define Elf_Sym	Elf32_Sym
-#    define Elf_Ehdr	Elf32_Ehdr
 #    ifdef MODULE
 	asm(".section .plt,\"ax\",@nobits; .align 3; .previous");
 	asm(".section .init.plt,\"ax\",@nobits; .align 3; .previous");
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 107610e01a2..c76a052f60e 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -125,6 +125,8 @@ config S390
 	select GENERIC_CLOCKEVENTS
 	select KTIME_SCALAR if 32BIT
 	select HAVE_ARCH_SECCOMP_FILTER
+	select HAVE_MOD_ARCH_SPECIFIC
+	select MODULES_USE_ELF_RELA
 
 config SCHED_OMIT_FRAME_POINTER
 	def_bool y
diff --git a/arch/s390/include/asm/module.h b/arch/s390/include/asm/module.h
index f0b6b26b6e5..df1f861a848 100644
--- a/arch/s390/include/asm/module.h
+++ b/arch/s390/include/asm/module.h
@@ -1,5 +1,8 @@
 #ifndef _ASM_S390_MODULE_H
 #define _ASM_S390_MODULE_H
+
+#include <asm-generic/module.h>
+
 /*
  * This file contains the s390 architecture specific module code.
  */
@@ -28,19 +31,4 @@ struct mod_arch_specific
 	struct mod_arch_syminfo *syminfo;
 };
 
-#ifdef CONFIG_64BIT
-#define ElfW(x) Elf64_ ## x
-#define ELFW(x) ELF64_ ## x
-#else
-#define ElfW(x) Elf32_ ## x
-#define ELFW(x) ELF32_ ## x
-#endif
-
-#define Elf_Addr ElfW(Addr)
-#define Elf_Rela ElfW(Rela)
-#define Elf_Shdr ElfW(Shdr)
-#define Elf_Sym ElfW(Sym)
-#define Elf_Ehdr ElfW(Ehdr)
-#define ELF_R_SYM ELFW(R_SYM)
-#define ELF_R_TYPE ELFW(R_TYPE)
 #endif /* _ASM_S390_MODULE_H */
diff --git a/arch/score/Kconfig b/arch/score/Kconfig
index ba0f412920b..e2c8db4533d 100644
--- a/arch/score/Kconfig
+++ b/arch/score/Kconfig
@@ -10,6 +10,8 @@ config SCORE
        select ARCH_DISCARD_MEMBLOCK
        select GENERIC_CPU_DEVICES
        select GENERIC_CLOCKEVENTS
+       select HAVE_MOD_ARCH_SPECIFIC
+	select MODULES_USE_ELF_REL
 
 choice
 	prompt "System type"
diff --git a/arch/score/include/asm/module.h b/arch/score/include/asm/module.h
index f0b5dc0bd02..abf395bbfab 100644
--- a/arch/score/include/asm/module.h
+++ b/arch/score/include/asm/module.h
@@ -3,6 +3,7 @@
 
 #include <linux/list.h>
 #include <asm/uaccess.h>
+#include <asm-generic/module.h>
 
 struct mod_arch_specific {
 	/* Data Bus Error exception tables */
@@ -13,11 +14,6 @@ struct mod_arch_specific {
 
 typedef uint8_t Elf64_Byte;		/* Type for a 8-bit quantity. */
 
-#define Elf_Shdr	Elf32_Shdr
-#define Elf_Sym		Elf32_Sym
-#define Elf_Ehdr	Elf32_Ehdr
-#define Elf_Addr	Elf32_Addr
-
 /* Given an address, look for it in the exception tables. */
 #ifdef CONFIG_MODULES
 const struct exception_table_entry *search_module_dbetables(unsigned long addr);
diff --git a/arch/score/kernel/module.c b/arch/score/kernel/module.c
index 469e3b64e2f..1378d99baa3 100644
--- a/arch/score/kernel/module.c
+++ b/arch/score/kernel/module.c
@@ -125,16 +125,6 @@ int apply_relocate(Elf_Shdr *sechdrs, const char *strtab,
 	return 0;
 }
 
-int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
-		unsigned int symindex, unsigned int relsec,
-		struct module *me)
-{
-	/* Non-standard return value... most other arch's return -ENOEXEC
-	 * for an unsupported relocation variant
-	 */
-	return 0;
-}
-
 /* Given an address, look for it in the module exception tables. */
 const struct exception_table_entry *search_module_dbetables(unsigned long addr)
 {
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 36f5141e804..656329a9a59 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -35,6 +35,8 @@ config SUPERH
 	select GENERIC_CMOS_UPDATE if SH_SH03 || SH_DREAMCAST
 	select GENERIC_STRNCPY_FROM_USER
 	select GENERIC_STRNLEN_USER
+	select HAVE_MOD_ARCH_SPECIFIC if DWARF_UNWINDER
+	select MODULES_USE_ELF_RELA
 	help
 	  The SuperH is a RISC processor targeted for use in embedded systems
 	  and consumer electronics; it was also used in the Sega Dreamcast
diff --git a/arch/sh/include/asm/module.h b/arch/sh/include/asm/module.h
index b7927de86f9..81300d8b544 100644
--- a/arch/sh/include/asm/module.h
+++ b/arch/sh/include/asm/module.h
@@ -1,21 +1,13 @@
 #ifndef _ASM_SH_MODULE_H
 #define _ASM_SH_MODULE_H
 
-struct mod_arch_specific {
+#include <asm-generic/module.h>
+
 #ifdef CONFIG_DWARF_UNWINDER
+struct mod_arch_specific {
 	struct list_head fde_list;
 	struct list_head cie_list;
-#endif
 };
-
-#ifdef CONFIG_64BIT
-#define Elf_Shdr Elf64_Shdr
-#define Elf_Sym Elf64_Sym
-#define Elf_Ehdr Elf64_Ehdr
-#else
-#define Elf_Shdr Elf32_Shdr
-#define Elf_Sym Elf32_Sym
-#define Elf_Ehdr Elf32_Ehdr
 #endif
 
 #ifdef CONFIG_CPU_LITTLE_ENDIAN
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 67f1f6f5f4e..a244e70b9bb 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -37,6 +37,7 @@ config SPARC
 	select GENERIC_CLOCKEVENTS
 	select GENERIC_STRNCPY_FROM_USER
 	select GENERIC_STRNLEN_USER
+	select MODULES_USE_ELF_RELA
 
 config SPARC32
 	def_bool !64BIT
diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild
index 67f83e0a0d6..fbe1cb578fc 100644
--- a/arch/sparc/include/asm/Kbuild
+++ b/arch/sparc/include/asm/Kbuild
@@ -21,4 +21,5 @@ generic-y += div64.h
 generic-y += local64.h
 generic-y += irq_regs.h
 generic-y += local.h
+generic-y += module.h
 generic-y += word-at-a-time.h
diff --git a/arch/sparc/include/asm/module.h b/arch/sparc/include/asm/module.h
deleted file mode 100644
index ff8e02d8033..00000000000
--- a/arch/sparc/include/asm/module.h
+++ /dev/null
@@ -1,24 +0,0 @@
-#ifndef __SPARC_MODULE_H
-#define __SPARC_MODULE_H
-struct mod_arch_specific { };
-
-/*
- * Use some preprocessor magic to define the correct symbol
- * for sparc32 and sparc64.
- * Elf_Addr becomes Elf32_Addr for sparc32 and Elf64_Addr for sparc64
- */
-#define ___ELF(a, b, c) a##b##c
-#define __ELF(a, b, c)  ___ELF(a, b, c)
-#define  _Elf(t)        __ELF(Elf, CONFIG_BITS, t)
-#define  _ELF(t)        __ELF(ELF, CONFIG_BITS, t)
-
-#define Elf_Shdr     _Elf(_Shdr)
-#define Elf_Sym      _Elf(_Sym)
-#define Elf_Ehdr     _Elf(_Ehdr)
-#define Elf_Rela     _Elf(_Rela)
-#define Elf_Addr     _Elf(_Addr)
-
-#define ELF_R_SYM    _ELF(_R_SYM)
-#define ELF_R_TYPE   _ELF(_R_TYPE)
-
-#endif /* __SPARC_MODULE_H */
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index 932e4430f7f..1603f304339 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -17,6 +17,7 @@ config TILE
 	select SYS_HYPERVISOR
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	select GENERIC_CLOCKEVENTS
+	select MODULES_USE_ELF_RELA
 
 # FIXME: investigate whether we need/want these options.
 #	select HAVE_IOREMAP_PROT
diff --git a/arch/unicore32/Kconfig b/arch/unicore32/Kconfig
index b0a47433341..5ef081475bb 100644
--- a/arch/unicore32/Kconfig
+++ b/arch/unicore32/Kconfig
@@ -14,6 +14,7 @@ config UNICORE32
 	select GENERIC_IRQ_SHOW
 	select ARCH_WANT_FRAME_POINTERS
 	select GENERIC_IOMAP
+	select MODULES_USE_ELF_REL
 	help
 	  UniCore-32 is 32-bit Instruction Set Architecture,
 	  including a series of low-power-consumption RISC chip
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 8ec3a1aa4ab..01726cbcc73 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -97,6 +97,8 @@ config X86
 	select KTIME_SCALAR if X86_32
 	select GENERIC_STRNCPY_FROM_USER
 	select GENERIC_STRNLEN_USER
+	select MODULES_USE_ELF_REL if X86_32
+	select MODULES_USE_ELF_RELA if X86_64
 
 config INSTRUCTION_DECODER
 	def_bool (KPROBES || PERF_EVENTS || UPROBES)
diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig
index 9926e11a772..a4b0c10c9d5 100644
--- a/arch/x86/um/Kconfig
+++ b/arch/x86/um/Kconfig
@@ -21,9 +21,11 @@ config 64BIT
 config X86_32
 	def_bool !64BIT
 	select HAVE_AOUT
+	select MODULES_USE_ELF_REL
 
 config X86_64
 	def_bool 64BIT
+	select MODULES_USE_ELF_RELA
 
 config RWSEM_XCHGADD_ALGORITHM
 	def_bool X86_XADD && 64BIT
diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index 8ed64cfae4f..4816e44001f 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -11,6 +11,7 @@ config XTENSA
 	select HAVE_GENERIC_HARDIRQS
 	select GENERIC_IRQ_SHOW
 	select GENERIC_CPU_DEVICES
+	select MODULES_USE_ELF_RELA
 	help
 	  Xtensa processors are 32-bit RISC machines designed by Tensilica
 	  primarily for embedded systems.  These processors are both
diff --git a/arch/xtensa/include/asm/module.h b/arch/xtensa/include/asm/module.h
index d9b34bee4d4..488b40c6f9b 100644
--- a/arch/xtensa/include/asm/module.h
+++ b/arch/xtensa/include/asm/module.h
@@ -13,15 +13,8 @@
 #ifndef _XTENSA_MODULE_H
 #define _XTENSA_MODULE_H
 
-struct mod_arch_specific
-{
-	/* No special elements, yet. */
-};
-
 #define MODULE_ARCH_VERMAGIC "xtensa-" __stringify(XCHAL_CORE_ID) " "
 
-#define Elf_Shdr Elf32_Shdr
-#define Elf_Sym Elf32_Sym
-#define Elf_Ehdr Elf32_Ehdr
+#include <asm-generic/module.h>
 
 #endif	/* _XTENSA_MODULE_H */
diff --git a/include/asm-generic/module.h b/include/asm-generic/module.h
index ed5b44de4c9..14dc41d185a 100644
--- a/include/asm-generic/module.h
+++ b/include/asm-generic/module.h
@@ -5,18 +5,44 @@
  * Many architectures just need a simple module
  * loader without arch specific data.
  */
+#ifndef CONFIG_HAVE_MOD_ARCH_SPECIFIC
 struct mod_arch_specific
 {
 };
+#endif
 
 #ifdef CONFIG_64BIT
-#define Elf_Shdr Elf64_Shdr
-#define Elf_Sym Elf64_Sym
-#define Elf_Ehdr Elf64_Ehdr
-#else
-#define Elf_Shdr Elf32_Shdr
-#define Elf_Sym Elf32_Sym
-#define Elf_Ehdr Elf32_Ehdr
+#define Elf_Shdr	Elf64_Shdr
+#define Elf_Phdr	Elf64_Phdr
+#define Elf_Sym		Elf64_Sym
+#define Elf_Dyn		Elf64_Dyn
+#define Elf_Ehdr	Elf64_Ehdr
+#define Elf_Addr	Elf64_Addr
+#ifdef CONFIG_MODULES_USE_ELF_REL
+#define Elf_Rel		Elf64_Rel
+#endif
+#ifdef CONFIG_MODULES_USE_ELF_RELA
+#define Elf_Rela	Elf64_Rela
+#endif
+#define ELF_R_TYPE(X)	ELF64_R_TYPE(X)
+#define ELF_R_SYM(X)	ELF64_R_SYM(X)
+
+#else /* CONFIG_64BIT */
+
+#define Elf_Shdr	Elf32_Shdr
+#define Elf_Phdr	Elf32_Phdr
+#define Elf_Sym		Elf32_Sym
+#define Elf_Dyn		Elf32_Dyn
+#define Elf_Ehdr	Elf32_Ehdr
+#define Elf_Addr	Elf32_Addr
+#ifdef CONFIG_MODULES_USE_ELF_REL
+#define Elf_Rel		Elf32_Rel
+#endif
+#ifdef CONFIG_MODULES_USE_ELF_RELA
+#define Elf_Rela	Elf32_Rela
+#endif
+#define ELF_R_TYPE(X)	ELF32_R_TYPE(X)
+#define ELF_R_SYM(X)	ELF32_R_SYM(X)
 #endif
 
 #endif /* __ASM_GENERIC_MODULE_H */
diff --git a/include/linux/moduleloader.h b/include/linux/moduleloader.h
index b2be02ebf45..560ca53a75f 100644
--- a/include/linux/moduleloader.h
+++ b/include/linux/moduleloader.h
@@ -28,21 +28,49 @@ void *module_alloc(unsigned long size);
 /* Free memory returned from module_alloc. */
 void module_free(struct module *mod, void *module_region);
 
-/* Apply the given relocation to the (simplified) ELF.  Return -error
-   or 0. */
+/*
+ * Apply the given relocation to the (simplified) ELF.  Return -error
+ * or 0.
+ */
+#ifdef CONFIG_MODULES_USE_ELF_REL
 int apply_relocate(Elf_Shdr *sechdrs,
 		   const char *strtab,
 		   unsigned int symindex,
 		   unsigned int relsec,
 		   struct module *mod);
+#else
+static inline int apply_relocate(Elf_Shdr *sechdrs,
+				 const char *strtab,
+				 unsigned int symindex,
+				 unsigned int relsec,
+				 struct module *me)
+{
+	printk(KERN_ERR "module %s: REL relocation unsupported\n", me->name);
+	return -ENOEXEC;
+}
+#endif
 
-/* Apply the given add relocation to the (simplified) ELF.  Return
-   -error or 0 */
+/*
+ * Apply the given add relocation to the (simplified) ELF.  Return
+ * -error or 0
+ */
+#ifdef CONFIG_MODULES_USE_ELF_RELA
 int apply_relocate_add(Elf_Shdr *sechdrs,
 		       const char *strtab,
 		       unsigned int symindex,
 		       unsigned int relsec,
 		       struct module *mod);
+#else
+static inline int apply_relocate_add(Elf_Shdr *sechdrs,
+				     const char *strtab,
+				     unsigned int symindex,
+				     unsigned int relsec,
+				     struct module *me)
+{
+	printk(KERN_ERR "module %s: REL relocation unsupported\n", me->name);
+	return -ENOEXEC;
+}
+#endif
 
 /* Any final processing of module before access.  Return -error or 0. */
 int module_finalize(const Elf_Ehdr *hdr,
diff --git a/kernel/module.c b/kernel/module.c
index 9ad9ee9406d..7f2ee45f362 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1949,26 +1949,6 @@ static int simplify_symbols(struct module *mod, const struct load_info *info)
 	return ret;
 }
 
-int __weak apply_relocate(Elf_Shdr *sechdrs,
-			  const char *strtab,
-			  unsigned int symindex,
-			  unsigned int relsec,
-			  struct module *me)
-{
-	pr_err("module %s: REL relocation unsupported\n", me->name);
-	return -ENOEXEC;
-}
-
-int __weak apply_relocate_add(Elf_Shdr *sechdrs,
-			      const char *strtab,
-			      unsigned int symindex,
-			      unsigned int relsec,
-			      struct module *me)
-{
-	pr_err("module %s: RELA relocation unsupported\n", me->name);
-	return -ENOEXEC;
-}
-
 static int apply_relocations(struct module *mod, const struct load_info *info)
 {
 	unsigned int i;
-- 
cgit v1.2.3-70-g09d2


From eccbb05a64fef867362ff05b5d266757e3c82b36 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 28 Sep 2012 15:05:15 +0930
Subject: virtio: remove CONFIG_VIRTIO_RING

Everyone who selects VIRTIO is also made to select VIRTIO_RING; just make
them synonymous, since we removed the indirection layer some time ago.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 arch/s390/Kconfig       | 1 -
 arch/x86/lguest/Kconfig | 1 -
 drivers/rpmsg/Kconfig   | 1 -
 drivers/virtio/Kconfig  | 6 ------
 drivers/virtio/Makefile | 3 +--
 5 files changed, 1 insertion(+), 11 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 107610e01a2..9faf3d9b26c 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -587,7 +587,6 @@ config S390_GUEST
 	depends on 64BIT && EXPERIMENTAL
 	select VIRTUALIZATION
 	select VIRTIO
-	select VIRTIO_RING
 	select VIRTIO_CONSOLE
 	help
 	  Select this option if you want to run the kernel as a guest under
diff --git a/arch/x86/lguest/Kconfig b/arch/x86/lguest/Kconfig
index 6e121a2a49e..7872a3330fb 100644
--- a/arch/x86/lguest/Kconfig
+++ b/arch/x86/lguest/Kconfig
@@ -4,7 +4,6 @@ config LGUEST_GUEST
 	depends on X86_32
 	select VIRTUALIZATION
 	select VIRTIO
-	select VIRTIO_RING
 	select VIRTIO_CONSOLE
 	help
 	  Lguest is a tiny in-kernel hypervisor.  Selecting this will
diff --git a/drivers/rpmsg/Kconfig b/drivers/rpmsg/Kconfig
index 32aead65735..2bd911f1257 100644
--- a/drivers/rpmsg/Kconfig
+++ b/drivers/rpmsg/Kconfig
@@ -4,7 +4,6 @@ menu "Rpmsg drivers (EXPERIMENTAL)"
 config RPMSG
 	tristate
 	select VIRTIO
-	select VIRTIO_RING
 	depends on EXPERIMENTAL
 
 endmenu
diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig
index eabc2a0cd98..8d5bddb56cb 100644
--- a/drivers/virtio/Kconfig
+++ b/drivers/virtio/Kconfig
@@ -5,17 +5,12 @@ config VIRTIO
 	  bus, such as CONFIG_VIRTIO_PCI, CONFIG_VIRTIO_MMIO, CONFIG_LGUEST,
 	  CONFIG_RPMSG or CONFIG_S390_GUEST.
 
-config VIRTIO_RING
-	tristate
-	depends on VIRTIO
-
 menu "Virtio drivers"
 
 config VIRTIO_PCI
 	tristate "PCI driver for virtio devices (EXPERIMENTAL)"
 	depends on PCI && EXPERIMENTAL
 	select VIRTIO
-	select VIRTIO_RING
 	---help---
 	  This drivers provides support for virtio based paravirtual device
 	  drivers over PCI.  This requires that your VMM has appropriate PCI
@@ -40,7 +35,6 @@ config VIRTIO_BALLOON
  	tristate "Platform bus driver for memory mapped virtio devices (EXPERIMENTAL)"
  	depends on HAS_IOMEM && EXPERIMENTAL
  	select VIRTIO
- 	select VIRTIO_RING
  	---help---
  	 This drivers provides support for memory mapped virtio
 	 platform device driver.
diff --git a/drivers/virtio/Makefile b/drivers/virtio/Makefile
index 5a4c63cfd38..9076635697b 100644
--- a/drivers/virtio/Makefile
+++ b/drivers/virtio/Makefile
@@ -1,5 +1,4 @@
-obj-$(CONFIG_VIRTIO) += virtio.o
-obj-$(CONFIG_VIRTIO_RING) += virtio_ring.o
+obj-$(CONFIG_VIRTIO) += virtio.o virtio_ring.o
 obj-$(CONFIG_VIRTIO_MMIO) += virtio_mmio.o
 obj-$(CONFIG_VIRTIO_PCI) += virtio_pci.o
 obj-$(CONFIG_VIRTIO_BALLOON) += virtio_balloon.o
-- 
cgit v1.2.3-70-g09d2


From fd0f5869724ff6195c6e7f12f8287c66a132e0ba Mon Sep 17 00:00:00 2001
From: Tomoki Sekiyama <tomoki.sekiyama.qu@hitachi.com>
Date: Wed, 26 Sep 2012 11:11:28 +0900
Subject: x86: Distinguish TLB shootdown interrupts from other functions call
 interrupts

As TLB shootdown requests to other CPU cores are now using function call
interrupts, TLB shootdowns entry in /proc/interrupts is always shown as 0.

This behavior change was introduced by commit 52aec3308db8 ("x86/tlb:
replace INVALIDATE_TLB_VECTOR by CALL_FUNCTION_VECTOR").

This patch reverts TLB shootdowns entry in /proc/interrupts to count TLB
shootdowns separately from the other function call interrupts.

Signed-off-by: Tomoki Sekiyama <tomoki.sekiyama.qu@hitachi.com>
Link: http://lkml.kernel.org/r/20120926021128.22212.20440.stgit@hpxw
Acked-by: Alex Shi <alex.shi@intel.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/include/asm/hardirq.h | 4 ++++
 arch/x86/kernel/irq.c          | 4 ++--
 arch/x86/mm/tlb.c              | 2 ++
 3 files changed, 8 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index d3895dbf4dd..81f04cee5f7 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -18,6 +18,10 @@ typedef struct {
 #ifdef CONFIG_SMP
 	unsigned int irq_resched_count;
 	unsigned int irq_call_count;
+	/*
+	 * irq_tlb_count is double-counted in irq_call_count, so it must be
+	 * subtracted from irq_call_count when displaying irq_call_count
+	 */
 	unsigned int irq_tlb_count;
 #endif
 #ifdef CONFIG_X86_THERMAL_VECTOR
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 1f5f1d5d2a0..355b13f6de8 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -92,7 +92,8 @@ int arch_show_interrupts(struct seq_file *p, int prec)
 	seq_printf(p, "  Rescheduling interrupts\n");
 	seq_printf(p, "%*s: ", prec, "CAL");
 	for_each_online_cpu(j)
-		seq_printf(p, "%10u ", irq_stats(j)->irq_call_count);
+		seq_printf(p, "%10u ", irq_stats(j)->irq_call_count -
+					irq_stats(j)->irq_tlb_count);
 	seq_printf(p, "  Function call interrupts\n");
 	seq_printf(p, "%*s: ", prec, "TLB");
 	for_each_online_cpu(j)
@@ -147,7 +148,6 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
 #ifdef CONFIG_SMP
 	sum += irq_stats(cpu)->irq_resched_count;
 	sum += irq_stats(cpu)->irq_call_count;
-	sum += irq_stats(cpu)->irq_tlb_count;
 #endif
 #ifdef CONFIG_X86_THERMAL_VECTOR
 	sum += irq_stats(cpu)->irq_thermal_count;
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index a085c560b4a..0777f042e40 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -98,6 +98,8 @@ static void flush_tlb_func(void *info)
 {
 	struct flush_tlb_info *f = info;
 
+	inc_irq_stat(irq_tlb_count);
+
 	if (f->flush_mm != this_cpu_read(cpu_tlbstate.active_mm))
 		return;
 
-- 
cgit v1.2.3-70-g09d2


From 785107923a83d8456bbd8564e288a24d84109a46 Mon Sep 17 00:00:00 2001
From: Josh Triplett <josh@joshtriplett.org>
Date: Fri, 28 Sep 2012 17:55:44 -0700
Subject: efi: Defer freeing boot services memory until after ACPI init

Some new ACPI 5.0 tables reference resources stored in boot services
memory, so keep that memory around until we have ACPI and can extract
data from it.

Signed-off-by: Josh Triplett <josh@joshtriplett.org>
Link: http://lkml.kernel.org/r/baaa6d44bdc4eb0c58e5d1b4ccd2c729f854ac55.1348876882.git.josh@joshtriplett.org
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/platform/efi/efi.c | 31 ++++++++++++++++++-------------
 include/linux/efi.h         |  5 +++++
 init/main.c                 |  3 +++
 3 files changed, 26 insertions(+), 13 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index f55a4ce6dc4..b3dbbdbd2a4 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -419,10 +419,21 @@ void __init efi_reserve_boot_services(void)
 	}
 }
 
-static void __init efi_free_boot_services(void)
+static void __init efi_unmap_memmap(void)
+{
+	if (memmap.map) {
+		early_iounmap(memmap.map, memmap.nr_map * memmap.desc_size);
+		memmap.map = NULL;
+	}
+}
+
+void __init efi_free_boot_services(void)
 {
 	void *p;
 
+	if (!efi_native)
+		return;
+
 	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
 		efi_memory_desc_t *md = p;
 		unsigned long long start = md->phys_addr;
@@ -438,6 +449,8 @@ static void __init efi_free_boot_services(void)
 
 		free_bootmem_late(start, size);
 	}
+
+	efi_unmap_memmap();
 }
 
 static int __init efi_systab_init(void *phys)
@@ -787,8 +800,10 @@ void __init efi_enter_virtual_mode(void)
 	 * non-native EFI
 	 */
 
-	if (!efi_native)
-		goto out;
+	if (!efi_native) {
+		efi_unmap_memmap();
+		return;
+	}
 
 	/* Merge contiguous regions of the same type and attribute */
 	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
@@ -877,13 +892,6 @@ void __init efi_enter_virtual_mode(void)
 		panic("EFI call to SetVirtualAddressMap() failed!");
 	}
 
-	/*
-	 * Thankfully, it does seem that no runtime services other than
-	 * SetVirtualAddressMap() will touch boot services code, so we can
-	 * get rid of it all at this point
-	 */
-	efi_free_boot_services();
-
 	/*
 	 * Now that EFI is in virtual mode, update the function
 	 * pointers in the runtime service table to the new virtual addresses.
@@ -907,9 +915,6 @@ void __init efi_enter_virtual_mode(void)
 	if (__supported_pte_mask & _PAGE_NX)
 		runtime_code_page_mkexec();
 
-out:
-	early_iounmap(memmap.map, memmap.nr_map * memmap.desc_size);
-	memmap.map = NULL;
 	kfree(new_memmap);
 }
 
diff --git a/include/linux/efi.h b/include/linux/efi.h
index ec45ccd8708..5782114f483 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -496,6 +496,11 @@ extern void efi_map_pal_code (void);
 extern void efi_memmap_walk (efi_freemem_callback_t callback, void *arg);
 extern void efi_gettimeofday (struct timespec *ts);
 extern void efi_enter_virtual_mode (void);	/* switch EFI to virtual mode, if possible */
+#ifdef CONFIG_X86
+extern void efi_free_boot_services(void);
+#else
+static inline void efi_free_boot_services(void) {}
+#endif
 extern u64 efi_get_iobase (void);
 extern u32 efi_mem_type (unsigned long phys_addr);
 extern u64 efi_mem_attributes (unsigned long phys_addr);
diff --git a/init/main.c b/init/main.c
index b28673087ac..d61ec542205 100644
--- a/init/main.c
+++ b/init/main.c
@@ -631,6 +631,9 @@ asmlinkage void __init start_kernel(void)
 	acpi_early_init(); /* before LAPIC and SMP init */
 	sfi_init_late();
 
+	if (efi_enabled)
+		efi_free_boot_services();
+
 	ftrace_init();
 
 	/* Do the rest non-__init'ed, we're now alive */
-- 
cgit v1.2.3-70-g09d2


From 7bc90e01c3f66c137e7e761f574bbf883087d590 Mon Sep 17 00:00:00 2001
From: Josh Triplett <josh@joshtriplett.org>
Date: Fri, 28 Sep 2012 17:56:08 -0700
Subject: efi: Add a function to look up existing IO memory mappings

The EFI initialization creates virtual mappings for EFI boot services
memory, so if a driver wants to access EFI boot services memory, it
cannot call ioremap itself; doing so will trip the WARN about mapping
RAM twice.  Thus, a driver accessing EFI boot services memory must do so
via the existing mapping already created during EFI intiialization.
Since the EFI code already maintains a memory map for that memory, add a
function efi_lookup_mapped_addr to look up mappings in that memory map.

Signed-off-by: Josh Triplett <josh@joshtriplett.org>
Link: http://lkml.kernel.org/r/0eb48ae012797912874919110660ad420b90268b.1348876882.git.josh@joshtriplett.org
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/platform/efi/efi.c | 28 ++++++++++++++++++++++++++++
 include/linux/efi.h         |  1 +
 2 files changed, 29 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index b3dbbdbd2a4..f7f928c315d 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -776,6 +776,34 @@ static void __init runtime_code_page_mkexec(void)
 	}
 }
 
+/*
+ * We can't ioremap data in EFI boot services RAM, because we've already mapped
+ * it as RAM.  So, look it up in the existing EFI memory map instead.  Only
+ * callable after efi_enter_virtual_mode and before efi_free_boot_services.
+ */
+void __iomem *efi_lookup_mapped_addr(u64 phys_addr)
+{
+	void *p;
+	if (WARN_ON(!memmap.map))
+		return NULL;
+	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
+		efi_memory_desc_t *md = p;
+		u64 size = md->num_pages << EFI_PAGE_SHIFT;
+		u64 end = md->phys_addr + size;
+		if (!(md->attribute & EFI_MEMORY_RUNTIME) &&
+		    md->type != EFI_BOOT_SERVICES_CODE &&
+		    md->type != EFI_BOOT_SERVICES_DATA)
+			continue;
+		if (!md->virt_addr)
+			continue;
+		if (phys_addr >= md->phys_addr && phys_addr < end) {
+			phys_addr += md->virt_addr - md->phys_addr;
+			return (__force void __iomem *)(unsigned long)phys_addr;
+		}
+	}
+	return NULL;
+}
+
 /*
  * This function will switch the EFI runtime services to virtual mode.
  * Essentially, look through the EFI memmap and map every region that
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 5782114f483..fff135d9375 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -501,6 +501,7 @@ extern void efi_free_boot_services(void);
 #else
 static inline void efi_free_boot_services(void) {}
 #endif
+extern void __iomem *efi_lookup_mapped_addr(u64 phys_addr);
 extern u64 efi_get_iobase (void);
 extern u32 efi_mem_type (unsigned long phys_addr);
 extern u64 efi_mem_attributes (unsigned long phys_addr);
-- 
cgit v1.2.3-70-g09d2


From 2223af389032425e3d1a70f9cb3a63feaa654ced Mon Sep 17 00:00:00 2001
From: Josh Triplett <josh@joshtriplett.org>
Date: Fri, 28 Sep 2012 17:57:05 -0700
Subject: efi: Fix the ACPI BGRT driver for images located in EFI boot services
 memory

The ACPI BGRT driver accesses the BIOS logo image when it initializes.
However, ACPI 5.0 (which introduces the BGRT) recommends putting the
logo image in EFI boot services memory, so that the OS can reclaim that
memory.  Production systems follow this recommendation, breaking the
ACPI BGRT driver.

Move the bulk of the BGRT code to run during a new EFI late
initialization phase, which occurs after switching EFI to virtual mode,
and after initializing ACPI, but before freeing boot services memory.
Copy the BIOS logo image to kernel memory at that point, and make it
accessible to the BGRT driver.  Rework the existing ACPI BGRT driver to
act as a simple wrapper exposing that image (and the properties from the
BGRT) via sysfs.

Signed-off-by: Josh Triplett <josh@joshtriplett.org>
Link: http://lkml.kernel.org/r/93ce9f823f1c1f3bb88bdd662cce08eee7a17f5d.1348876882.git.josh@joshtriplett.org
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/platform/efi/Makefile   |  1 +
 arch/x86/platform/efi/efi-bgrt.c | 76 ++++++++++++++++++++++++++++++++++++++++
 arch/x86/platform/efi/efi.c      |  6 ++++
 drivers/acpi/Kconfig             |  4 +--
 drivers/acpi/bgrt.c              | 76 +++++-----------------------------------
 include/linux/efi-bgrt.h         | 21 +++++++++++
 include/linux/efi.h              |  2 ++
 init/main.c                      |  4 ++-
 8 files changed, 120 insertions(+), 70 deletions(-)
 create mode 100644 arch/x86/platform/efi/efi-bgrt.c
 create mode 100644 include/linux/efi-bgrt.h

(limited to 'arch/x86')

diff --git a/arch/x86/platform/efi/Makefile b/arch/x86/platform/efi/Makefile
index 73b8be0f367..6db1cc4c753 100644
--- a/arch/x86/platform/efi/Makefile
+++ b/arch/x86/platform/efi/Makefile
@@ -1 +1,2 @@
 obj-$(CONFIG_EFI) 		+= efi.o efi_$(BITS).o efi_stub_$(BITS).o
+obj-$(CONFIG_ACPI_BGRT) += efi-bgrt.o
diff --git a/arch/x86/platform/efi/efi-bgrt.c b/arch/x86/platform/efi/efi-bgrt.c
new file mode 100644
index 00000000000..f6a0c1b8e51
--- /dev/null
+++ b/arch/x86/platform/efi/efi-bgrt.c
@@ -0,0 +1,76 @@
+/*
+ * Copyright 2012 Intel Corporation
+ * Author: Josh Triplett <josh@joshtriplett.org>
+ *
+ * Based on the bgrt driver:
+ * Copyright 2012 Red Hat, Inc <mjg@redhat.com>
+ * Author: Matthew Garrett
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/kernel.h>
+#include <linux/acpi.h>
+#include <linux/efi.h>
+#include <linux/efi-bgrt.h>
+
+struct acpi_table_bgrt *bgrt_tab;
+void *bgrt_image;
+size_t bgrt_image_size;
+
+struct bmp_header {
+	u16 id;
+	u32 size;
+} __packed;
+
+void efi_bgrt_init(void)
+{
+	acpi_status status;
+	void __iomem *image;
+	bool ioremapped = false;
+	struct bmp_header bmp_header;
+
+	if (acpi_disabled)
+		return;
+
+	status = acpi_get_table("BGRT", 0,
+	                        (struct acpi_table_header **)&bgrt_tab);
+	if (ACPI_FAILURE(status))
+		return;
+
+	if (bgrt_tab->version != 1)
+		return;
+	if (bgrt_tab->image_type != 0 || !bgrt_tab->image_address)
+		return;
+
+	image = efi_lookup_mapped_addr(bgrt_tab->image_address);
+	if (!image) {
+		image = ioremap(bgrt_tab->image_address, sizeof(bmp_header));
+		ioremapped = true;
+		if (!image)
+			return;
+	}
+
+	memcpy_fromio(&bmp_header, image, sizeof(bmp_header));
+	if (ioremapped)
+		iounmap(image);
+	bgrt_image_size = bmp_header.size;
+
+	bgrt_image = kmalloc(bgrt_image_size, GFP_KERNEL);
+	if (!bgrt_image)
+		return;
+
+	if (ioremapped) {
+		image = ioremap(bgrt_tab->image_address, bmp_header.size);
+		if (!image) {
+			kfree(bgrt_image);
+			bgrt_image = NULL;
+			return;
+		}
+	}
+
+	memcpy_fromio(bgrt_image, image, bgrt_image_size);
+	if (ioremapped)
+		iounmap(image);
+}
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index f7f928c315d..aded2a91162 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -31,6 +31,7 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/efi.h>
+#include <linux/efi-bgrt.h>
 #include <linux/export.h>
 #include <linux/bootmem.h>
 #include <linux/memblock.h>
@@ -745,6 +746,11 @@ void __init efi_init(void)
 #endif
 }
 
+void __init efi_late_init(void)
+{
+	efi_bgrt_init();
+}
+
 void __init efi_set_executable(efi_memory_desc_t *md, bool executable)
 {
 	u64 addr, npages;
diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index 80998958cf4..119d58db834 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -385,8 +385,8 @@ config ACPI_CUSTOM_METHOD
 	  to override that restriction).
 
 config ACPI_BGRT
-        tristate "Boottime Graphics Resource Table support"
-        default n
+	bool "Boottime Graphics Resource Table support"
+	depends on EFI
         help
 	  This driver adds support for exposing the ACPI Boottime Graphics
 	  Resource Table, which allows the operating system to obtain
diff --git a/drivers/acpi/bgrt.c b/drivers/acpi/bgrt.c
index 6680df36b96..be603995854 100644
--- a/drivers/acpi/bgrt.c
+++ b/drivers/acpi/bgrt.c
@@ -1,5 +1,6 @@
 /*
  * Copyright 2012 Red Hat, Inc <mjg@redhat.com>
+ * Copyright 2012 Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -11,20 +12,10 @@
 #include <linux/init.h>
 #include <linux/device.h>
 #include <linux/sysfs.h>
-#include <linux/io.h>
-#include <acpi/acpi.h>
-#include <acpi/acpi_bus.h>
+#include <linux/efi-bgrt.h>
 
-static struct acpi_table_bgrt *bgrt_tab;
 static struct kobject *bgrt_kobj;
 
-struct bmp_header {
-	u16 id;
-	u32 size;
-} __attribute ((packed));
-
-static struct bmp_header bmp_header;
-
 static ssize_t show_version(struct device *dev,
 			    struct device_attribute *attr, char *buf)
 {
@@ -63,18 +54,7 @@ static DEVICE_ATTR(yoffset, S_IRUGO, show_yoffset, NULL);
 static ssize_t show_image(struct file *file, struct kobject *kobj,
 	       struct bin_attribute *attr, char *buf, loff_t off, size_t count)
 {
-	int size = attr->size;
-	void __iomem *image = attr->private;
-
-	if (off >= size) {
-		count = 0;
-	} else {
-		if (off + count > size)
-			count = size - off;
-
-		memcpy_fromio(buf, image+off, count);
-	}
-
+	memcpy(buf, attr->private + off, count);
 	return count;
 }
 
@@ -101,45 +81,18 @@ static struct attribute_group bgrt_attribute_group = {
 
 static int __init bgrt_init(void)
 {
-	acpi_status status;
 	int ret;
-	void __iomem *bgrt;
 
-	if (acpi_disabled)
-		return -ENODEV;
-
-	status = acpi_get_table("BGRT", 0,
-				(struct acpi_table_header **)&bgrt_tab);
-
-	if (ACPI_FAILURE(status))
+	if (!bgrt_image)
 		return -ENODEV;
 
 	sysfs_bin_attr_init(&image_attr);
-
-	bgrt = ioremap(bgrt_tab->image_address, sizeof(struct bmp_header));
-
-	if (!bgrt) {
-		ret = -EINVAL;
-		goto out_err;
-	}
-
-	memcpy_fromio(&bmp_header, bgrt, sizeof(bmp_header));
-	image_attr.size = bmp_header.size;
-	iounmap(bgrt);
-
-	image_attr.private = ioremap(bgrt_tab->image_address, image_attr.size);
-
-	if (!image_attr.private) {
-		ret = -EINVAL;
-		goto out_err;
-	}
-
+	image_attr.private = bgrt_image;
+	image_attr.size = bgrt_image_size;
 
 	bgrt_kobj = kobject_create_and_add("bgrt", acpi_kobj);
-	if (!bgrt_kobj) {
-		ret = -EINVAL;
-		goto out_iounmap;
-	}
+	if (!bgrt_kobj)
+		return -EINVAL;
 
 	ret = sysfs_create_group(bgrt_kobj, &bgrt_attribute_group);
 	if (ret)
@@ -155,22 +108,11 @@ out_group:
 	sysfs_remove_group(bgrt_kobj, &bgrt_attribute_group);
 out_kobject:
 	kobject_put(bgrt_kobj);
-out_iounmap:
-	iounmap(image_attr.private);
-out_err:
 	return ret;
 }
 
-static void __exit bgrt_exit(void)
-{
-	iounmap(image_attr.private);
-	sysfs_remove_group(bgrt_kobj, &bgrt_attribute_group);
-	sysfs_remove_bin_file(bgrt_kobj, &image_attr);
-}
-
 module_init(bgrt_init);
-module_exit(bgrt_exit);
 
-MODULE_AUTHOR("Matthew Garrett");
+MODULE_AUTHOR("Matthew Garrett, Josh Triplett <josh@joshtriplett.org>");
 MODULE_DESCRIPTION("BGRT boot graphic support");
 MODULE_LICENSE("GPL");
diff --git a/include/linux/efi-bgrt.h b/include/linux/efi-bgrt.h
new file mode 100644
index 00000000000..051b21fedf6
--- /dev/null
+++ b/include/linux/efi-bgrt.h
@@ -0,0 +1,21 @@
+#ifndef _LINUX_EFI_BGRT_H
+#define _LINUX_EFI_BGRT_H
+
+#ifdef CONFIG_ACPI_BGRT
+
+#include <linux/acpi.h>
+
+void efi_bgrt_init(void);
+
+/* The BGRT data itself; only valid if bgrt_image != NULL. */
+extern void *bgrt_image;
+extern size_t bgrt_image_size;
+extern struct acpi_table_bgrt *bgrt_tab;
+
+#else /* !CONFIG_ACPI_BGRT */
+
+static inline void efi_bgrt_init(void) {}
+
+#endif /* !CONFIG_ACPI_BGRT */
+
+#endif /* _LINUX_EFI_BGRT_H */
diff --git a/include/linux/efi.h b/include/linux/efi.h
index fff135d9375..8670eb1eb8c 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -497,8 +497,10 @@ extern void efi_memmap_walk (efi_freemem_callback_t callback, void *arg);
 extern void efi_gettimeofday (struct timespec *ts);
 extern void efi_enter_virtual_mode (void);	/* switch EFI to virtual mode, if possible */
 #ifdef CONFIG_X86
+extern void efi_late_init(void);
 extern void efi_free_boot_services(void);
 #else
+static inline void efi_late_init(void) {}
 static inline void efi_free_boot_services(void) {}
 #endif
 extern void __iomem *efi_lookup_mapped_addr(u64 phys_addr);
diff --git a/init/main.c b/init/main.c
index d61ec542205..db34c0ec471 100644
--- a/init/main.c
+++ b/init/main.c
@@ -631,8 +631,10 @@ asmlinkage void __init start_kernel(void)
 	acpi_early_init(); /* before LAPIC and SMP init */
 	sfi_init_late();
 
-	if (efi_enabled)
+	if (efi_enabled) {
+		efi_late_init();
 		efi_free_boot_services();
+	}
 
 	ftrace_init();
 
-- 
cgit v1.2.3-70-g09d2


From 7076aada1040de4ed79a5977dbabdb5e5ea5e249 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 10 Sep 2012 16:44:54 -0400
Subject: x86: split ret_from_fork

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/x86/Kconfig                 |  1 +
 arch/x86/include/asm/processor.h |  5 -----
 arch/x86/kernel/entry_32.S       | 15 ++++++++++-----
 arch/x86/kernel/entry_64.S       | 27 +++++++++++----------------
 arch/x86/kernel/process.c        | 38 --------------------------------------
 arch/x86/kernel/process_32.c     | 31 ++++++++++++++++++++++++-------
 arch/x86/kernel/process_64.c     | 35 +++++++++++++++++++++--------------
 7 files changed, 67 insertions(+), 85 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 8ec3a1aa4ab..d93eb9d1bb9 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -97,6 +97,7 @@ config X86
 	select KTIME_SCALAR if X86_32
 	select GENERIC_STRNCPY_FROM_USER
 	select GENERIC_STRNLEN_USER
+	select GENERIC_KERNEL_THREAD
 
 config INSTRUCTION_DECODER
 	def_bool (KPROBES || PERF_EVENTS || UPROBES)
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index d048cad9bca..078f3fdedf9 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -589,11 +589,6 @@ typedef struct {
 } mm_segment_t;
 
 
-/*
- * create a kernel thread without removing it from tasklists
- */
-extern int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
-
 /* Free all resources held by a thread. */
 extern void release_thread(struct task_struct *);
 
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 623f2883747..ac1107346fc 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -994,15 +994,20 @@ END(spurious_interrupt_bug)
  */
 	.popsection
 
-ENTRY(kernel_thread_helper)
-	pushl $0		# fake return address for unwinder
+ENTRY(ret_from_kernel_thread)
 	CFI_STARTPROC
-	movl %edi,%eax
-	call *%esi
+	pushl_cfi %eax
+	call schedule_tail
+	GET_THREAD_INFO(%ebp)
+	popl_cfi %eax
+	pushl_cfi $0x0202		# Reset kernel eflags
+	popfl_cfi
+	movl PT_EBP(%esp),%eax
+	call *PT_EBX(%esp)
 	call do_exit
 	ud2			# padding for call trace
 	CFI_ENDPROC
-ENDPROC(kernel_thread_helper)
+ENDPROC(ret_from_kernel_thread)
 
 #ifdef CONFIG_XEN
 /* Xen doesn't set %esp to be precisely what the normal sysenter
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 69babd8c834..5526d17db67 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -450,7 +450,7 @@ ENTRY(ret_from_fork)
 	RESTORE_REST
 
 	testl $3, CS-ARGOFFSET(%rsp)		# from kernel_thread?
-	jz   retint_restore_args
+	jz   1f
 
 	testl $_TIF_IA32, TI_flags(%rcx)	# 32-bit compat task needs IRET
 	jnz  int_ret_from_sys_call
@@ -458,6 +458,16 @@ ENTRY(ret_from_fork)
 	RESTORE_TOP_OF_STACK %rdi, -ARGOFFSET
 	jmp ret_from_sys_call			# go to the SYSRET fastpath
 
+1:
+	subq $REST_SKIP, %rsp	# move the stack pointer back
+	CFI_ADJUST_CFA_OFFSET	REST_SKIP
+	movq %rbp, %rdi
+	call *%rbx
+	# exit
+	mov %eax, %edi
+	call do_exit
+	ud2			# padding for call trace
+
 	CFI_ENDPROC
 END(ret_from_fork)
 
@@ -1206,21 +1216,6 @@ bad_gs:
 	jmp  2b
 	.previous
 
-ENTRY(kernel_thread_helper)
-	pushq $0		# fake return address
-	CFI_STARTPROC
-	/*
-	 * Here we are in the child and the registers are set as they were
-	 * at kernel_thread() invocation in the parent.
-	 */
-	call *%rsi
-	# exit
-	mov %eax, %edi
-	call do_exit
-	ud2			# padding for call trace
-	CFI_ENDPROC
-END(kernel_thread_helper)
-
 /*
  * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
  *
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 7162e9c1f59..6947ec968bf 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -298,44 +298,6 @@ sys_clone(unsigned long clone_flags, unsigned long newsp,
 	return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
 }
 
-/*
- * This gets run with %si containing the
- * function to call, and %di containing
- * the "args".
- */
-extern void kernel_thread_helper(void);
-
-/*
- * Create a kernel thread
- */
-int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
-{
-	struct pt_regs regs;
-
-	memset(&regs, 0, sizeof(regs));
-
-	regs.si = (unsigned long) fn;
-	regs.di = (unsigned long) arg;
-
-#ifdef CONFIG_X86_32
-	regs.ds = __USER_DS;
-	regs.es = __USER_DS;
-	regs.fs = __KERNEL_PERCPU;
-	regs.gs = __KERNEL_STACK_CANARY;
-#else
-	regs.ss = __KERNEL_DS;
-#endif
-
-	regs.orig_ax = -1;
-	regs.ip = (unsigned long) kernel_thread_helper;
-	regs.cs = __KERNEL_CS | get_kernel_rpl();
-	regs.flags = X86_EFLAGS_IF | X86_EFLAGS_BIT1;
-
-	/* Ok, create the new process.. */
-	return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
-}
-EXPORT_SYMBOL(kernel_thread);
-
 /*
  * sys_execve() executes a new program.
  */
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 75fcad146de..c9939875d26 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -57,6 +57,7 @@
 #include <asm/switch_to.h>
 
 asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
+asmlinkage void ret_from_kernel_thread(void) __asm__("ret_from_kernel_thread");
 
 /*
  * Return saved PC of a blocked thread.
@@ -127,23 +128,39 @@ void release_thread(struct task_struct *dead_task)
 }
 
 int copy_thread(unsigned long clone_flags, unsigned long sp,
-	unsigned long unused,
+	unsigned long arg,
 	struct task_struct *p, struct pt_regs *regs)
 {
-	struct pt_regs *childregs;
+	struct pt_regs *childregs = task_pt_regs(p);
 	struct task_struct *tsk;
 	int err;
 
-	childregs = task_pt_regs(p);
+	p->thread.sp = (unsigned long) childregs;
+	p->thread.sp0 = (unsigned long) (childregs+1);
+
+	if (unlikely(!regs)) {
+		/* kernel thread */
+		memset(childregs, 0, sizeof(struct pt_regs));
+		p->thread.ip = (unsigned long) ret_from_kernel_thread;
+		task_user_gs(p) = __KERNEL_STACK_CANARY;
+		childregs->ds = __USER_DS;
+		childregs->es = __USER_DS;
+		childregs->fs = __KERNEL_PERCPU;
+		childregs->bx = sp;	/* function */
+		childregs->bp = arg;
+		childregs->orig_ax = -1;
+		childregs->cs = __KERNEL_CS | get_kernel_rpl();
+		childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_BIT1;
+		p->fpu_counter = 0;
+		p->thread.io_bitmap_ptr = NULL;
+		memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
+		return 0;
+	}
 	*childregs = *regs;
 	childregs->ax = 0;
 	childregs->sp = sp;
 
-	p->thread.sp = (unsigned long) childregs;
-	p->thread.sp0 = (unsigned long) (childregs+1);
-
 	p->thread.ip = (unsigned long) ret_from_fork;
-
 	task_user_gs(p) = get_user_gs(regs);
 
 	p->fpu_counter = 0;
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 0a980c9d7cb..937f2af6f2d 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -146,29 +146,18 @@ static inline u32 read_32bit_tls(struct task_struct *t, int tls)
 }
 
 int copy_thread(unsigned long clone_flags, unsigned long sp,
-		unsigned long unused,
+		unsigned long arg,
 	struct task_struct *p, struct pt_regs *regs)
 {
 	int err;
 	struct pt_regs *childregs;
 	struct task_struct *me = current;
 
-	childregs = ((struct pt_regs *)
-			(THREAD_SIZE + task_stack_page(p))) - 1;
-	*childregs = *regs;
-
-	childregs->ax = 0;
-	if (user_mode(regs))
-		childregs->sp = sp;
-	else
-		childregs->sp = (unsigned long)childregs;
-
+	p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE;
+	childregs = task_pt_regs(p);
 	p->thread.sp = (unsigned long) childregs;
-	p->thread.sp0 = (unsigned long) (childregs+1);
 	p->thread.usersp = me->thread.usersp;
-
 	set_tsk_thread_flag(p, TIF_FORK);
-
 	p->fpu_counter = 0;
 	p->thread.io_bitmap_ptr = NULL;
 
@@ -178,6 +167,24 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
 	p->thread.fs = p->thread.fsindex ? 0 : me->thread.fs;
 	savesegment(es, p->thread.es);
 	savesegment(ds, p->thread.ds);
+	memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
+
+	if (unlikely(!regs)) {
+		/* kernel thread */
+		memset(childregs, 0, sizeof(struct pt_regs));
+		childregs->sp = (unsigned long)childregs;
+		childregs->ss = __KERNEL_DS;
+		childregs->bx = sp; /* function */
+		childregs->bp = arg;
+		childregs->orig_ax = -1;
+		childregs->cs = __KERNEL_CS | get_kernel_rpl();
+		childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_BIT1;
+		return 0;
+	}
+	*childregs = *regs;
+
+	childregs->ax = 0;
+	childregs->sp = sp;
 
 	err = -ENOMEM;
 	memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
-- 
cgit v1.2.3-70-g09d2


From 6783eaa2e1253fbcbe2c2f6bb4c843abf1343caf Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 2 Aug 2012 23:05:11 +0400
Subject: x86, um/x86: switch to generic sys_execve and kernel_execve

32bit wrapper is lost on that; 64bit one is *not*, since
we need to arrange for full pt_regs on stack when we call
sys_execve() and we need to load callee-saved ones from
there afterwards.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/um/kernel/exec.c            | 25 ++-------------------
 arch/um/kernel/internal.h        |  1 -
 arch/um/kernel/syscall.c         | 17 ---------------
 arch/x86/ia32/ia32entry.S        |  2 +-
 arch/x86/ia32/sys_ia32.c         | 15 -------------
 arch/x86/include/asm/sys_ia32.h  |  2 --
 arch/x86/include/asm/syscalls.h  |  2 +-
 arch/x86/include/asm/unistd.h    |  2 ++
 arch/x86/kernel/Makefile         |  2 +-
 arch/x86/kernel/asm-offsets.c    |  3 +++
 arch/x86/kernel/entry_32.S       | 11 +++++++---
 arch/x86/kernel/entry_64.S       | 47 ++++++++++++----------------------------
 arch/x86/kernel/process.c        | 19 ----------------
 arch/x86/kernel/process_32.c     |  1 +
 arch/x86/kernel/sys_i386_32.c    | 40 ----------------------------------
 arch/x86/syscalls/syscall_32.tbl |  2 +-
 arch/x86/um/sys_call_table_32.c  |  1 -
 17 files changed, 34 insertions(+), 158 deletions(-)
 delete mode 100644 arch/um/kernel/internal.h
 delete mode 100644 arch/x86/kernel/sys_i386_32.c

(limited to 'arch/x86')

diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c
index 8c82786da82..e427301f55d 100644
--- a/arch/um/kernel/exec.c
+++ b/arch/um/kernel/exec.c
@@ -16,7 +16,6 @@
 #include "mem_user.h"
 #include "skas.h"
 #include "os.h"
-#include "internal.h"
 
 void flush_thread(void)
 {
@@ -49,27 +48,7 @@ void start_thread(struct pt_regs *regs, unsigned long eip, unsigned long esp)
 }
 EXPORT_SYMBOL(start_thread);
 
-long um_execve(const char *file, const char __user *const __user *argv, const char __user *const __user *env)
+void __noreturn ret_from_kernel_execve(struct pt_regs *unused)
 {
-	long err;
-
-	err = do_execve(file, argv, env, &current->thread.regs);
-	if (!err)
-		UML_LONGJMP(current->thread.exec_buf, 1);
-	return err;
-}
-
-long sys_execve(const char __user *file, const char __user *const __user *argv,
-		const char __user *const __user *env)
-{
-	long error;
-	char *filename;
-
-	filename = getname(file);
-	error = PTR_ERR(filename);
-	if (IS_ERR(filename)) goto out;
-	error = do_execve(filename, argv, env, &current->thread.regs);
-	putname(filename);
- out:
-	return error;
+	UML_LONGJMP(current->thread.exec_buf, 1);
 }
diff --git a/arch/um/kernel/internal.h b/arch/um/kernel/internal.h
deleted file mode 100644
index 5bf97db24a0..00000000000
--- a/arch/um/kernel/internal.h
+++ /dev/null
@@ -1 +0,0 @@
-extern long um_execve(const char *file, const char __user *const __user *argv, const char __user *const __user *env);
diff --git a/arch/um/kernel/syscall.c b/arch/um/kernel/syscall.c
index a4c6d8eee74..a5639c47277 100644
--- a/arch/um/kernel/syscall.c
+++ b/arch/um/kernel/syscall.c
@@ -13,7 +13,6 @@
 #include "asm/mman.h"
 #include "asm/uaccess.h"
 #include "asm/unistd.h"
-#include "internal.h"
 
 long sys_fork(void)
 {
@@ -50,19 +49,3 @@ long old_mmap(unsigned long addr, unsigned long len,
  out:
 	return err;
 }
-
-int kernel_execve(const char *filename,
-		  const char *const argv[],
-		  const char *const envp[])
-{
-	mm_segment_t fs;
-	int ret;
-
-	fs = get_fs();
-	set_fs(KERNEL_DS);
-	ret = um_execve(filename, (const char __user *const __user *)argv,
-			(const char __user *const __user *) envp);
-	set_fs(fs);
-
-	return ret;
-}
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 20e5f7ba0e6..e75f941bd2b 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -459,7 +459,7 @@ GLOBAL(\label)
 	PTREGSCALL stub32_rt_sigreturn, sys32_rt_sigreturn, %rdi
 	PTREGSCALL stub32_sigreturn, sys32_sigreturn, %rdi
 	PTREGSCALL stub32_sigaltstack, sys32_sigaltstack, %rdx
-	PTREGSCALL stub32_execve, sys32_execve, %rcx
+	PTREGSCALL stub32_execve, compat_sys_execve, %rcx
 	PTREGSCALL stub32_fork, sys_fork, %rdi
 	PTREGSCALL stub32_clone, sys32_clone, %rdx
 	PTREGSCALL stub32_vfork, sys_vfork, %rdi
diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c
index 4540bece094..6b31144589d 100644
--- a/arch/x86/ia32/sys_ia32.c
+++ b/arch/x86/ia32/sys_ia32.c
@@ -385,21 +385,6 @@ asmlinkage long sys32_sendfile(int out_fd, int in_fd,
 	return ret;
 }
 
-asmlinkage long sys32_execve(const char __user *name, compat_uptr_t __user *argv,
-			     compat_uptr_t __user *envp, struct pt_regs *regs)
-{
-	long error;
-	char *filename;
-
-	filename = getname(name);
-	error = PTR_ERR(filename);
-	if (IS_ERR(filename))
-		return error;
-	error = compat_do_execve(filename, argv, envp, regs);
-	putname(filename);
-	return error;
-}
-
 asmlinkage long sys32_clone(unsigned int clone_flags, unsigned int newsp,
 			    struct pt_regs *regs)
 {
diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h
index 3fda9db4881..1ac127f41fe 100644
--- a/arch/x86/include/asm/sys_ia32.h
+++ b/arch/x86/include/asm/sys_ia32.h
@@ -54,8 +54,6 @@ asmlinkage long sys32_pwrite(unsigned int, const char __user *, u32, u32, u32);
 asmlinkage long sys32_personality(unsigned long);
 asmlinkage long sys32_sendfile(int, int, compat_off_t __user *, s32);
 
-asmlinkage long sys32_execve(const char __user *, compat_uptr_t __user *,
-			     compat_uptr_t __user *, struct pt_regs *);
 asmlinkage long sys32_clone(unsigned int, unsigned int, struct pt_regs *);
 
 long sys32_lseek(unsigned int, int, unsigned int);
diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h
index f1d8b441fc7..2be0b880417 100644
--- a/arch/x86/include/asm/syscalls.h
+++ b/arch/x86/include/asm/syscalls.h
@@ -25,7 +25,7 @@ int sys_fork(struct pt_regs *);
 int sys_vfork(struct pt_regs *);
 long sys_execve(const char __user *,
 		const char __user *const __user *,
-		const char __user *const __user *, struct pt_regs *);
+		const char __user *const __user *);
 long sys_clone(unsigned long, unsigned long, void __user *,
 	       void __user *, struct pt_regs *);
 
diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h
index 0d9776e9e2d..55d155560fd 100644
--- a/arch/x86/include/asm/unistd.h
+++ b/arch/x86/include/asm/unistd.h
@@ -50,6 +50,8 @@
 # define __ARCH_WANT_SYS_TIME
 # define __ARCH_WANT_SYS_UTIME
 # define __ARCH_WANT_SYS_WAITPID
+# define __ARCH_WANT_SYS_EXECVE
+# define __ARCH_WANT_KERNEL_EXECVE
 
 /*
  * "Conditional" syscalls
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 8215e5652d9..56610000223 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -23,7 +23,7 @@ obj-y			+= time.o ioport.o ldt.o dumpstack.o nmi.o
 obj-y			+= setup.o x86_init.o i8259.o irqinit.o jump_label.o
 obj-$(CONFIG_IRQ_WORK)  += irq_work.o
 obj-y			+= probe_roms.o
-obj-$(CONFIG_X86_32)	+= sys_i386_32.o i386_ksyms_32.o
+obj-$(CONFIG_X86_32)	+= i386_ksyms_32.o
 obj-$(CONFIG_X86_64)	+= sys_x86_64.o x8664_ksyms_64.o
 obj-y			+= syscall_$(BITS).o
 obj-$(CONFIG_X86_64)	+= vsyscall_64.o
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 68de2dc962e..28610822fb3 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -69,4 +69,7 @@ void common(void) {
 	OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment);
 	OFFSET(BP_pref_address, boot_params, hdr.pref_address);
 	OFFSET(BP_code32_start, boot_params, hdr.code32_start);
+
+	BLANK();
+	DEFINE(PTREGS_SIZE, sizeof(struct pt_regs));
 }
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index ac1107346fc..b6bb6923929 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -298,6 +298,13 @@ ENTRY(ret_from_fork)
 	CFI_ENDPROC
 END(ret_from_fork)
 
+ENTRY(ret_from_kernel_execve)
+	movl %eax, %esp
+	movl $0,PT_EAX(%esp)
+	GET_THREAD_INFO(%ebp)
+	jmp syscall_exit
+END(ret_from_kernel_execve)
+
 /*
  * Interrupt exit functions should be protected against kprobes
  */
@@ -322,8 +329,7 @@ ret_from_intr:
 	andl $(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax
 #else
 	/*
-	 * We can be coming here from a syscall done in the kernel space,
-	 * e.g. a failed kernel_execve().
+	 * We can be coming here from child spawned by kernel_thread().
 	 */
 	movl PT_CS(%esp), %eax
 	andl $SEGMENT_RPL_MASK, %eax
@@ -727,7 +733,6 @@ ENDPROC(ptregs_##name)
 PTREGSCALL1(iopl)
 PTREGSCALL0(fork)
 PTREGSCALL0(vfork)
-PTREGSCALL3(execve)
 PTREGSCALL2(sigaltstack)
 PTREGSCALL0(sigreturn)
 PTREGSCALL0(rt_sigreturn)
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 5526d17db67..053c9552ffd 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -767,7 +767,6 @@ ENTRY(stub_execve)
 	PARTIAL_FRAME 0
 	SAVE_REST
 	FIXUP_TOP_OF_STACK %r11
-	movq %rsp, %rcx
 	call sys_execve
 	RESTORE_TOP_OF_STACK %r11
 	movq %rax,RAX(%rsp)
@@ -817,8 +816,7 @@ ENTRY(stub_x32_execve)
 	PARTIAL_FRAME 0
 	SAVE_REST
 	FIXUP_TOP_OF_STACK %r11
-	movq %rsp, %rcx
-	call sys32_execve
+	call compat_sys_execve
 	RESTORE_TOP_OF_STACK %r11
 	movq %rax,RAX(%rsp)
 	RESTORE_REST
@@ -1216,36 +1214,19 @@ bad_gs:
 	jmp  2b
 	.previous
 
-/*
- * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
- *
- * C extern interface:
- *	 extern long execve(const char *name, char **argv, char **envp)
- *
- * asm input arguments:
- *	rdi: name, rsi: argv, rdx: envp
- *
- * We want to fallback into:
- *	extern long sys_execve(const char *name, char **argv,char **envp, struct pt_regs *regs)
- *
- * do_sys_execve asm fallback arguments:
- *	rdi: name, rsi: argv, rdx: envp, rcx: fake frame on the stack
- */
-ENTRY(kernel_execve)
-	CFI_STARTPROC
-	FAKE_STACK_FRAME $0
-	SAVE_ALL
-	movq %rsp,%rcx
-	call sys_execve
-	movq %rax, RAX(%rsp)
-	RESTORE_REST
-	testq %rax,%rax
-	je int_ret_from_sys_call
-	RESTORE_ARGS
-	UNFAKE_STACK_FRAME
-	ret
-	CFI_ENDPROC
-END(kernel_execve)
+ENTRY(ret_from_kernel_execve)
+	movq %rdi, %rsp
+	movl $0, RAX(%rsp)
+	// RESTORE_REST
+	movq 0*8(%rsp), %r15
+	movq 1*8(%rsp), %r14
+	movq 2*8(%rsp), %r13
+	movq 3*8(%rsp), %r12
+	movq 4*8(%rsp), %rbp
+	movq 5*8(%rsp), %rbx
+	addq $(6*8), %rsp
+	jmp int_ret_from_sys_call
+END(ret_from_kernel_execve)
 
 /* Call softirq on interrupt stack. Interrupts are off. */
 ENTRY(call_softirq)
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 6947ec968bf..eae2dd5cd5a 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -298,25 +298,6 @@ sys_clone(unsigned long clone_flags, unsigned long newsp,
 	return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
 }
 
-/*
- * sys_execve() executes a new program.
- */
-long sys_execve(const char __user *name,
-		const char __user *const __user *argv,
-		const char __user *const __user *envp, struct pt_regs *regs)
-{
-	long error;
-	char *filename;
-
-	filename = getname(name);
-	error = PTR_ERR(filename);
-	if (IS_ERR(filename))
-		return error;
-	error = do_execve(filename, argv, envp, regs);
-	putname(filename);
-	return error;
-}
-
 /*
  * Idle related variables and functions
  */
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index c9939875d26..25e7e9390d2 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -207,6 +207,7 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
 	regs->cs		= __USER_CS;
 	regs->ip		= new_ip;
 	regs->sp		= new_sp;
+	regs->flags		= X86_EFLAGS_IF;
 	/*
 	 * Free the old FP and other extended state
 	 */
diff --git a/arch/x86/kernel/sys_i386_32.c b/arch/x86/kernel/sys_i386_32.c
deleted file mode 100644
index 0b0cb5fede1..00000000000
--- a/arch/x86/kernel/sys_i386_32.c
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * This file contains various random system calls that
- * have a non-standard calling sequence on the Linux/i386
- * platform.
- */
-
-#include <linux/errno.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/fs.h>
-#include <linux/smp.h>
-#include <linux/sem.h>
-#include <linux/msg.h>
-#include <linux/shm.h>
-#include <linux/stat.h>
-#include <linux/syscalls.h>
-#include <linux/mman.h>
-#include <linux/file.h>
-#include <linux/utsname.h>
-#include <linux/ipc.h>
-
-#include <linux/uaccess.h>
-#include <linux/unistd.h>
-
-#include <asm/syscalls.h>
-
-/*
- * Do a system call from kernel instead of calling sys_execve so we
- * end up with proper pt_regs.
- */
-int kernel_execve(const char *filename,
-		  const char *const argv[],
-		  const char *const envp[])
-{
-	long __res;
-	asm volatile ("int $0x80"
-	: "=a" (__res)
-	: "0" (__NR_execve), "b" (filename), "c" (argv), "d" (envp) : "memory");
-	return __res;
-}
diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl
index 7a35a6e71d4..a47103fbc69 100644
--- a/arch/x86/syscalls/syscall_32.tbl
+++ b/arch/x86/syscalls/syscall_32.tbl
@@ -17,7 +17,7 @@
 8	i386	creat			sys_creat
 9	i386	link			sys_link
 10	i386	unlink			sys_unlink
-11	i386	execve			ptregs_execve			stub32_execve
+11	i386	execve			sys_execve			stub32_execve
 12	i386	chdir			sys_chdir
 13	i386	time			sys_time			compat_sys_time
 14	i386	mknod			sys_mknod
diff --git a/arch/x86/um/sys_call_table_32.c b/arch/x86/um/sys_call_table_32.c
index b5408cecac6..232e60504b3 100644
--- a/arch/x86/um/sys_call_table_32.c
+++ b/arch/x86/um/sys_call_table_32.c
@@ -25,7 +25,6 @@
 #define old_mmap sys_old_mmap
 
 #define ptregs_fork sys_fork
-#define ptregs_execve sys_execve
 #define ptregs_iopl sys_iopl
 #define ptregs_vm86old sys_vm86old
 #define ptregs_clone i386_clone
-- 
cgit v1.2.3-70-g09d2


From 1f02ab4a237086095bd584f3446da307ac2d02e6 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 21 Sep 2012 20:32:29 -0400
Subject: um: switch to generic kernel_thread()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/um/include/asm/processor-generic.h |  2 --
 arch/um/kernel/process.c                | 17 +++--------------
 arch/x86/um/Kconfig                     |  1 +
 3 files changed, 4 insertions(+), 16 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h
index 33a6a2423bd..5d9ab0c4f48 100644
--- a/arch/um/include/asm/processor-generic.h
+++ b/arch/um/include/asm/processor-generic.h
@@ -63,8 +63,6 @@ static inline void release_thread(struct task_struct *task)
 {
 }
 
-extern int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags);
-
 extern unsigned long thread_saved_pc(struct task_struct *t);
 
 static inline void mm_copy_segments(struct mm_struct *from_mm,
diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c
index c5f5afa5074..a1b50add48a 100644
--- a/arch/um/kernel/process.c
+++ b/arch/um/kernel/process.c
@@ -69,18 +69,6 @@ unsigned long alloc_stack(int order, int atomic)
 	return page;
 }
 
-int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
-{
-	int pid;
-
-	current->thread.request.u.thread.proc = fn;
-	current->thread.request.u.thread.arg = arg;
-	pid = do_fork(CLONE_VM | CLONE_UNTRACED | flags, 0,
-		      &current->thread.regs, 0, NULL, NULL);
-	return pid;
-}
-EXPORT_SYMBOL(kernel_thread);
-
 static inline void set_current(struct task_struct *task)
 {
 	cpu_tasks[task_thread_info(task)->cpu] = ((struct cpu_task)
@@ -177,7 +165,7 @@ void fork_handler(void)
 }
 
 int copy_thread(unsigned long clone_flags, unsigned long sp,
-		unsigned long stack_top, struct task_struct * p,
+		unsigned long arg, struct task_struct * p,
 		struct pt_regs *regs)
 {
 	void (*handler)(void);
@@ -198,7 +186,8 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
 		arch_copy_thread(&current->thread.arch, &p->thread.arch);
 	} else {
 		get_safe_registers(p->thread.regs.regs.gp, p->thread.regs.regs.fp);
-		p->thread.request.u.thread = current->thread.request.u.thread;
+		p->thread.request.u.thread.proc = (int (*)(void *))sp;
+		p->thread.request.u.thread.arg = (void *)arg;
 		handler = new_thread_handler;
 	}
 
diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig
index 9926e11a772..da85b6fc8e8 100644
--- a/arch/x86/um/Kconfig
+++ b/arch/x86/um/Kconfig
@@ -13,6 +13,7 @@ endmenu
 config UML_X86
 	def_bool y
 	select GENERIC_FIND_FIRST_BIT
+	select GENERIC_KERNEL_THREAD
 
 config 64BIT
 	bool "64-bit kernel" if SUBARCH = "x86"
-- 
cgit v1.2.3-70-g09d2


From 969ae0bfb079de892a95aa4ca1e8ac76c4beb57c Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 26 May 2012 01:07:39 -0400
Subject: x86: get rid of duplicate code in case of CONFIG_VM86

no need to have the call of do_notify_resume() + checks around it
duplicated for vm86 case - a bit of rearranging of ifdefs and we'll
have a perfectly fine copy to jump back to.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/x86/kernel/entry_32.S | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index b6bb6923929..fe4cc305d8d 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -618,6 +618,10 @@ work_notifysig:				# deal with pending signals and
 	movl %esp, %eax
 	jne work_notifysig_v86		# returning to kernel-space or
 					# vm86-space
+1:
+#else
+	movl %esp, %eax
+#endif
 	TRACE_IRQS_ON
 	ENABLE_INTERRUPTS(CLBR_NONE)
 	movb PT_CS(%esp), %bl
@@ -628,24 +632,15 @@ work_notifysig:				# deal with pending signals and
 	call do_notify_resume
 	jmp resume_userspace
 
+#ifdef CONFIG_VM86
 	ALIGN
 work_notifysig_v86:
 	pushl_cfi %ecx			# save ti_flags for do_notify_resume
 	call save_v86_state		# %eax contains pt_regs pointer
 	popl_cfi %ecx
 	movl %eax, %esp
-#else
-	movl %esp, %eax
+	jmp 1b
 #endif
-	TRACE_IRQS_ON
-	ENABLE_INTERRUPTS(CLBR_NONE)
-	movb PT_CS(%esp), %bl
-	andb $SEGMENT_RPL_MASK, %bl
-	cmpb $USER_RPL, %bl
-	jb resume_kernel
-	xorl %edx, %edx
-	call do_notify_resume
-	jmp resume_userspace
 END(work_pending)
 
 	# perform syscall exit tracing
-- 
cgit v1.2.3-70-g09d2


From a1ce39288e6fbefdd8d607021d02384eb4a20b99 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 2 Oct 2012 18:01:25 +0100
Subject: UAPI: (Scripted) Convert #include "..." to #include <path/...> in
 kernel system headers

Convert #include "..." to #include <path/...> in kernel system headers.

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Acked-by: Dave Jones <davej@redhat.com>
---
 arch/arm/include/asm/page.h                      |  2 +-
 arch/arm/include/asm/pgtable.h                   |  2 +-
 arch/arm/include/asm/vfpmacros.h                 |  2 +-
 arch/cris/include/arch-v10/arch/sv_addr_ag.h     |  2 +-
 arch/cris/include/arch-v10/arch/svinto.h         |  2 +-
 arch/cris/include/arch-v32/arch/dma.h            |  2 +-
 arch/cris/include/arch-v32/arch/hwregs/dma.h     |  2 +-
 arch/m68k/include/asm/cacheflush.h               |  4 +--
 arch/m68k/include/asm/io.h                       |  4 +--
 arch/m68k/include/asm/m68360.h                   |  8 +++---
 arch/m68k/include/asm/m68360_enet.h              |  2 +-
 arch/m68k/include/asm/page.h                     |  4 +--
 arch/m68k/include/asm/pgtable.h                  |  4 +--
 arch/m68k/include/asm/q40_master.h               |  2 +-
 arch/m68k/include/asm/uaccess.h                  |  4 +--
 arch/microblaze/include/asm/mmu_context.h        |  2 +-
 arch/mips/include/asm/mach-bcm63xx/bcm63xx_io.h  |  2 +-
 arch/mips/include/asm/mach-pnx833x/gpio.h        |  2 +-
 arch/mips/include/asm/octeon/cvmx-asm.h          |  2 +-
 arch/mips/include/asm/octeon/cvmx-cmd-queue.h    |  2 +-
 arch/mips/include/asm/octeon/cvmx-fpa.h          |  4 +--
 arch/mips/include/asm/octeon/cvmx-helper-board.h |  2 +-
 arch/mips/include/asm/octeon/cvmx-helper.h       | 20 ++++++-------
 arch/mips/include/asm/octeon/cvmx-mdio.h         |  2 +-
 arch/mips/include/asm/octeon/cvmx-pip.h          |  6 ++--
 arch/mips/include/asm/octeon/cvmx-pko.h          |  8 +++---
 arch/mips/include/asm/octeon/cvmx-pow.h          |  4 +--
 arch/mips/include/asm/octeon/cvmx-spi.h          |  2 +-
 arch/mips/include/asm/octeon/cvmx-spinlock.h     |  2 +-
 arch/mips/include/asm/octeon/cvmx-wqe.h          |  2 +-
 arch/mips/include/asm/octeon/cvmx.h              | 36 ++++++++++++------------
 arch/mips/include/asm/octeon/octeon-model.h      |  2 +-
 arch/mips/include/asm/octeon/octeon.h            |  2 +-
 arch/mips/include/asm/sibyte/bcm1480_int.h       |  2 +-
 arch/mips/include/asm/sibyte/bcm1480_l2c.h       |  2 +-
 arch/mips/include/asm/sibyte/bcm1480_mc.h        |  2 +-
 arch/mips/include/asm/sibyte/bcm1480_regs.h      |  4 +--
 arch/mips/include/asm/sibyte/bcm1480_scd.h       |  4 +--
 arch/mips/include/asm/sibyte/sb1250_dma.h        |  2 +-
 arch/mips/include/asm/sibyte/sb1250_genbus.h     |  2 +-
 arch/mips/include/asm/sibyte/sb1250_int.h        |  2 +-
 arch/mips/include/asm/sibyte/sb1250_l2c.h        |  2 +-
 arch/mips/include/asm/sibyte/sb1250_ldt.h        |  2 +-
 arch/mips/include/asm/sibyte/sb1250_mac.h        |  2 +-
 arch/mips/include/asm/sibyte/sb1250_mc.h         |  2 +-
 arch/mips/include/asm/sibyte/sb1250_regs.h       |  2 +-
 arch/mips/include/asm/sibyte/sb1250_scd.h        |  2 +-
 arch/mips/include/asm/sibyte/sb1250_smbus.h      |  2 +-
 arch/mips/include/asm/sibyte/sb1250_syncser.h    |  2 +-
 arch/mips/include/asm/sibyte/sb1250_uart.h       |  2 +-
 arch/powerpc/include/asm/ps3.h                   |  2 +-
 arch/powerpc/include/asm/ucc_fast.h              |  2 +-
 arch/powerpc/include/asm/ucc_slow.h              |  2 +-
 arch/sh/include/asm/bl_bit.h                     |  4 +--
 arch/sh/include/asm/cache_insns.h                |  4 +--
 arch/sh/include/asm/checksum.h                   |  2 +-
 arch/sh/include/asm/mmu_context.h                |  4 +--
 arch/sh/include/asm/posix_types.h                |  8 +++---
 arch/sh/include/asm/processor.h                  |  4 +--
 arch/sh/include/asm/ptrace.h                     |  4 +--
 arch/sh/include/asm/string.h                     |  4 +--
 arch/sh/include/asm/switch_to.h                  |  4 +--
 arch/sh/include/asm/syscall.h                    |  4 +--
 arch/sh/include/asm/syscalls.h                   |  4 +--
 arch/sh/include/asm/tlb.h                        |  2 +-
 arch/sh/include/asm/traps.h                      |  4 +--
 arch/sh/include/asm/uaccess.h                    |  4 +--
 arch/sh/include/asm/unistd.h                     |  8 +++---
 arch/sh/include/mach-ecovec24/mach/romimage.h    |  2 +-
 arch/sh/include/mach-kfr2r09/mach/romimage.h     |  2 +-
 arch/tile/include/gxio/dma_queue.h               |  2 +-
 arch/tile/include/gxio/mpipe.h                   |  4 +--
 arch/tile/include/gxio/trio.h                    |  4 +--
 arch/tile/include/gxio/usb_host.h                |  2 +-
 arch/tile/include/hv/iorpc.h                     |  2 +-
 arch/unicore32/include/mach/PKUnity.h            | 36 ++++++++++++------------
 arch/unicore32/include/mach/hardware.h           |  2 +-
 arch/unicore32/include/mach/uncompress.h         |  4 +--
 arch/x86/include/asm/atomic.h                    |  4 +--
 arch/x86/include/asm/calling.h                   |  2 +-
 arch/x86/include/asm/checksum.h                  |  4 +--
 arch/x86/include/asm/cmpxchg.h                   |  4 +--
 arch/x86/include/asm/mmzone.h                    |  4 +--
 arch/x86/include/asm/mutex.h                     |  4 +--
 arch/x86/include/asm/numa.h                      |  4 +--
 arch/x86/include/asm/pci.h                       |  2 +-
 arch/x86/include/asm/pgtable.h                   |  4 +--
 arch/x86/include/asm/pgtable_types.h             |  4 +--
 arch/x86/include/asm/posix_types.h               | 10 +++----
 arch/x86/include/asm/seccomp.h                   |  4 +--
 arch/x86/include/asm/string.h                    |  4 +--
 arch/x86/include/asm/suspend.h                   |  4 +--
 arch/x86/include/asm/uaccess.h                   |  4 +--
 arch/x86/include/asm/user.h                      |  4 +--
 arch/x86/include/asm/xen/interface.h             |  4 +--
 arch/x86/include/asm/xor.h                       |  4 +--
 arch/x86/include/asm/xor_32.h                    |  2 +-
 arch/x86/include/asm/xor_64.h                    |  2 +-
 include/acpi/acpi.h                              | 18 ++++++------
 include/acpi/acpiosxf.h                          |  4 +--
 include/acpi/acpixf.h                            |  6 ++--
 include/acpi/platform/acenv.h                    |  2 +-
 include/acpi/platform/aclinux.h                  |  2 +-
 include/drm/drm.h                                |  2 +-
 include/drm/drmP.h                               | 14 ++++-----
 include/drm/drm_buffer.h                         |  2 +-
 include/drm/drm_encoder_slave.h                  |  4 +--
 include/drm/drm_memory.h                         |  2 +-
 include/drm/drm_sarea.h                          |  2 +-
 include/drm/exynos_drm.h                         |  2 +-
 include/drm/i915_drm.h                           |  2 +-
 include/drm/mga_drm.h                            |  2 +-
 include/drm/radeon_drm.h                         |  2 +-
 include/drm/ttm/ttm_bo_api.h                     |  2 +-
 include/drm/ttm/ttm_bo_driver.h                  | 16 +++++------
 include/drm/ttm/ttm_execbuf_util.h               |  2 +-
 include/drm/ttm/ttm_lock.h                       |  2 +-
 include/drm/ttm/ttm_object.h                     |  2 +-
 include/drm/ttm/ttm_page_alloc.h                 |  4 +--
 include/drm/via_drm.h                            |  2 +-
 include/linux/bcma/bcma.h                        |  2 +-
 include/linux/ceph/ceph_fs.h                     |  4 +--
 include/linux/ceph/debugfs.h                     |  4 +--
 include/linux/ceph/decode.h                      |  2 +-
 include/linux/ceph/libceph.h                     | 14 ++++-----
 include/linux/ceph/mdsmap.h                      |  2 +-
 include/linux/ceph/messenger.h                   |  4 +--
 include/linux/ceph/mon_client.h                  |  2 +-
 include/linux/ceph/msgpool.h                     |  2 +-
 include/linux/ceph/osdmap.h                      |  4 +--
 include/linux/ceph/rados.h                       |  2 +-
 include/linux/ceph/types.h                       |  6 ++--
 include/linux/crush/mapper.h                     |  2 +-
 include/linux/drbd_tag_magic.h                   |  8 +++---
 include/linux/libfdt.h                           |  4 +--
 include/linux/netfilter/nf_conntrack_h323_asn1.h |  2 +-
 include/linux/pinctrl/consumer.h                 |  2 +-
 include/linux/pinctrl/machine.h                  |  2 +-
 include/linux/pinctrl/pinctrl.h                  |  2 +-
 include/linux/pinctrl/pinmux.h                   |  2 +-
 include/scsi/osd_attributes.h                    |  2 +-
 include/scsi/osd_initiator.h                     |  4 +--
 include/scsi/osd_sec.h                           |  4 +--
 include/sound/ac97_codec.h                       |  6 ++--
 include/sound/ad1816a.h                          |  6 ++--
 include/sound/ak4531_codec.h                     |  4 +--
 include/sound/emu10k1_synth.h                    |  4 +--
 include/sound/emu8000.h                          |  4 +--
 include/sound/emux_legacy.h                      |  2 +-
 include/sound/emux_synth.h                       | 14 ++++-----
 include/sound/es1688.h                           |  4 +--
 include/sound/gus.h                              | 10 +++----
 include/sound/mpu401.h                           |  2 +-
 include/sound/pcm.h                              |  2 +-
 include/sound/rawmidi.h                          |  2 +-
 include/sound/sb.h                               |  4 +--
 include/sound/sb16_csp.h                         |  4 +--
 include/sound/seq_kernel.h                       |  2 +-
 include/sound/seq_midi_emul.h                    |  2 +-
 include/sound/seq_midi_event.h                   |  2 +-
 include/sound/seq_oss.h                          |  4 +--
 include/sound/seq_virmidi.h                      |  4 +--
 include/sound/snd_wavefront.h                    |  8 +++---
 include/sound/soundfont.h                        |  4 +--
 include/sound/tea6330t.h                         |  2 +-
 include/sound/wss.h                              |  8 +++---
 include/trace/events/compaction.h                |  2 +-
 include/trace/events/kmem.h                      |  2 +-
 include/trace/events/vmscan.h                    |  2 +-
 include/xen/interface/callback.h                 |  2 +-
 include/xen/interface/hvm/params.h               |  2 +-
 include/xen/interface/io/blkif.h                 |  4 +--
 include/xen/interface/io/netif.h                 |  4 +--
 include/xen/interface/platform.h                 |  2 +-
 include/xen/interface/sched.h                    |  2 +-
 include/xen/interface/version.h                  |  2 +-
 176 files changed, 350 insertions(+), 350 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/arm/include/asm/page.h b/arch/arm/include/asm/page.h
index ecf901902e4..812a4944e78 100644
--- a/arch/arm/include/asm/page.h
+++ b/arch/arm/include/asm/page.h
@@ -19,7 +19,7 @@
 
 #ifndef CONFIG_MMU
 
-#include "page-nommu.h"
+#include <asm/page-nommu.h>
 
 #else
 
diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index 41dc31f834c..08c12312a1f 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -16,7 +16,7 @@
 #ifndef CONFIG_MMU
 
 #include <asm-generic/4level-fixup.h>
-#include "pgtable-nommu.h"
+#include <asm/pgtable-nommu.h>
 
 #else
 
diff --git a/arch/arm/include/asm/vfpmacros.h b/arch/arm/include/asm/vfpmacros.h
index 3d5fc41ae8d..a7aadbd9a6d 100644
--- a/arch/arm/include/asm/vfpmacros.h
+++ b/arch/arm/include/asm/vfpmacros.h
@@ -5,7 +5,7 @@
  */
 #include <asm/hwcap.h>
 
-#include "vfp.h"
+#include <asm/vfp.h>
 
 @ Macros to allow building with old toolkits (with no VFP support)
 	.macro	VFPFMRX, rd, sysreg, cond
diff --git a/arch/cris/include/arch-v10/arch/sv_addr_ag.h b/arch/cris/include/arch-v10/arch/sv_addr_ag.h
index e4a6b68b898..5517f04153a 100644
--- a/arch/cris/include/arch-v10/arch/sv_addr_ag.h
+++ b/arch/cris/include/arch-v10/arch/sv_addr_ag.h
@@ -114,7 +114,7 @@
 
 /*------------------------------------------------------------*/
 
-#include "sv_addr.agh"
+#include <arch/sv_addr.agh>
 
 #if __test_sv_addr__
 /* IO_MASK( R_BUS_CONFIG , CE ) */
diff --git a/arch/cris/include/arch-v10/arch/svinto.h b/arch/cris/include/arch-v10/arch/svinto.h
index 0881a1af7ce..da5c1527265 100644
--- a/arch/cris/include/arch-v10/arch/svinto.h
+++ b/arch/cris/include/arch-v10/arch/svinto.h
@@ -1,7 +1,7 @@
 #ifndef _ASM_CRIS_SVINTO_H
 #define _ASM_CRIS_SVINTO_H
 
-#include "sv_addr_ag.h"
+#include <arch/sv_addr_ag.h>
 
 extern unsigned int genconfig_shadow; /* defined and set in head.S */
 
diff --git a/arch/cris/include/arch-v32/arch/dma.h b/arch/cris/include/arch-v32/arch/dma.h
index 61906153a9a..6f92f4f23f2 100644
--- a/arch/cris/include/arch-v32/arch/dma.h
+++ b/arch/cris/include/arch-v32/arch/dma.h
@@ -1 +1 @@
-#include "mach/dma.h"
+#include <mach/dma.h>
diff --git a/arch/cris/include/arch-v32/arch/hwregs/dma.h b/arch/cris/include/arch-v32/arch/hwregs/dma.h
index 3ce322b5c73..52bf67907f2 100644
--- a/arch/cris/include/arch-v32/arch/hwregs/dma.h
+++ b/arch/cris/include/arch-v32/arch/hwregs/dma.h
@@ -7,7 +7,7 @@
 #define dma_h
 
 /* registers */ /* Really needed, since both are listed in sw.list? */
-#include "dma_defs.h"
+#include <arch/hwregs/dma_defs.h>
 
 
 /* descriptors */
diff --git a/arch/m68k/include/asm/cacheflush.h b/arch/m68k/include/asm/cacheflush.h
index a70d7319630..4fc738209bd 100644
--- a/arch/m68k/include/asm/cacheflush.h
+++ b/arch/m68k/include/asm/cacheflush.h
@@ -1,5 +1,5 @@
 #ifdef __uClinux__
-#include "cacheflush_no.h"
+#include <asm/cacheflush_no.h>
 #else
-#include "cacheflush_mm.h"
+#include <asm/cacheflush_mm.h>
 #endif
diff --git a/arch/m68k/include/asm/io.h b/arch/m68k/include/asm/io.h
index c7210ba184e..c70cc915500 100644
--- a/arch/m68k/include/asm/io.h
+++ b/arch/m68k/include/asm/io.h
@@ -1,5 +1,5 @@
 #ifdef __uClinux__
-#include "io_no.h"
+#include <asm/io_no.h>
 #else
-#include "io_mm.h"
+#include <asm/io_mm.h>
 #endif
diff --git a/arch/m68k/include/asm/m68360.h b/arch/m68k/include/asm/m68360.h
index eb7d39ef285..4664180a3ab 100644
--- a/arch/m68k/include/asm/m68360.h
+++ b/arch/m68k/include/asm/m68360.h
@@ -1,7 +1,7 @@
-#include "m68360_regs.h"
-#include "m68360_pram.h"
-#include "m68360_quicc.h"
-#include "m68360_enet.h"
+#include <asm/m68360_regs.h>
+#include <asm/m68360_pram.h>
+#include <asm/m68360_quicc.h>
+#include <asm/m68360_enet.h>
 
 #ifdef CONFIG_M68360
 
diff --git a/arch/m68k/include/asm/m68360_enet.h b/arch/m68k/include/asm/m68360_enet.h
index c36f4d05920..4d04037c78a 100644
--- a/arch/m68k/include/asm/m68360_enet.h
+++ b/arch/m68k/include/asm/m68360_enet.h
@@ -10,7 +10,7 @@
 #ifndef __ETHER_H
 #define __ETHER_H
 
-#include "quicc_simple.h"
+#include <asm/quicc_simple.h>
 
 /*
  * transmit BD's
diff --git a/arch/m68k/include/asm/page.h b/arch/m68k/include/asm/page.h
index 98baa82a861..7c360dac00b 100644
--- a/arch/m68k/include/asm/page.h
+++ b/arch/m68k/include/asm/page.h
@@ -43,9 +43,9 @@ extern unsigned long _ramend;
 #endif /* !__ASSEMBLY__ */
 
 #ifdef CONFIG_MMU
-#include "page_mm.h"
+#include <asm/page_mm.h>
 #else
-#include "page_no.h"
+#include <asm/page_no.h>
 #endif
 
 #include <asm-generic/getorder.h>
diff --git a/arch/m68k/include/asm/pgtable.h b/arch/m68k/include/asm/pgtable.h
index ee6759eb445..a3d733b524d 100644
--- a/arch/m68k/include/asm/pgtable.h
+++ b/arch/m68k/include/asm/pgtable.h
@@ -1,5 +1,5 @@
 #ifdef __uClinux__
-#include "pgtable_no.h"
+#include <asm/pgtable_no.h>
 #else
-#include "pgtable_mm.h"
+#include <asm/pgtable_mm.h>
 #endif
diff --git a/arch/m68k/include/asm/q40_master.h b/arch/m68k/include/asm/q40_master.h
index 3907a09d4fc..fc5b36278d0 100644
--- a/arch/m68k/include/asm/q40_master.h
+++ b/arch/m68k/include/asm/q40_master.h
@@ -60,7 +60,7 @@
 #define Q40_RTC_WRITE  128
 
 /* define some Q40 specific ints */
-#include "q40ints.h"
+#include <asm/q40ints.h>
 
 /* misc defs */
 #define DAC_LEFT  ((unsigned char *)0xff008000)
diff --git a/arch/m68k/include/asm/uaccess.h b/arch/m68k/include/asm/uaccess.h
index 38f92dbb9a4..639c731568b 100644
--- a/arch/m68k/include/asm/uaccess.h
+++ b/arch/m68k/include/asm/uaccess.h
@@ -1,5 +1,5 @@
 #ifdef __uClinux__
-#include "uaccess_no.h"
+#include <asm/uaccess_no.h>
 #else
-#include "uaccess_mm.h"
+#include <asm/uaccess_mm.h>
 #endif
diff --git a/arch/microblaze/include/asm/mmu_context.h b/arch/microblaze/include/asm/mmu_context.h
index 24eab1674d3..0ccd8c402cd 100644
--- a/arch/microblaze/include/asm/mmu_context.h
+++ b/arch/microblaze/include/asm/mmu_context.h
@@ -1,5 +1,5 @@
 #ifdef CONFIG_MMU
-# include "mmu_context_mm.h"
+# include <asm/mmu_context_mm.h>
 #else
 # include <asm-generic/mmu_context.h>
 #endif
diff --git a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_io.h b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_io.h
index 9203d90e610..03a54df5fb8 100644
--- a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_io.h
+++ b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_io.h
@@ -1,7 +1,7 @@
 #ifndef BCM63XX_IO_H_
 #define BCM63XX_IO_H_
 
-#include "bcm63xx_cpu.h"
+#include <asm/mach-bcm63xx/bcm63xx_cpu.h>
 
 /*
  * Physical memory map, RAM is mapped at 0x0.
diff --git a/arch/mips/include/asm/mach-pnx833x/gpio.h b/arch/mips/include/asm/mach-pnx833x/gpio.h
index ed3a88da70f..f192acf4a8a 100644
--- a/arch/mips/include/asm/mach-pnx833x/gpio.h
+++ b/arch/mips/include/asm/mach-pnx833x/gpio.h
@@ -30,7 +30,7 @@
    - including locking between different uses
 */
 
-#include "pnx833x.h"
+#include <asm/mach-pnx833x/pnx833x.h>
 
 #define SET_REG_BIT(reg, bit)		do { (reg |= (1 << (bit))); } while (0)
 #define CLEAR_REG_BIT(reg, bit)		do { (reg &= ~(1 << (bit))); } while (0)
diff --git a/arch/mips/include/asm/octeon/cvmx-asm.h b/arch/mips/include/asm/octeon/cvmx-asm.h
index 5de5de95311..31eacc24b77 100644
--- a/arch/mips/include/asm/octeon/cvmx-asm.h
+++ b/arch/mips/include/asm/octeon/cvmx-asm.h
@@ -32,7 +32,7 @@
 #ifndef __CVMX_ASM_H__
 #define __CVMX_ASM_H__
 
-#include "octeon-model.h"
+#include <asm/octeon/octeon-model.h>
 
 /* other useful stuff */
 #define CVMX_SYNC asm volatile ("sync" : : : "memory")
diff --git a/arch/mips/include/asm/octeon/cvmx-cmd-queue.h b/arch/mips/include/asm/octeon/cvmx-cmd-queue.h
index 614653b686a..fed91125317 100644
--- a/arch/mips/include/asm/octeon/cvmx-cmd-queue.h
+++ b/arch/mips/include/asm/octeon/cvmx-cmd-queue.h
@@ -76,7 +76,7 @@
 
 #include <linux/prefetch.h>
 
-#include "cvmx-fpa.h"
+#include <asm/octeon/cvmx-fpa.h>
 /**
  * By default we disable the max depth support. Most programs
  * don't use it and it slows down the command queue processing
diff --git a/arch/mips/include/asm/octeon/cvmx-fpa.h b/arch/mips/include/asm/octeon/cvmx-fpa.h
index 1f04f965873..541a1ae02b6 100644
--- a/arch/mips/include/asm/octeon/cvmx-fpa.h
+++ b/arch/mips/include/asm/octeon/cvmx-fpa.h
@@ -36,8 +36,8 @@
 #ifndef __CVMX_FPA_H__
 #define __CVMX_FPA_H__
 
-#include "cvmx-address.h"
-#include "cvmx-fpa-defs.h"
+#include <asm/octeon/cvmx-address.h>
+#include <asm/octeon/cvmx-fpa-defs.h>
 
 #define CVMX_FPA_NUM_POOLS      8
 #define CVMX_FPA_MIN_BLOCK_SIZE 128
diff --git a/arch/mips/include/asm/octeon/cvmx-helper-board.h b/arch/mips/include/asm/octeon/cvmx-helper-board.h
index 88527fa835c..442f508eaac 100644
--- a/arch/mips/include/asm/octeon/cvmx-helper-board.h
+++ b/arch/mips/include/asm/octeon/cvmx-helper-board.h
@@ -34,7 +34,7 @@
 #ifndef __CVMX_HELPER_BOARD_H__
 #define __CVMX_HELPER_BOARD_H__
 
-#include "cvmx-helper.h"
+#include <asm/octeon/cvmx-helper.h>
 
 typedef enum {
 	set_phy_link_flags_autoneg = 0x1,
diff --git a/arch/mips/include/asm/octeon/cvmx-helper.h b/arch/mips/include/asm/octeon/cvmx-helper.h
index 0ac6b9f412b..691c8142cd4 100644
--- a/arch/mips/include/asm/octeon/cvmx-helper.h
+++ b/arch/mips/include/asm/octeon/cvmx-helper.h
@@ -34,9 +34,9 @@
 #ifndef __CVMX_HELPER_H__
 #define __CVMX_HELPER_H__
 
-#include "cvmx-config.h"
-#include "cvmx-fpa.h"
-#include "cvmx-wqe.h"
+#include <asm/octeon/cvmx-config.h>
+#include <asm/octeon/cvmx-fpa.h>
+#include <asm/octeon/cvmx-wqe.h>
 
 typedef enum {
 	CVMX_HELPER_INTERFACE_MODE_DISABLED,
@@ -62,13 +62,13 @@ typedef union {
 } cvmx_helper_link_info_t;
 
 #include <asm/octeon/cvmx-helper-errata.h>
-#include "cvmx-helper-loop.h"
-#include "cvmx-helper-npi.h"
-#include "cvmx-helper-rgmii.h"
-#include "cvmx-helper-sgmii.h"
-#include "cvmx-helper-spi.h"
-#include "cvmx-helper-util.h"
-#include "cvmx-helper-xaui.h"
+#include <asm/octeon/cvmx-helper-loop.h>
+#include <asm/octeon/cvmx-helper-npi.h>
+#include <asm/octeon/cvmx-helper-rgmii.h>
+#include <asm/octeon/cvmx-helper-sgmii.h>
+#include <asm/octeon/cvmx-helper-spi.h>
+#include <asm/octeon/cvmx-helper-util.h>
+#include <asm/octeon/cvmx-helper-xaui.h>
 
 /**
  * cvmx_override_pko_queue_priority(int ipd_port, uint64_t
diff --git a/arch/mips/include/asm/octeon/cvmx-mdio.h b/arch/mips/include/asm/octeon/cvmx-mdio.h
index d88ab8d8e37..6f0cd182cec 100644
--- a/arch/mips/include/asm/octeon/cvmx-mdio.h
+++ b/arch/mips/include/asm/octeon/cvmx-mdio.h
@@ -35,7 +35,7 @@
 #ifndef __CVMX_MIO_H__
 #define __CVMX_MIO_H__
 
-#include "cvmx-smix-defs.h"
+#include <asm/octeon/cvmx-smix-defs.h>
 
 /**
  * PHY register 0 from the 802.3 spec
diff --git a/arch/mips/include/asm/octeon/cvmx-pip.h b/arch/mips/include/asm/octeon/cvmx-pip.h
index 78dbce8f2c5..9e739a64085 100644
--- a/arch/mips/include/asm/octeon/cvmx-pip.h
+++ b/arch/mips/include/asm/octeon/cvmx-pip.h
@@ -33,9 +33,9 @@
 #ifndef __CVMX_PIP_H__
 #define __CVMX_PIP_H__
 
-#include "cvmx-wqe.h"
-#include "cvmx-fpa.h"
-#include "cvmx-pip-defs.h"
+#include <asm/octeon/cvmx-wqe.h>
+#include <asm/octeon/cvmx-fpa.h>
+#include <asm/octeon/cvmx-pip-defs.h>
 
 #define CVMX_PIP_NUM_INPUT_PORTS                40
 #define CVMX_PIP_NUM_WATCHERS                   4
diff --git a/arch/mips/include/asm/octeon/cvmx-pko.h b/arch/mips/include/asm/octeon/cvmx-pko.h
index de3412aada5..c6daeedf1f8 100644
--- a/arch/mips/include/asm/octeon/cvmx-pko.h
+++ b/arch/mips/include/asm/octeon/cvmx-pko.h
@@ -58,10 +58,10 @@
 #ifndef __CVMX_PKO_H__
 #define __CVMX_PKO_H__
 
-#include "cvmx-fpa.h"
-#include "cvmx-pow.h"
-#include "cvmx-cmd-queue.h"
-#include "cvmx-pko-defs.h"
+#include <asm/octeon/cvmx-fpa.h>
+#include <asm/octeon/cvmx-pow.h>
+#include <asm/octeon/cvmx-cmd-queue.h>
+#include <asm/octeon/cvmx-pko-defs.h>
 
 /* Adjust the command buffer size by 1 word so that in the case of using only
  * two word PKO commands no command words stradle buffers.  The useful values
diff --git a/arch/mips/include/asm/octeon/cvmx-pow.h b/arch/mips/include/asm/octeon/cvmx-pow.h
index 999aefe3274..92742b241a5 100644
--- a/arch/mips/include/asm/octeon/cvmx-pow.h
+++ b/arch/mips/include/asm/octeon/cvmx-pow.h
@@ -53,8 +53,8 @@
 
 #include <asm/octeon/cvmx-pow-defs.h>
 
-#include "cvmx-scratch.h"
-#include "cvmx-wqe.h"
+#include <asm/octeon/cvmx-scratch.h>
+#include <asm/octeon/cvmx-wqe.h>
 
 /* Default to having all POW constancy checks turned on */
 #ifndef CVMX_ENABLE_POW_CHECKS
diff --git a/arch/mips/include/asm/octeon/cvmx-spi.h b/arch/mips/include/asm/octeon/cvmx-spi.h
index e814648953a..3bf53b537bc 100644
--- a/arch/mips/include/asm/octeon/cvmx-spi.h
+++ b/arch/mips/include/asm/octeon/cvmx-spi.h
@@ -32,7 +32,7 @@
 #ifndef __CVMX_SPI_H__
 #define __CVMX_SPI_H__
 
-#include "cvmx-gmxx-defs.h"
+#include <asm/octeon/cvmx-gmxx-defs.h>
 
 /* CSR typedefs have been moved to cvmx-csr-*.h */
 
diff --git a/arch/mips/include/asm/octeon/cvmx-spinlock.h b/arch/mips/include/asm/octeon/cvmx-spinlock.h
index 2fbf0871df1..a672abb1bc4 100644
--- a/arch/mips/include/asm/octeon/cvmx-spinlock.h
+++ b/arch/mips/include/asm/octeon/cvmx-spinlock.h
@@ -35,7 +35,7 @@
 #ifndef __CVMX_SPINLOCK_H__
 #define __CVMX_SPINLOCK_H__
 
-#include "cvmx-asm.h"
+#include <asm/octeon/cvmx-asm.h>
 
 /* Spinlocks for Octeon */
 
diff --git a/arch/mips/include/asm/octeon/cvmx-wqe.h b/arch/mips/include/asm/octeon/cvmx-wqe.h
index 653610953d2..df762389e27 100644
--- a/arch/mips/include/asm/octeon/cvmx-wqe.h
+++ b/arch/mips/include/asm/octeon/cvmx-wqe.h
@@ -40,7 +40,7 @@
 #ifndef __CVMX_WQE_H__
 #define __CVMX_WQE_H__
 
-#include "cvmx-packet.h"
+#include <asm/octeon/cvmx-packet.h>
 
 
 #define OCT_TAG_TYPE_STRING(x)						\
diff --git a/arch/mips/include/asm/octeon/cvmx.h b/arch/mips/include/asm/octeon/cvmx.h
index 740be97a325..db58beab6cb 100644
--- a/arch/mips/include/asm/octeon/cvmx.h
+++ b/arch/mips/include/asm/octeon/cvmx.h
@@ -52,24 +52,24 @@ enum cvmx_mips_space {
 #define CVMX_ADD_IO_SEG(add) CVMX_ADD_SEG(CVMX_IO_SEG, (add))
 #endif
 
-#include "cvmx-asm.h"
-#include "cvmx-packet.h"
-#include "cvmx-sysinfo.h"
-
-#include "cvmx-ciu-defs.h"
-#include "cvmx-gpio-defs.h"
-#include "cvmx-iob-defs.h"
-#include "cvmx-ipd-defs.h"
-#include "cvmx-l2c-defs.h"
-#include "cvmx-l2d-defs.h"
-#include "cvmx-l2t-defs.h"
-#include "cvmx-led-defs.h"
-#include "cvmx-mio-defs.h"
-#include "cvmx-pow-defs.h"
-
-#include "cvmx-bootinfo.h"
-#include "cvmx-bootmem.h"
-#include "cvmx-l2c.h"
+#include <asm/octeon/cvmx-asm.h>
+#include <asm/octeon/cvmx-packet.h>
+#include <asm/octeon/cvmx-sysinfo.h>
+
+#include <asm/octeon/cvmx-ciu-defs.h>
+#include <asm/octeon/cvmx-gpio-defs.h>
+#include <asm/octeon/cvmx-iob-defs.h>
+#include <asm/octeon/cvmx-ipd-defs.h>
+#include <asm/octeon/cvmx-l2c-defs.h>
+#include <asm/octeon/cvmx-l2d-defs.h>
+#include <asm/octeon/cvmx-l2t-defs.h>
+#include <asm/octeon/cvmx-led-defs.h>
+#include <asm/octeon/cvmx-mio-defs.h>
+#include <asm/octeon/cvmx-pow-defs.h>
+
+#include <asm/octeon/cvmx-bootinfo.h>
+#include <asm/octeon/cvmx-bootmem.h>
+#include <asm/octeon/cvmx-l2c.h>
 
 #ifndef CVMX_ENABLE_DEBUG_PRINTS
 #define CVMX_ENABLE_DEBUG_PRINTS 1
diff --git a/arch/mips/include/asm/octeon/octeon-model.h b/arch/mips/include/asm/octeon/octeon-model.h
index 4e338a4d942..23b895cb260 100644
--- a/arch/mips/include/asm/octeon/octeon-model.h
+++ b/arch/mips/include/asm/octeon/octeon-model.h
@@ -313,6 +313,6 @@ static inline int __octeon_is_model_runtime__(uint32_t model)
 const char *octeon_model_get_string(uint32_t chip_id);
 const char *octeon_model_get_string_buffer(uint32_t chip_id, char *buffer);
 
-#include "octeon-feature.h"
+#include <asm/octeon/octeon-feature.h>
 
 #endif /* __OCTEON_MODEL_H__ */
diff --git a/arch/mips/include/asm/octeon/octeon.h b/arch/mips/include/asm/octeon/octeon.h
index 1e2486e2357..c4a1b31966b 100644
--- a/arch/mips/include/asm/octeon/octeon.h
+++ b/arch/mips/include/asm/octeon/octeon.h
@@ -8,7 +8,7 @@
 #ifndef __ASM_OCTEON_OCTEON_H
 #define __ASM_OCTEON_OCTEON_H
 
-#include "cvmx.h"
+#include <asm/octeon/cvmx.h>
 
 extern uint64_t octeon_bootmem_alloc_range_phys(uint64_t size,
 						uint64_t alignment,
diff --git a/arch/mips/include/asm/sibyte/bcm1480_int.h b/arch/mips/include/asm/sibyte/bcm1480_int.h
index 6109557c14e..fffb224d229 100644
--- a/arch/mips/include/asm/sibyte/bcm1480_int.h
+++ b/arch/mips/include/asm/sibyte/bcm1480_int.h
@@ -34,7 +34,7 @@
 #ifndef _BCM1480_INT_H
 #define _BCM1480_INT_H
 
-#include "sb1250_defs.h"
+#include <asm/sibyte/sb1250_defs.h>
 
 /*  *********************************************************************
     *  Interrupt Mapper Constants
diff --git a/arch/mips/include/asm/sibyte/bcm1480_l2c.h b/arch/mips/include/asm/sibyte/bcm1480_l2c.h
index fd75817f7ac..725d38cb9d1 100644
--- a/arch/mips/include/asm/sibyte/bcm1480_l2c.h
+++ b/arch/mips/include/asm/sibyte/bcm1480_l2c.h
@@ -33,7 +33,7 @@
 #ifndef _BCM1480_L2C_H
 #define _BCM1480_L2C_H
 
-#include "sb1250_defs.h"
+#include <asm/sibyte/sb1250_defs.h>
 
 /*
  * Format of level 2 cache management address (Table 55)
diff --git a/arch/mips/include/asm/sibyte/bcm1480_mc.h b/arch/mips/include/asm/sibyte/bcm1480_mc.h
index f26a41a82b5..4307a758e3b 100644
--- a/arch/mips/include/asm/sibyte/bcm1480_mc.h
+++ b/arch/mips/include/asm/sibyte/bcm1480_mc.h
@@ -33,7 +33,7 @@
 #ifndef _BCM1480_MC_H
 #define _BCM1480_MC_H
 
-#include "sb1250_defs.h"
+#include <asm/sibyte/sb1250_defs.h>
 
 /*
  * Memory Channel Configuration Register (Table 81)
diff --git a/arch/mips/include/asm/sibyte/bcm1480_regs.h b/arch/mips/include/asm/sibyte/bcm1480_regs.h
index b4077bb7261..84d168ddfeb 100644
--- a/arch/mips/include/asm/sibyte/bcm1480_regs.h
+++ b/arch/mips/include/asm/sibyte/bcm1480_regs.h
@@ -32,14 +32,14 @@
 #ifndef _BCM1480_REGS_H
 #define _BCM1480_REGS_H
 
-#include "sb1250_defs.h"
+#include <asm/sibyte/sb1250_defs.h>
 
 /*  *********************************************************************
     *  Pull in the BCM1250's registers since a great deal of the 1480's
     *  functions are the same as the BCM1250.
     ********************************************************************* */
 
-#include "sb1250_regs.h"
+#include <asm/sibyte/sb1250_regs.h>
 
 
 /*  *********************************************************************
diff --git a/arch/mips/include/asm/sibyte/bcm1480_scd.h b/arch/mips/include/asm/sibyte/bcm1480_scd.h
index 25ef24cbb92..2af3706b964 100644
--- a/arch/mips/include/asm/sibyte/bcm1480_scd.h
+++ b/arch/mips/include/asm/sibyte/bcm1480_scd.h
@@ -32,13 +32,13 @@
 #ifndef _BCM1480_SCD_H
 #define _BCM1480_SCD_H
 
-#include "sb1250_defs.h"
+#include <asm/sibyte/sb1250_defs.h>
 
 /*  *********************************************************************
     *  Pull in the BCM1250's SCD since lots of stuff is the same.
     ********************************************************************* */
 
-#include "sb1250_scd.h"
+#include <asm/sibyte/sb1250_scd.h>
 
 /*  *********************************************************************
     *  Some general notes:
diff --git a/arch/mips/include/asm/sibyte/sb1250_dma.h b/arch/mips/include/asm/sibyte/sb1250_dma.h
index bad56171d74..6c44dfb5287 100644
--- a/arch/mips/include/asm/sibyte/sb1250_dma.h
+++ b/arch/mips/include/asm/sibyte/sb1250_dma.h
@@ -36,7 +36,7 @@
 #define _SB1250_DMA_H
 
 
-#include "sb1250_defs.h"
+#include <asm/sibyte/sb1250_defs.h>
 
 /*  *********************************************************************
     *  DMA Registers
diff --git a/arch/mips/include/asm/sibyte/sb1250_genbus.h b/arch/mips/include/asm/sibyte/sb1250_genbus.h
index 94e9c7c8e78..a96ded17bdc 100644
--- a/arch/mips/include/asm/sibyte/sb1250_genbus.h
+++ b/arch/mips/include/asm/sibyte/sb1250_genbus.h
@@ -34,7 +34,7 @@
 #ifndef _SB1250_GENBUS_H
 #define _SB1250_GENBUS_H
 
-#include "sb1250_defs.h"
+#include <asm/sibyte/sb1250_defs.h>
 
 /*
  * Generic Bus Region Configuration Registers (Table 11-4)
diff --git a/arch/mips/include/asm/sibyte/sb1250_int.h b/arch/mips/include/asm/sibyte/sb1250_int.h
index f2850b4bcfd..dbea73ddd2f 100644
--- a/arch/mips/include/asm/sibyte/sb1250_int.h
+++ b/arch/mips/include/asm/sibyte/sb1250_int.h
@@ -33,7 +33,7 @@
 #ifndef _SB1250_INT_H
 #define _SB1250_INT_H
 
-#include "sb1250_defs.h"
+#include <asm/sibyte/sb1250_defs.h>
 
 /*  *********************************************************************
     *  Interrupt Mapper Constants
diff --git a/arch/mips/include/asm/sibyte/sb1250_l2c.h b/arch/mips/include/asm/sibyte/sb1250_l2c.h
index 6554dcf05cf..b61a7491607 100644
--- a/arch/mips/include/asm/sibyte/sb1250_l2c.h
+++ b/arch/mips/include/asm/sibyte/sb1250_l2c.h
@@ -33,7 +33,7 @@
 #ifndef _SB1250_L2C_H
 #define _SB1250_L2C_H
 
-#include "sb1250_defs.h"
+#include <asm/sibyte/sb1250_defs.h>
 
 /*
  * Level 2 Cache Tag register (Table 5-3)
diff --git a/arch/mips/include/asm/sibyte/sb1250_ldt.h b/arch/mips/include/asm/sibyte/sb1250_ldt.h
index 1e76cf13799..bf7f320d1a8 100644
--- a/arch/mips/include/asm/sibyte/sb1250_ldt.h
+++ b/arch/mips/include/asm/sibyte/sb1250_ldt.h
@@ -33,7 +33,7 @@
 #ifndef _SB1250_LDT_H
 #define _SB1250_LDT_H
 
-#include "sb1250_defs.h"
+#include <asm/sibyte/sb1250_defs.h>
 
 #define K_LDT_VENDOR_SIBYTE	0x166D
 #define K_LDT_DEVICE_SB1250	0x0002
diff --git a/arch/mips/include/asm/sibyte/sb1250_mac.h b/arch/mips/include/asm/sibyte/sb1250_mac.h
index 77f78728423..cfc4d787088 100644
--- a/arch/mips/include/asm/sibyte/sb1250_mac.h
+++ b/arch/mips/include/asm/sibyte/sb1250_mac.h
@@ -33,7 +33,7 @@
 #ifndef _SB1250_MAC_H
 #define _SB1250_MAC_H
 
-#include "sb1250_defs.h"
+#include <asm/sibyte/sb1250_defs.h>
 
 /*  *********************************************************************
     *  Ethernet MAC Registers
diff --git a/arch/mips/include/asm/sibyte/sb1250_mc.h b/arch/mips/include/asm/sibyte/sb1250_mc.h
index 1eb1b5a8873..15048dcaf22 100644
--- a/arch/mips/include/asm/sibyte/sb1250_mc.h
+++ b/arch/mips/include/asm/sibyte/sb1250_mc.h
@@ -33,7 +33,7 @@
 #ifndef _SB1250_MC_H
 #define _SB1250_MC_H
 
-#include "sb1250_defs.h"
+#include <asm/sibyte/sb1250_defs.h>
 
 /*
  * Memory Channel Config Register (table 6-14)
diff --git a/arch/mips/include/asm/sibyte/sb1250_regs.h b/arch/mips/include/asm/sibyte/sb1250_regs.h
index 8f53ec817a5..29b9f0b26b3 100644
--- a/arch/mips/include/asm/sibyte/sb1250_regs.h
+++ b/arch/mips/include/asm/sibyte/sb1250_regs.h
@@ -33,7 +33,7 @@
 #ifndef _SB1250_REGS_H
 #define _SB1250_REGS_H
 
-#include "sb1250_defs.h"
+#include <asm/sibyte/sb1250_defs.h>
 
 
 /*  *********************************************************************
diff --git a/arch/mips/include/asm/sibyte/sb1250_scd.h b/arch/mips/include/asm/sibyte/sb1250_scd.h
index e49c3e89b5e..615e165dbd2 100644
--- a/arch/mips/include/asm/sibyte/sb1250_scd.h
+++ b/arch/mips/include/asm/sibyte/sb1250_scd.h
@@ -32,7 +32,7 @@
 #ifndef _SB1250_SCD_H
 #define _SB1250_SCD_H
 
-#include "sb1250_defs.h"
+#include <asm/sibyte/sb1250_defs.h>
 
 /*  *********************************************************************
     *  System control/debug registers
diff --git a/arch/mips/include/asm/sibyte/sb1250_smbus.h b/arch/mips/include/asm/sibyte/sb1250_smbus.h
index 04769923cf1..128d6b75b81 100644
--- a/arch/mips/include/asm/sibyte/sb1250_smbus.h
+++ b/arch/mips/include/asm/sibyte/sb1250_smbus.h
@@ -34,7 +34,7 @@
 #ifndef _SB1250_SMBUS_H
 #define _SB1250_SMBUS_H
 
-#include "sb1250_defs.h"
+#include <asm/sibyte/sb1250_defs.h>
 
 /*
  * SMBus Clock Frequency Register (Table 14-2)
diff --git a/arch/mips/include/asm/sibyte/sb1250_syncser.h b/arch/mips/include/asm/sibyte/sb1250_syncser.h
index d4b8558e0bf..274e9179d32 100644
--- a/arch/mips/include/asm/sibyte/sb1250_syncser.h
+++ b/arch/mips/include/asm/sibyte/sb1250_syncser.h
@@ -33,7 +33,7 @@
 #ifndef _SB1250_SYNCSER_H
 #define _SB1250_SYNCSER_H
 
-#include "sb1250_defs.h"
+#include <asm/sibyte/sb1250_defs.h>
 
 /*
  * Serial Mode Configuration Register
diff --git a/arch/mips/include/asm/sibyte/sb1250_uart.h b/arch/mips/include/asm/sibyte/sb1250_uart.h
index d835bf28014..bb99ecac581 100644
--- a/arch/mips/include/asm/sibyte/sb1250_uart.h
+++ b/arch/mips/include/asm/sibyte/sb1250_uart.h
@@ -33,7 +33,7 @@
 #ifndef _SB1250_UART_H
 #define _SB1250_UART_H
 
-#include "sb1250_defs.h"
+#include <asm/sibyte/sb1250_defs.h>
 
 /* **********************************************************************
    * DUART Registers
diff --git a/arch/powerpc/include/asm/ps3.h b/arch/powerpc/include/asm/ps3.h
index 7f065e178ec..0e15db4d703 100644
--- a/arch/powerpc/include/asm/ps3.h
+++ b/arch/powerpc/include/asm/ps3.h
@@ -24,7 +24,7 @@
 #include <linux/init.h>
 #include <linux/types.h>
 #include <linux/device.h>
-#include "cell-pmu.h"
+#include <asm/cell-pmu.h>
 
 union ps3_firmware_version {
 	u64 raw;
diff --git a/arch/powerpc/include/asm/ucc_fast.h b/arch/powerpc/include/asm/ucc_fast.h
index 839aab8bf37..4644c840e2f 100644
--- a/arch/powerpc/include/asm/ucc_fast.h
+++ b/arch/powerpc/include/asm/ucc_fast.h
@@ -19,7 +19,7 @@
 #include <asm/immap_qe.h>
 #include <asm/qe.h>
 
-#include "ucc.h"
+#include <asm/ucc.h>
 
 /* Receive BD's status */
 #define R_E	0x80000000	/* buffer empty */
diff --git a/arch/powerpc/include/asm/ucc_slow.h b/arch/powerpc/include/asm/ucc_slow.h
index 0980e6ad335..cf131ffdb8d 100644
--- a/arch/powerpc/include/asm/ucc_slow.h
+++ b/arch/powerpc/include/asm/ucc_slow.h
@@ -20,7 +20,7 @@
 #include <asm/immap_qe.h>
 #include <asm/qe.h>
 
-#include "ucc.h"
+#include <asm/ucc.h>
 
 /* transmit BD's status */
 #define T_R	0x80000000	/* ready bit */
diff --git a/arch/sh/include/asm/bl_bit.h b/arch/sh/include/asm/bl_bit.h
index 45e6b9fc37a..06e4163c674 100644
--- a/arch/sh/include/asm/bl_bit.h
+++ b/arch/sh/include/asm/bl_bit.h
@@ -2,9 +2,9 @@
 #define __ASM_SH_BL_BIT_H
 
 #ifdef CONFIG_SUPERH32
-# include "bl_bit_32.h"
+# include <asm/bl_bit_32.h>
 #else
-# include "bl_bit_64.h"
+# include <asm/bl_bit_64.h>
 #endif
 
 #endif /* __ASM_SH_BL_BIT_H */
diff --git a/arch/sh/include/asm/cache_insns.h b/arch/sh/include/asm/cache_insns.h
index d25fbe53090..355cb06b7a3 100644
--- a/arch/sh/include/asm/cache_insns.h
+++ b/arch/sh/include/asm/cache_insns.h
@@ -3,9 +3,9 @@
 
 
 #ifdef CONFIG_SUPERH32
-# include "cache_insns_32.h"
+# include <asm/cache_insns_32.h>
 #else
-# include "cache_insns_64.h"
+# include <asm/cache_insns_64.h>
 #endif
 
 #endif /* __ASM_SH_CACHE_INSNS_H */
diff --git a/arch/sh/include/asm/checksum.h b/arch/sh/include/asm/checksum.h
index fc26d1f4b59..34ae2620452 100644
--- a/arch/sh/include/asm/checksum.h
+++ b/arch/sh/include/asm/checksum.h
@@ -1,5 +1,5 @@
 #ifdef CONFIG_SUPERH32
-# include "checksum_32.h"
+# include <asm/checksum_32.h>
 #else
 # include <asm-generic/checksum.h>
 #endif
diff --git a/arch/sh/include/asm/mmu_context.h b/arch/sh/include/asm/mmu_context.h
index 384c7471a37..21c5088788d 100644
--- a/arch/sh/include/asm/mmu_context.h
+++ b/arch/sh/include/asm/mmu_context.h
@@ -46,9 +46,9 @@
 #define MMU_VPN_MASK	0xfffff000
 
 #if defined(CONFIG_SUPERH32)
-#include "mmu_context_32.h"
+#include <asm/mmu_context_32.h>
 #else
-#include "mmu_context_64.h"
+#include <asm/mmu_context_64.h>
 #endif
 
 /*
diff --git a/arch/sh/include/asm/posix_types.h b/arch/sh/include/asm/posix_types.h
index 4eeb723aee7..f08449bcbde 100644
--- a/arch/sh/include/asm/posix_types.h
+++ b/arch/sh/include/asm/posix_types.h
@@ -1,13 +1,13 @@
 #ifdef __KERNEL__
 # ifdef CONFIG_SUPERH32
-#  include "posix_types_32.h"
+#  include <asm/posix_types_32.h>
 # else
-#  include "posix_types_64.h"
+#  include <asm/posix_types_64.h>
 # endif
 #else
 # ifdef __SH5__
-#  include "posix_types_64.h"
+#  include <asm/posix_types_64.h>
 # else
-#  include "posix_types_32.h"
+#  include <asm/posix_types_32.h>
 # endif
 #endif /* __KERNEL__ */
diff --git a/arch/sh/include/asm/processor.h b/arch/sh/include/asm/processor.h
index 3d14aeaef57..5448f9bbf4a 100644
--- a/arch/sh/include/asm/processor.h
+++ b/arch/sh/include/asm/processor.h
@@ -175,9 +175,9 @@ extern unsigned int instruction_size(unsigned int insn);
 #endif /* __ASSEMBLY__ */
 
 #ifdef CONFIG_SUPERH32
-# include "processor_32.h"
+# include <asm/processor_32.h>
 #else
-# include "processor_64.h"
+# include <asm/processor_64.h>
 #endif
 
 #endif /* __ASM_SH_PROCESSOR_H */
diff --git a/arch/sh/include/asm/ptrace.h b/arch/sh/include/asm/ptrace.h
index c7b7e1ed194..a4a38dff997 100644
--- a/arch/sh/include/asm/ptrace.h
+++ b/arch/sh/include/asm/ptrace.h
@@ -25,9 +25,9 @@
 #define PT_TEXT_LEN		252
 
 #if defined(__SH5__) || defined(CONFIG_CPU_SH5)
-#include "ptrace_64.h"
+#include <asm/ptrace_64.h>
 #else
-#include "ptrace_32.h"
+#include <asm/ptrace_32.h>
 #endif
 
 #ifdef __KERNEL__
diff --git a/arch/sh/include/asm/string.h b/arch/sh/include/asm/string.h
index 8c1ea21dc0a..114011fa08a 100644
--- a/arch/sh/include/asm/string.h
+++ b/arch/sh/include/asm/string.h
@@ -1,5 +1,5 @@
 #ifdef CONFIG_SUPERH32
-# include "string_32.h"
+# include <asm/string_32.h>
 #else
-# include "string_64.h"
+# include <asm/string_64.h>
 #endif
diff --git a/arch/sh/include/asm/switch_to.h b/arch/sh/include/asm/switch_to.h
index 62b1941813e..bcd722fc834 100644
--- a/arch/sh/include/asm/switch_to.h
+++ b/arch/sh/include/asm/switch_to.h
@@ -11,9 +11,9 @@
 #define __ASM_SH_SWITCH_TO_H
 
 #ifdef CONFIG_SUPERH32
-# include "switch_to_32.h"
+# include <asm/switch_to_32.h>
 #else
-# include "switch_to_64.h"
+# include <asm/switch_to_64.h>
 #endif
 
 #endif /* __ASM_SH_SWITCH_TO_H */
diff --git a/arch/sh/include/asm/syscall.h b/arch/sh/include/asm/syscall.h
index aa7777bdc37..847128da6ea 100644
--- a/arch/sh/include/asm/syscall.h
+++ b/arch/sh/include/asm/syscall.h
@@ -4,9 +4,9 @@
 extern const unsigned long sys_call_table[];
 
 #ifdef CONFIG_SUPERH32
-# include "syscall_32.h"
+# include <asm/syscall_32.h>
 #else
-# include "syscall_64.h"
+# include <asm/syscall_64.h>
 #endif
 
 #endif /* __ASM_SH_SYSCALL_H */
diff --git a/arch/sh/include/asm/syscalls.h b/arch/sh/include/asm/syscalls.h
index 507725af2e5..3dbfef06f6b 100644
--- a/arch/sh/include/asm/syscalls.h
+++ b/arch/sh/include/asm/syscalls.h
@@ -11,9 +11,9 @@ asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
 			  unsigned long fd, unsigned long pgoff);
 
 #ifdef CONFIG_SUPERH32
-# include "syscalls_32.h"
+# include <asm/syscalls_32.h>
 #else
-# include "syscalls_64.h"
+# include <asm/syscalls_64.h>
 #endif
 
 #endif /* __KERNEL__ */
diff --git a/arch/sh/include/asm/tlb.h b/arch/sh/include/asm/tlb.h
index ec88bfcdf7c..e61d43d9f68 100644
--- a/arch/sh/include/asm/tlb.h
+++ b/arch/sh/include/asm/tlb.h
@@ -2,7 +2,7 @@
 #define __ASM_SH_TLB_H
 
 #ifdef CONFIG_SUPERH64
-# include "tlb_64.h"
+# include <asm/tlb_64.h>
 #endif
 
 #ifndef __ASSEMBLY__
diff --git a/arch/sh/include/asm/traps.h b/arch/sh/include/asm/traps.h
index afd9df8d064..9cc149a0dbd 100644
--- a/arch/sh/include/asm/traps.h
+++ b/arch/sh/include/asm/traps.h
@@ -4,9 +4,9 @@
 #include <linux/compiler.h>
 
 #ifdef CONFIG_SUPERH32
-# include "traps_32.h"
+# include <asm/traps_32.h>
 #else
-# include "traps_64.h"
+# include <asm/traps_64.h>
 #endif
 
 BUILD_TRAP_HANDLER(address_error);
diff --git a/arch/sh/include/asm/uaccess.h b/arch/sh/include/asm/uaccess.h
index 8698a80ed00..9486376605f 100644
--- a/arch/sh/include/asm/uaccess.h
+++ b/arch/sh/include/asm/uaccess.h
@@ -97,9 +97,9 @@ struct __large_struct { unsigned long buf[100]; };
 })
 
 #ifdef CONFIG_SUPERH32
-# include "uaccess_32.h"
+# include <asm/uaccess_32.h>
 #else
-# include "uaccess_64.h"
+# include <asm/uaccess_64.h>
 #endif
 
 extern long strncpy_from_user(char *dest, const char __user *src, long count);
diff --git a/arch/sh/include/asm/unistd.h b/arch/sh/include/asm/unistd.h
index 7bc67076baa..307201a854f 100644
--- a/arch/sh/include/asm/unistd.h
+++ b/arch/sh/include/asm/unistd.h
@@ -1,8 +1,8 @@
 #ifdef __KERNEL__
 # ifdef CONFIG_SUPERH32
-#  include "unistd_32.h"
+#  include <asm/unistd_32.h>
 # else
-#  include "unistd_64.h"
+#  include <asm/unistd_64.h>
 # endif
 
 # define __ARCH_WANT_SYS_RT_SIGSUSPEND
@@ -40,8 +40,8 @@
 
 #else
 # ifdef __SH5__
-#  include "unistd_64.h"
+#  include <asm/unistd_64.h>
 # else
-#  include "unistd_32.h"
+#  include <asm/unistd_32.h>
 # endif
 #endif
diff --git a/arch/sh/include/mach-ecovec24/mach/romimage.h b/arch/sh/include/mach-ecovec24/mach/romimage.h
index d63ef51ec18..60f3e8af05f 100644
--- a/arch/sh/include/mach-ecovec24/mach/romimage.h
+++ b/arch/sh/include/mach-ecovec24/mach/romimage.h
@@ -6,7 +6,7 @@
  */
 
 #include <asm/romimage-macros.h>
-#include "partner-jet-setup.txt"
+#include <mach/partner-jet-setup.txt>
 
 	/* execute icbi after enabling cache */
 	mov.l	1f, r0
diff --git a/arch/sh/include/mach-kfr2r09/mach/romimage.h b/arch/sh/include/mach-kfr2r09/mach/romimage.h
index 7a883167c84..1afae21ced5 100644
--- a/arch/sh/include/mach-kfr2r09/mach/romimage.h
+++ b/arch/sh/include/mach-kfr2r09/mach/romimage.h
@@ -6,7 +6,7 @@
  */
 
 #include <asm/romimage-macros.h>
-#include "partner-jet-setup.txt"
+#include <mach/partner-jet-setup.txt>
 
 	/* execute icbi after enabling cache */
 	mov.l	1f, r0
diff --git a/arch/tile/include/gxio/dma_queue.h b/arch/tile/include/gxio/dma_queue.h
index 00654feb7db..b9e45e37649 100644
--- a/arch/tile/include/gxio/dma_queue.h
+++ b/arch/tile/include/gxio/dma_queue.h
@@ -19,7 +19,7 @@
  * DMA queue management APIs shared between TRIO and mPIPE.
  */
 
-#include "common.h"
+#include <gxio/common.h>
 
 /* The credit counter lives in the high 32 bits. */
 #define DMA_QUEUE_CREDIT_SHIFT 32
diff --git a/arch/tile/include/gxio/mpipe.h b/arch/tile/include/gxio/mpipe.h
index 78c598618c9..b74f470ed11 100644
--- a/arch/tile/include/gxio/mpipe.h
+++ b/arch/tile/include/gxio/mpipe.h
@@ -21,8 +21,8 @@
  * resources.
  */
 
-#include "common.h"
-#include "dma_queue.h"
+#include <gxio/common.h>
+#include <gxio/dma_queue.h>
 
 #include <linux/time.h>
 
diff --git a/arch/tile/include/gxio/trio.h b/arch/tile/include/gxio/trio.h
index 77b80cdd46d..df10a662cc2 100644
--- a/arch/tile/include/gxio/trio.h
+++ b/arch/tile/include/gxio/trio.h
@@ -140,8 +140,8 @@
 
 #include <linux/types.h>
 
-#include "common.h"
-#include "dma_queue.h"
+#include <gxio/common.h>
+#include <gxio/dma_queue.h>
 
 #include <arch/trio_constants.h>
 #include <arch/trio.h>
diff --git a/arch/tile/include/gxio/usb_host.h b/arch/tile/include/gxio/usb_host.h
index a60a126e456..5eedec0e988 100644
--- a/arch/tile/include/gxio/usb_host.h
+++ b/arch/tile/include/gxio/usb_host.h
@@ -14,7 +14,7 @@
 #ifndef _GXIO_USB_H_
 #define _GXIO_USB_H_
 
-#include "common.h"
+#include <gxio/common.h>
 
 #include <hv/drv_usb_host_intf.h>
 #include <hv/iorpc.h>
diff --git a/arch/tile/include/hv/iorpc.h b/arch/tile/include/hv/iorpc.h
index 89c72a5d934..ddf1604482b 100644
--- a/arch/tile/include/hv/iorpc.h
+++ b/arch/tile/include/hv/iorpc.h
@@ -248,7 +248,7 @@
 #if defined(__HV__)
 #include <hv/hypervisor.h>
 #elif defined(__KERNEL__)
-#include "hypervisor.h"
+#include <hv/hypervisor.h>
 #include <linux/types.h>
 #else
 #include <stdint.h>
diff --git a/arch/unicore32/include/mach/PKUnity.h b/arch/unicore32/include/mach/PKUnity.h
index 8040d575ddd..46705afcbf5 100644
--- a/arch/unicore32/include/mach/PKUnity.h
+++ b/arch/unicore32/include/mach/PKUnity.h
@@ -15,7 +15,7 @@
 #error You must include hardware.h not PKUnity.h
 #endif
 
-#include "bitfield.h"
+#include <mach/bitfield.h>
 
 /*
  * Memory Definitions
@@ -32,7 +32,7 @@
  * 0x98000000 - 0x9FFFFFFF 128MB  PCI PCI-AHB MEM-mapping
  */
 #define PKUNITY_PCI_BASE		io_p2v(0x80000000) /* 0x80000000 - 0xBFFFFFFF 1GB */
-#include "regs-pci.h"
+#include <mach/regs-pci.h>
 
 #define PKUNITY_PCICFG_BASE		(PKUNITY_PCI_BASE + 0x0)
 #define PKUNITY_PCIBRI_BASE		(PKUNITY_PCI_BASE + 0x00010000)
@@ -50,18 +50,18 @@
 #define PKUNITY_ARBITER_BASE		(PKUNITY_AHB_BASE + 0x000000) /* AHB-2 */
 #define PKUNITY_DDR2CTRL_BASE		(PKUNITY_AHB_BASE + 0x100000) /* AHB-3 */
 #define PKUNITY_DMAC_BASE		(PKUNITY_AHB_BASE + 0x200000) /* AHB-4 */
-#include "regs-dmac.h"
+#include <mach/regs-dmac.h>
 #define PKUNITY_UMAL_BASE		(PKUNITY_AHB_BASE + 0x300000) /* AHB-5 */
-#include "regs-umal.h"
+#include <mach/regs-umal.h>
 #define PKUNITY_USB_BASE		(PKUNITY_AHB_BASE + 0x400000) /* AHB-6 */
 #define PKUNITY_SATA_BASE		(PKUNITY_AHB_BASE + 0x500000) /* AHB-7 */
 #define PKUNITY_SMC_BASE		(PKUNITY_AHB_BASE + 0x600000) /* AHB-8 */
 /* AHB-9 is for APB bridge */
 #define PKUNITY_MME_BASE		(PKUNITY_AHB_BASE + 0x700000) /* AHB-10 */
 #define PKUNITY_UNIGFX_BASE		(PKUNITY_AHB_BASE + 0x800000) /* AHB-11 */
-#include "regs-unigfx.h"
+#include <mach/regs-unigfx.h>
 #define PKUNITY_NAND_BASE		(PKUNITY_AHB_BASE + 0x900000) /* AHB-12 */
-#include "regs-nand.h"
+#include <mach/regs-nand.h>
 #define PKUNITY_H264D_BASE		(PKUNITY_AHB_BASE + 0xA00000) /* AHB-13 */
 #define PKUNITY_H264E_BASE		(PKUNITY_AHB_BASE + 0xB00000) /* AHB-14 */
 
@@ -72,27 +72,27 @@
 
 #define PKUNITY_UART0_BASE		(PKUNITY_APB_BASE + 0x000000) /* APB-0 */
 #define PKUNITY_UART1_BASE		(PKUNITY_APB_BASE + 0x100000) /* APB-1 */
-#include "regs-uart.h"
+#include <mach/regs-uart.h>
 #define PKUNITY_I2C_BASE		(PKUNITY_APB_BASE + 0x200000) /* APB-2 */
-#include "regs-i2c.h"
+#include <mach/regs-i2c.h>
 #define PKUNITY_SPI_BASE		(PKUNITY_APB_BASE + 0x300000) /* APB-3 */
-#include "regs-spi.h"
+#include <mach/regs-spi.h>
 #define PKUNITY_AC97_BASE		(PKUNITY_APB_BASE + 0x400000) /* APB-4 */
-#include "regs-ac97.h"
+#include <mach/regs-ac97.h>
 #define PKUNITY_GPIO_BASE		(PKUNITY_APB_BASE + 0x500000) /* APB-5 */
-#include "regs-gpio.h"
+#include <mach/regs-gpio.h>
 #define PKUNITY_INTC_BASE		(PKUNITY_APB_BASE + 0x600000) /* APB-6 */
-#include "regs-intc.h"
+#include <mach/regs-intc.h>
 #define PKUNITY_RTC_BASE		(PKUNITY_APB_BASE + 0x700000) /* APB-7 */
-#include "regs-rtc.h"
+#include <mach/regs-rtc.h>
 #define PKUNITY_OST_BASE		(PKUNITY_APB_BASE + 0x800000) /* APB-8 */
-#include "regs-ost.h"
+#include <mach/regs-ost.h>
 #define PKUNITY_RESETC_BASE		(PKUNITY_APB_BASE + 0x900000) /* APB-9 */
-#include "regs-resetc.h"
+#include <mach/regs-resetc.h>
 #define PKUNITY_PM_BASE			(PKUNITY_APB_BASE + 0xA00000) /* APB-10 */
-#include "regs-pm.h"
+#include <mach/regs-pm.h>
 #define PKUNITY_PS2_BASE		(PKUNITY_APB_BASE + 0xB00000) /* APB-11 */
-#include "regs-ps2.h"
+#include <mach/regs-ps2.h>
 #define PKUNITY_SDC_BASE		(PKUNITY_APB_BASE + 0xC00000) /* APB-12 */
-#include "regs-sdc.h"
+#include <mach/regs-sdc.h>
 
diff --git a/arch/unicore32/include/mach/hardware.h b/arch/unicore32/include/mach/hardware.h
index 930bea6e129..9e20b5d9ed5 100644
--- a/arch/unicore32/include/mach/hardware.h
+++ b/arch/unicore32/include/mach/hardware.h
@@ -15,7 +15,7 @@
 #ifndef __MACH_PUV3_HARDWARE_H__
 #define __MACH_PUV3_HARDWARE_H__
 
-#include "PKUnity.h"
+#include <mach/PKUnity.h>
 
 #ifndef __ASSEMBLY__
 #define io_p2v(x)	(void __iomem *)((x) - PKUNITY_MMIO_BASE)
diff --git a/arch/unicore32/include/mach/uncompress.h b/arch/unicore32/include/mach/uncompress.h
index 142d3e7958a..9be67c9d3b5 100644
--- a/arch/unicore32/include/mach/uncompress.h
+++ b/arch/unicore32/include/mach/uncompress.h
@@ -13,8 +13,8 @@
 #ifndef __MACH_PUV3_UNCOMPRESS_H__
 #define __MACH_PUV3_UNCOMPRESS_H__
 
-#include "hardware.h"
-#include "ocd.h"
+#include <mach/hardware.h>
+#include <mach/ocd.h>
 
 extern char input_data[];
 extern char input_data_end[];
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index 58cb6d4085f..250b8774c15 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -309,9 +309,9 @@ static inline void atomic_or_long(unsigned long *v1, unsigned long v2)
 #define smp_mb__after_atomic_inc()	barrier()
 
 #ifdef CONFIG_X86_32
-# include "atomic64_32.h"
+# include <asm/atomic64_32.h>
 #else
-# include "atomic64_64.h"
+# include <asm/atomic64_64.h>
 #endif
 
 #endif /* _ASM_X86_ATOMIC_H */
diff --git a/arch/x86/include/asm/calling.h b/arch/x86/include/asm/calling.h
index 7f8422a28a4..0fa67503391 100644
--- a/arch/x86/include/asm/calling.h
+++ b/arch/x86/include/asm/calling.h
@@ -46,7 +46,7 @@ For 32-bit we have the following conventions - kernel is built with
 
 */
 
-#include "dwarf2.h"
+#include <asm/dwarf2.h>
 
 /*
  * 64-bit system call stack frame layout defines and helpers,
diff --git a/arch/x86/include/asm/checksum.h b/arch/x86/include/asm/checksum.h
index 848850fd7d6..5f5bb0f9736 100644
--- a/arch/x86/include/asm/checksum.h
+++ b/arch/x86/include/asm/checksum.h
@@ -1,5 +1,5 @@
 #ifdef CONFIG_X86_32
-# include "checksum_32.h"
+# include <asm/checksum_32.h>
 #else
-# include "checksum_64.h"
+# include <asm/checksum_64.h>
 #endif
diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h
index 99480e55973..8d871eaddb6 100644
--- a/arch/x86/include/asm/cmpxchg.h
+++ b/arch/x86/include/asm/cmpxchg.h
@@ -138,9 +138,9 @@ extern void __add_wrong_size(void)
 	__raw_cmpxchg((ptr), (old), (new), (size), "")
 
 #ifdef CONFIG_X86_32
-# include "cmpxchg_32.h"
+# include <asm/cmpxchg_32.h>
 #else
-# include "cmpxchg_64.h"
+# include <asm/cmpxchg_64.h>
 #endif
 
 #ifdef __HAVE_ARCH_CMPXCHG
diff --git a/arch/x86/include/asm/mmzone.h b/arch/x86/include/asm/mmzone.h
index 64217ea16a3..d497bc425ca 100644
--- a/arch/x86/include/asm/mmzone.h
+++ b/arch/x86/include/asm/mmzone.h
@@ -1,5 +1,5 @@
 #ifdef CONFIG_X86_32
-# include "mmzone_32.h"
+# include <asm/mmzone_32.h>
 #else
-# include "mmzone_64.h"
+# include <asm/mmzone_64.h>
 #endif
diff --git a/arch/x86/include/asm/mutex.h b/arch/x86/include/asm/mutex.h
index a731b9c573a..7d3a4827539 100644
--- a/arch/x86/include/asm/mutex.h
+++ b/arch/x86/include/asm/mutex.h
@@ -1,5 +1,5 @@
 #ifdef CONFIG_X86_32
-# include "mutex_32.h"
+# include <asm/mutex_32.h>
 #else
-# include "mutex_64.h"
+# include <asm/mutex_64.h>
 #endif
diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h
index bfacd2ccf65..49119fcea2d 100644
--- a/arch/x86/include/asm/numa.h
+++ b/arch/x86/include/asm/numa.h
@@ -53,9 +53,9 @@ static inline int numa_cpu_node(int cpu)
 #endif	/* CONFIG_NUMA */
 
 #ifdef CONFIG_X86_32
-# include "numa_32.h"
+# include <asm/numa_32.h>
 #else
-# include "numa_64.h"
+# include <asm/numa_64.h>
 #endif
 
 #ifdef CONFIG_NUMA
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index df75d07571c..6e41b934392 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -141,7 +141,7 @@ void default_restore_msi_irqs(struct pci_dev *dev, int irq);
 #endif  /* __KERNEL__ */
 
 #ifdef CONFIG_X86_64
-#include "pci_64.h"
+#include <asm/pci_64.h>
 #endif
 
 /* implement the pci_ DMA API in terms of the generic device dma_ one */
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 49afb3f41eb..fc994846529 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -384,9 +384,9 @@ pte_t *populate_extra_pte(unsigned long vaddr);
 #endif	/* __ASSEMBLY__ */
 
 #ifdef CONFIG_X86_32
-# include "pgtable_32.h"
+# include <asm/pgtable_32.h>
 #else
-# include "pgtable_64.h"
+# include <asm/pgtable_64.h>
 #endif
 
 #ifndef __ASSEMBLY__
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index db8fec6d295..ec8a1fc9505 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -174,9 +174,9 @@
 #endif
 
 #ifdef CONFIG_X86_32
-# include "pgtable_32_types.h"
+# include <asm/pgtable_32_types.h>
 #else
-# include "pgtable_64_types.h"
+# include <asm/pgtable_64_types.h>
 #endif
 
 #ifndef __ASSEMBLY__
diff --git a/arch/x86/include/asm/posix_types.h b/arch/x86/include/asm/posix_types.h
index 7ef7c3020e5..bad3665c25f 100644
--- a/arch/x86/include/asm/posix_types.h
+++ b/arch/x86/include/asm/posix_types.h
@@ -1,15 +1,15 @@
 #ifdef __KERNEL__
 # ifdef CONFIG_X86_32
-#  include "posix_types_32.h"
+#  include <asm/posix_types_32.h>
 # else
-#  include "posix_types_64.h"
+#  include <asm/posix_types_64.h>
 # endif
 #else
 # ifdef __i386__
-#  include "posix_types_32.h"
+#  include <asm/posix_types_32.h>
 # elif defined(__ILP32__)
-#  include "posix_types_x32.h"
+#  include <asm/posix_types_x32.h>
 # else
-#  include "posix_types_64.h"
+#  include <asm/posix_types_64.h>
 # endif
 #endif
diff --git a/arch/x86/include/asm/seccomp.h b/arch/x86/include/asm/seccomp.h
index c62e58a5a90..0f3d7f09922 100644
--- a/arch/x86/include/asm/seccomp.h
+++ b/arch/x86/include/asm/seccomp.h
@@ -1,5 +1,5 @@
 #ifdef CONFIG_X86_32
-# include "seccomp_32.h"
+# include <asm/seccomp_32.h>
 #else
-# include "seccomp_64.h"
+# include <asm/seccomp_64.h>
 #endif
diff --git a/arch/x86/include/asm/string.h b/arch/x86/include/asm/string.h
index 6dfd6d9373a..09224d7a586 100644
--- a/arch/x86/include/asm/string.h
+++ b/arch/x86/include/asm/string.h
@@ -1,5 +1,5 @@
 #ifdef CONFIG_X86_32
-# include "string_32.h"
+# include <asm/string_32.h>
 #else
-# include "string_64.h"
+# include <asm/string_64.h>
 #endif
diff --git a/arch/x86/include/asm/suspend.h b/arch/x86/include/asm/suspend.h
index 9bd521fe457..2fab6c2c357 100644
--- a/arch/x86/include/asm/suspend.h
+++ b/arch/x86/include/asm/suspend.h
@@ -1,5 +1,5 @@
 #ifdef CONFIG_X86_32
-# include "suspend_32.h"
+# include <asm/suspend_32.h>
 #else
-# include "suspend_64.h"
+# include <asm/suspend_64.h>
 #endif
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index a91acfbb1a9..7ccf8d13153 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -589,9 +589,9 @@ extern struct movsl_mask {
 #define ARCH_HAS_NOCACHE_UACCESS 1
 
 #ifdef CONFIG_X86_32
-# include "uaccess_32.h"
+# include <asm/uaccess_32.h>
 #else
-# include "uaccess_64.h"
+# include <asm/uaccess_64.h>
 #endif
 
 #endif /* _ASM_X86_UACCESS_H */
diff --git a/arch/x86/include/asm/user.h b/arch/x86/include/asm/user.h
index 24532c7da3d..ccab4af1646 100644
--- a/arch/x86/include/asm/user.h
+++ b/arch/x86/include/asm/user.h
@@ -2,9 +2,9 @@
 #define _ASM_X86_USER_H
 
 #ifdef CONFIG_X86_32
-# include "user_32.h"
+# include <asm/user_32.h>
 #else
-# include "user_64.h"
+# include <asm/user_64.h>
 #endif
 
 #include <asm/types.h>
diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h
index cbf0c9d50b9..80502a2bb78 100644
--- a/arch/x86/include/asm/xen/interface.h
+++ b/arch/x86/include/asm/xen/interface.h
@@ -116,9 +116,9 @@ struct arch_shared_info {
 #endif	/* !__ASSEMBLY__ */
 
 #ifdef CONFIG_X86_32
-#include "interface_32.h"
+#include <asm/xen/interface_32.h>
 #else
-#include "interface_64.h"
+#include <asm/xen/interface_64.h>
 #endif
 
 #ifndef __ASSEMBLY__
diff --git a/arch/x86/include/asm/xor.h b/arch/x86/include/asm/xor.h
index 7fcf6f3dbcc..f8fde90bc45 100644
--- a/arch/x86/include/asm/xor.h
+++ b/arch/x86/include/asm/xor.h
@@ -3,8 +3,8 @@
 # include <asm-generic/xor.h>
 #else
 #ifdef CONFIG_X86_32
-# include "xor_32.h"
+# include <asm/xor_32.h>
 #else
-# include "xor_64.h"
+# include <asm/xor_64.h>
 #endif
 #endif
diff --git a/arch/x86/include/asm/xor_32.h b/arch/x86/include/asm/xor_32.h
index aabd5850bdb..f79cb7ec0e0 100644
--- a/arch/x86/include/asm/xor_32.h
+++ b/arch/x86/include/asm/xor_32.h
@@ -822,7 +822,7 @@ static struct xor_block_template xor_block_pIII_sse = {
 };
 
 /* Also try the AVX routines */
-#include "xor_avx.h"
+#include <asm/xor_avx.h>
 
 /* Also try the generic routines.  */
 #include <asm-generic/xor.h>
diff --git a/arch/x86/include/asm/xor_64.h b/arch/x86/include/asm/xor_64.h
index 5fc06d0b7eb..87ac522c4af 100644
--- a/arch/x86/include/asm/xor_64.h
+++ b/arch/x86/include/asm/xor_64.h
@@ -306,7 +306,7 @@ static struct xor_block_template xor_block_sse = {
 
 
 /* Also try the AVX routines */
-#include "xor_avx.h"
+#include <asm/xor_avx.h>
 
 #undef XOR_TRY_TEMPLATES
 #define XOR_TRY_TEMPLATES			\
diff --git a/include/acpi/acpi.h b/include/acpi/acpi.h
index c433d5e2767..c1ea8436961 100644
--- a/include/acpi/acpi.h
+++ b/include/acpi/acpi.h
@@ -53,14 +53,14 @@
  *
  * Note: The order of these include files is important.
  */
-#include "platform/acenv.h"	/* Environment-specific items */
-#include "acnames.h"		/* Common ACPI names and strings */
-#include "actypes.h"		/* ACPICA data types and structures */
-#include "acexcep.h"		/* ACPICA exceptions */
-#include "actbl.h"		/* ACPI table definitions */
-#include "acoutput.h"		/* Error output and Debug macros */
-#include "acrestyp.h"		/* Resource Descriptor structs */
-#include "acpiosxf.h"		/* OSL interfaces (ACPICA-to-OS) */
-#include "acpixf.h"		/* ACPI core subsystem external interfaces */
+#include <acpi/platform/acenv.h>	/* Environment-specific items */
+#include <acpi/acnames.h>		/* Common ACPI names and strings */
+#include <acpi/actypes.h>		/* ACPICA data types and structures */
+#include <acpi/acexcep.h>		/* ACPICA exceptions */
+#include <acpi/actbl.h>		/* ACPI table definitions */
+#include <acpi/acoutput.h>		/* Error output and Debug macros */
+#include <acpi/acrestyp.h>		/* Resource Descriptor structs */
+#include <acpi/acpiosxf.h>		/* OSL interfaces (ACPICA-to-OS) */
+#include <acpi/acpixf.h>		/* ACPI core subsystem external interfaces */
 
 #endif				/* __ACPI_H__ */
diff --git a/include/acpi/acpiosxf.h b/include/acpi/acpiosxf.h
index 0650f5fa7ce..1222ba93d80 100644
--- a/include/acpi/acpiosxf.h
+++ b/include/acpi/acpiosxf.h
@@ -47,8 +47,8 @@
 #ifndef __ACPIOSXF_H__
 #define __ACPIOSXF_H__
 
-#include "platform/acenv.h"
-#include "actypes.h"
+#include <acpi/platform/acenv.h>
+#include <acpi/actypes.h>
 
 /* Types for acpi_os_execute */
 
diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h
index 26a92fc28a5..51405d32ac6 100644
--- a/include/acpi/acpixf.h
+++ b/include/acpi/acpixf.h
@@ -49,9 +49,9 @@
 
 #define ACPI_CA_VERSION                 0x20120711
 
-#include "acconfig.h"
-#include "actypes.h"
-#include "actbl.h"
+#include <acpi/acconfig.h>
+#include <acpi/actypes.h>
+#include <acpi/actbl.h>
 
 extern u8 acpi_gbl_permanent_mmap;
 
diff --git a/include/acpi/platform/acenv.h b/include/acpi/platform/acenv.h
index 560a9f272f3..89cee88dd2a 100644
--- a/include/acpi/platform/acenv.h
+++ b/include/acpi/platform/acenv.h
@@ -138,7 +138,7 @@
 /*! [Begin] no source code translation */
 
 #if defined(_LINUX) || defined(__linux__)
-#include "aclinux.h"
+#include <acpi/platform/aclinux.h>
 
 #elif defined(_AED_EFI)
 #include "acefi.h"
diff --git a/include/acpi/platform/aclinux.h b/include/acpi/platform/aclinux.h
index 7509be30ca0..85d5d8f3845 100644
--- a/include/acpi/platform/aclinux.h
+++ b/include/acpi/platform/aclinux.h
@@ -106,7 +106,7 @@
 
 /* Linux uses GCC */
 
-#include "acgcc.h"
+#include <acpi/platform/acgcc.h>
 
 
 #ifdef __KERNEL__
diff --git a/include/drm/drm.h b/include/drm/drm.h
index e51035a3757..1e3481edf06 100644
--- a/include/drm/drm.h
+++ b/include/drm/drm.h
@@ -628,7 +628,7 @@ struct drm_prime_handle {
 	__s32 fd;
 };
 
-#include "drm_mode.h"
+#include <drm/drm_mode.h>
 
 #define DRM_IOCTL_BASE			'd'
 #define DRM_IO(nr)			_IO(DRM_IOCTL_BASE,nr)
diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index 1b33df4884b..0c44e4a000f 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -85,9 +85,9 @@ struct module;
 struct drm_file;
 struct drm_device;
 
-#include "drm_os_linux.h"
-#include "drm_hashtab.h"
-#include "drm_mm.h"
+#include <drm/drm_os_linux.h>
+#include <drm/drm_hashtab.h>
+#include <drm/drm_mm.h>
 
 #define DRM_UT_CORE 		0x01
 #define DRM_UT_DRIVER		0x02
@@ -676,7 +676,7 @@ struct drm_gem_object {
 	struct dma_buf_attachment *import_attach;
 };
 
-#include "drm_crtc.h"
+#include <drm/drm_crtc.h>
 
 /* per-master structure */
 struct drm_master {
@@ -1304,7 +1304,7 @@ extern void drm_vm_close_locked(struct drm_device *dev, struct vm_area_struct *v
 extern unsigned int drm_poll(struct file *filp, struct poll_table_struct *wait);
 
 				/* Memory management support (drm_memory.h) */
-#include "drm_memory.h"
+#include <drm/drm_memory.h>
 extern void drm_free_agp(DRM_AGP_MEM * handle, int pages);
 extern int drm_bind_agp(DRM_AGP_MEM * handle, unsigned int start);
 extern DRM_AGP_MEM *drm_agp_bind_pages(struct drm_device *dev,
@@ -1613,7 +1613,7 @@ void drm_gem_vm_open(struct vm_area_struct *vma);
 void drm_gem_vm_close(struct vm_area_struct *vma);
 int drm_gem_mmap(struct file *filp, struct vm_area_struct *vma);
 
-#include "drm_global.h"
+#include <drm/drm_global.h>
 
 static inline void
 drm_gem_object_reference(struct drm_gem_object *obj)
@@ -1722,7 +1722,7 @@ static __inline__ void drm_core_dropmap(struct drm_local_map *map)
 {
 }
 
-#include "drm_mem_util.h"
+#include <drm/drm_mem_util.h>
 
 extern int drm_fill_in_dev(struct drm_device *dev,
 			   const struct pci_device_id *ent,
diff --git a/include/drm/drm_buffer.h b/include/drm/drm_buffer.h
index 322dbff3f86..c80d3a340b9 100644
--- a/include/drm/drm_buffer.h
+++ b/include/drm/drm_buffer.h
@@ -35,7 +35,7 @@
 #ifndef _DRM_BUFFER_H_
 #define _DRM_BUFFER_H_
 
-#include "drmP.h"
+#include <drm/drmP.h>
 
 struct drm_buffer {
 	int iterator;
diff --git a/include/drm/drm_encoder_slave.h b/include/drm/drm_encoder_slave.h
index 7dc38523380..b0c11a7809b 100644
--- a/include/drm/drm_encoder_slave.h
+++ b/include/drm/drm_encoder_slave.h
@@ -27,8 +27,8 @@
 #ifndef __DRM_ENCODER_SLAVE_H__
 #define __DRM_ENCODER_SLAVE_H__
 
-#include "drmP.h"
-#include "drm_crtc.h"
+#include <drm/drmP.h>
+#include <drm/drm_crtc.h>
 
 /**
  * struct drm_encoder_slave_funcs - Entry points exposed by a slave encoder driver
diff --git a/include/drm/drm_memory.h b/include/drm/drm_memory.h
index 15af9b32ae4..4baf57a207e 100644
--- a/include/drm/drm_memory.h
+++ b/include/drm/drm_memory.h
@@ -35,7 +35,7 @@
 
 #include <linux/highmem.h>
 #include <linux/vmalloc.h>
-#include "drmP.h"
+#include <drm/drmP.h>
 
 /**
  * Cut down version of drm_memory_debug.h, which used to be called
diff --git a/include/drm/drm_sarea.h b/include/drm/drm_sarea.h
index ee5389d22c6..d3aedc90b9f 100644
--- a/include/drm/drm_sarea.h
+++ b/include/drm/drm_sarea.h
@@ -32,7 +32,7 @@
 #ifndef _DRM_SAREA_H_
 #define _DRM_SAREA_H_
 
-#include "drm.h"
+#include <drm/drm.h>
 
 /* SAREA area needs to be at least a page */
 #if defined(__alpha__)
diff --git a/include/drm/exynos_drm.h b/include/drm/exynos_drm.h
index c20b0018153..1f2acdfbfd6 100644
--- a/include/drm/exynos_drm.h
+++ b/include/drm/exynos_drm.h
@@ -29,7 +29,7 @@
 #ifndef _EXYNOS_DRM_H_
 #define _EXYNOS_DRM_H_
 
-#include "drm.h"
+#include <drm/drm.h>
 
 /**
  * User-desired buffer creation information structure.
diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
index 8cc70837f92..814a42c8942 100644
--- a/include/drm/i915_drm.h
+++ b/include/drm/i915_drm.h
@@ -27,7 +27,7 @@
 #ifndef _I915_DRM_H_
 #define _I915_DRM_H_
 
-#include "drm.h"
+#include <drm/drm.h>
 
 /* Please note that modifications to all structs defined here are
  * subject to backwards-compatibility constraints.
diff --git a/include/drm/mga_drm.h b/include/drm/mga_drm.h
index fca817009e1..2375bfd6e5e 100644
--- a/include/drm/mga_drm.h
+++ b/include/drm/mga_drm.h
@@ -35,7 +35,7 @@
 #ifndef __MGA_DRM_H__
 #define __MGA_DRM_H__
 
-#include "drm.h"
+#include <drm/drm.h>
 
 /* WARNING: If you change any of these defines, make sure to change the
  * defines in the Xserver file (mga_sarea.h)
diff --git a/include/drm/radeon_drm.h b/include/drm/radeon_drm.h
index dc3a8cd7db8..4766c0f6a83 100644
--- a/include/drm/radeon_drm.h
+++ b/include/drm/radeon_drm.h
@@ -33,7 +33,7 @@
 #ifndef __RADEON_DRM_H__
 #define __RADEON_DRM_H__
 
-#include "drm.h"
+#include <drm/drm.h>
 
 /* WARNING: If you change any of these defines, make sure to change the
  * defines in the X server file (radeon_sarea.h)
diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
index e15f2a89a27..e8028ade567 100644
--- a/include/drm/ttm/ttm_bo_api.h
+++ b/include/drm/ttm/ttm_bo_api.h
@@ -31,7 +31,7 @@
 #ifndef _TTM_BO_API_H_
 #define _TTM_BO_API_H_
 
-#include "drm_hashtab.h"
+#include <drm/drm_hashtab.h>
 #include <linux/kref.h>
 #include <linux/list.h>
 #include <linux/wait.h>
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index 084e8989a6e..d803b92b032 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -30,14 +30,14 @@
 #ifndef _TTM_BO_DRIVER_H_
 #define _TTM_BO_DRIVER_H_
 
-#include "ttm/ttm_bo_api.h"
-#include "ttm/ttm_memory.h"
-#include "ttm/ttm_module.h"
-#include "drm_mm.h"
-#include "drm_global.h"
-#include "linux/workqueue.h"
-#include "linux/fs.h"
-#include "linux/spinlock.h"
+#include <ttm/ttm_bo_api.h>
+#include <ttm/ttm_memory.h>
+#include <ttm/ttm_module.h>
+#include <drm/drm_mm.h>
+#include <drm/drm_global.h>
+#include <linux/workqueue.h>
+#include <linux/fs.h>
+#include <linux/spinlock.h>
 
 struct ttm_backend_func {
 	/**
diff --git a/include/drm/ttm/ttm_execbuf_util.h b/include/drm/ttm/ttm_execbuf_util.h
index 26cc7f9ffa4..1926cae373b 100644
--- a/include/drm/ttm/ttm_execbuf_util.h
+++ b/include/drm/ttm/ttm_execbuf_util.h
@@ -31,7 +31,7 @@
 #ifndef _TTM_EXECBUF_UTIL_H_
 #define _TTM_EXECBUF_UTIL_H_
 
-#include "ttm/ttm_bo_api.h"
+#include <ttm/ttm_bo_api.h>
 #include <linux/list.h>
 
 /**
diff --git a/include/drm/ttm/ttm_lock.h b/include/drm/ttm/ttm_lock.h
index 2e7f0c941b5..2902beb5f68 100644
--- a/include/drm/ttm/ttm_lock.h
+++ b/include/drm/ttm/ttm_lock.h
@@ -49,7 +49,7 @@
 #ifndef _TTM_LOCK_H_
 #define _TTM_LOCK_H_
 
-#include "ttm/ttm_object.h"
+#include <ttm/ttm_object.h>
 #include <linux/wait.h>
 #include <linux/atomic.h>
 
diff --git a/include/drm/ttm/ttm_object.h b/include/drm/ttm/ttm_object.h
index e46054e5255..b01c563b275 100644
--- a/include/drm/ttm/ttm_object.h
+++ b/include/drm/ttm/ttm_object.h
@@ -38,7 +38,7 @@
 #define _TTM_OBJECT_H_
 
 #include <linux/list.h>
-#include "drm_hashtab.h"
+#include <drm/drm_hashtab.h>
 #include <linux/kref.h>
 #include <ttm/ttm_memory.h>
 
diff --git a/include/drm/ttm/ttm_page_alloc.h b/include/drm/ttm/ttm_page_alloc.h
index 5fe27400d17..706b962c646 100644
--- a/include/drm/ttm/ttm_page_alloc.h
+++ b/include/drm/ttm/ttm_page_alloc.h
@@ -26,8 +26,8 @@
 #ifndef TTM_PAGE_ALLOC
 #define TTM_PAGE_ALLOC
 
-#include "ttm_bo_driver.h"
-#include "ttm_memory.h"
+#include <drm/ttm/ttm_bo_driver.h>
+#include <drm/ttm/ttm_memory.h>
 
 /**
  * Initialize pool allocator.
diff --git a/include/drm/via_drm.h b/include/drm/via_drm.h
index 79b3b6e0f6b..8b0533ccbd5 100644
--- a/include/drm/via_drm.h
+++ b/include/drm/via_drm.h
@@ -24,7 +24,7 @@
 #ifndef _VIA_DRM_H_
 #define _VIA_DRM_H_
 
-#include "drm.h"
+#include <drm/drm.h>
 
 /* WARNING: These defines must be the same as what the Xserver uses.
  * if you change them, you must change the defines in the Xserver.
diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h
index 1954a4e305a..4180eb78d57 100644
--- a/include/linux/bcma/bcma.h
+++ b/include/linux/bcma/bcma.h
@@ -10,7 +10,7 @@
 #include <linux/bcma/bcma_driver_gmac_cmn.h>
 #include <linux/ssb/ssb.h> /* SPROM sharing */
 
-#include "bcma_regs.h"
+#include <linux/bcma/bcma_regs.h>
 
 struct bcma_device;
 struct bcma_bus;
diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h
index d021610efd6..cf6f4d998a7 100644
--- a/include/linux/ceph/ceph_fs.h
+++ b/include/linux/ceph/ceph_fs.h
@@ -12,8 +12,8 @@
 #ifndef CEPH_FS_H
 #define CEPH_FS_H
 
-#include "msgr.h"
-#include "rados.h"
+#include <linux/ceph/msgr.h>
+#include <linux/ceph/rados.h>
 
 /*
  * subprotocol versions.  when specific messages types or high-level
diff --git a/include/linux/ceph/debugfs.h b/include/linux/ceph/debugfs.h
index 2a79702e092..1df086d7882 100644
--- a/include/linux/ceph/debugfs.h
+++ b/include/linux/ceph/debugfs.h
@@ -1,8 +1,8 @@
 #ifndef _FS_CEPH_DEBUGFS_H
 #define _FS_CEPH_DEBUGFS_H
 
-#include "ceph_debug.h"
-#include "types.h"
+#include <linux/ceph/ceph_debug.h>
+#include <linux/ceph/types.h>
 
 #define CEPH_DEFINE_SHOW_FUNC(name)					\
 static int name##_open(struct inode *inode, struct file *file)		\
diff --git a/include/linux/ceph/decode.h b/include/linux/ceph/decode.h
index 4bbf2db45f4..63d092822ba 100644
--- a/include/linux/ceph/decode.h
+++ b/include/linux/ceph/decode.h
@@ -6,7 +6,7 @@
 #include <linux/time.h>
 #include <asm/unaligned.h>
 
-#include "types.h"
+#include <linux/ceph/types.h>
 
 /*
  * in all cases,
diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
index 42624789b06..6470792b13d 100644
--- a/include/linux/ceph/libceph.h
+++ b/include/linux/ceph/libceph.h
@@ -1,7 +1,7 @@
 #ifndef _FS_CEPH_LIBCEPH_H
 #define _FS_CEPH_LIBCEPH_H
 
-#include "ceph_debug.h"
+#include <linux/ceph/ceph_debug.h>
 
 #include <asm/unaligned.h>
 #include <linux/backing-dev.h>
@@ -15,12 +15,12 @@
 #include <linux/writeback.h>
 #include <linux/slab.h>
 
-#include "types.h"
-#include "messenger.h"
-#include "msgpool.h"
-#include "mon_client.h"
-#include "osd_client.h"
-#include "ceph_fs.h"
+#include <linux/ceph/types.h>
+#include <linux/ceph/messenger.h>
+#include <linux/ceph/msgpool.h>
+#include <linux/ceph/mon_client.h>
+#include <linux/ceph/osd_client.h>
+#include <linux/ceph/ceph_fs.h>
 
 /*
  * mount options
diff --git a/include/linux/ceph/mdsmap.h b/include/linux/ceph/mdsmap.h
index 9935fac8c10..cb15b5d867c 100644
--- a/include/linux/ceph/mdsmap.h
+++ b/include/linux/ceph/mdsmap.h
@@ -2,7 +2,7 @@
 #define _FS_CEPH_MDSMAP_H
 
 #include <linux/bug.h>
-#include "types.h"
+#include <linux/ceph/types.h>
 
 /*
  * mds map - describe servers in the mds cluster.
diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
index 189ae063763..14ba5ee738a 100644
--- a/include/linux/ceph/messenger.h
+++ b/include/linux/ceph/messenger.h
@@ -8,8 +8,8 @@
 #include <linux/uio.h>
 #include <linux/workqueue.h>
 
-#include "types.h"
-#include "buffer.h"
+#include <linux/ceph/types.h>
+#include <linux/ceph/buffer.h>
 
 struct ceph_msg;
 struct ceph_connection;
diff --git a/include/linux/ceph/mon_client.h b/include/linux/ceph/mon_client.h
index 2113e3850a4..1fb93e9080b 100644
--- a/include/linux/ceph/mon_client.h
+++ b/include/linux/ceph/mon_client.h
@@ -5,7 +5,7 @@
 #include <linux/kref.h>
 #include <linux/rbtree.h>
 
-#include "messenger.h"
+#include <linux/ceph/messenger.h>
 
 struct ceph_client;
 struct ceph_mount_args;
diff --git a/include/linux/ceph/msgpool.h b/include/linux/ceph/msgpool.h
index 09fa96b4343..4b0d3896072 100644
--- a/include/linux/ceph/msgpool.h
+++ b/include/linux/ceph/msgpool.h
@@ -2,7 +2,7 @@
 #define _FS_CEPH_MSGPOOL
 
 #include <linux/mempool.h>
-#include "messenger.h"
+#include <linux/ceph/messenger.h>
 
 /*
  * we use memory pools for preallocating messages we may receive, to
diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h
index 311ef8d6aa9..25b930bffea 100644
--- a/include/linux/ceph/osdmap.h
+++ b/include/linux/ceph/osdmap.h
@@ -2,8 +2,8 @@
 #define _FS_CEPH_OSDMAP_H
 
 #include <linux/rbtree.h>
-#include "types.h"
-#include "ceph_fs.h"
+#include <linux/ceph/types.h>
+#include <linux/ceph/ceph_fs.h>
 #include <linux/crush/crush.h>
 
 /*
diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h
index 0a99099801a..de91fbdf127 100644
--- a/include/linux/ceph/rados.h
+++ b/include/linux/ceph/rados.h
@@ -6,7 +6,7 @@
  * (Reliable Autonomic Distributed Object Store).
  */
 
-#include "msgr.h"
+#include <linux/ceph/msgr.h>
 
 /*
  * osdmap encoding versions
diff --git a/include/linux/ceph/types.h b/include/linux/ceph/types.h
index 28b35a005ec..d3ff1cf2d27 100644
--- a/include/linux/ceph/types.h
+++ b/include/linux/ceph/types.h
@@ -7,9 +7,9 @@
 #include <linux/fcntl.h>
 #include <linux/string.h>
 
-#include "ceph_fs.h"
-#include "ceph_frag.h"
-#include "ceph_hash.h"
+#include <linux/ceph/ceph_fs.h>
+#include <linux/ceph/ceph_frag.h>
+#include <linux/ceph/ceph_hash.h>
 
 /*
  * Identify inodes by both their ino AND snapshot id (a u64).
diff --git a/include/linux/crush/mapper.h b/include/linux/crush/mapper.h
index 71d79f44a7d..5772dee3ecb 100644
--- a/include/linux/crush/mapper.h
+++ b/include/linux/crush/mapper.h
@@ -8,7 +8,7 @@
  * LGPL2
  */
 
-#include "crush.h"
+#include <linux/crush/crush.h>
 
 extern int crush_find_rule(const struct crush_map *map, int ruleset, int type, int size);
 extern int crush_do_rule(const struct crush_map *map,
diff --git a/include/linux/drbd_tag_magic.h b/include/linux/drbd_tag_magic.h
index 81f52f2c572..82de1f9e48b 100644
--- a/include/linux/drbd_tag_magic.h
+++ b/include/linux/drbd_tag_magic.h
@@ -12,7 +12,7 @@ enum packet_types {
 #define NL_INT64(pn, pr, member)
 #define NL_BIT(pn, pr, member)
 #define NL_STRING(pn, pr, member, len)
-#include "drbd_nl.h"
+#include <linux/drbd_nl.h>
 	P_nl_after_last_packet,
 };
 
@@ -37,7 +37,7 @@ static const int tag_list_sizes[] = {
 #define NL_INT64(pn, pr, member)        + 4 + 8
 #define NL_BIT(pn, pr, member)          + 4 + 1
 #define NL_STRING(pn, pr, member, len)  + 4 + (len)
-#include "drbd_nl.h"
+#include <linux/drbd_nl.h>
 };
 
 /* The two highest bits are used for the tag type */
@@ -62,7 +62,7 @@ enum drbd_tags {
 #define NL_INT64(pn, pr, member)       T_ ## member = pn | TT_INT64   | pr ,
 #define NL_BIT(pn, pr, member)         T_ ## member = pn | TT_BIT     | pr ,
 #define NL_STRING(pn, pr, member, len) T_ ## member = pn | TT_STRING  | pr ,
-#include "drbd_nl.h"
+#include <linux/drbd_nl.h>
 };
 
 struct tag {
@@ -78,7 +78,7 @@ static const struct tag tag_descriptions[] = {
 #define NL_INT64(pn, pr, member)       [ pn ] = { #member, TT_INT64   | pr, sizeof(__u64) },
 #define NL_BIT(pn, pr, member)         [ pn ] = { #member, TT_BIT     | pr, sizeof(int)   },
 #define NL_STRING(pn, pr, member, len) [ pn ] = { #member, TT_STRING  | pr, (len)         },
-#include "drbd_nl.h"
+#include <linux/drbd_nl.h>
 };
 
 #endif
diff --git a/include/linux/libfdt.h b/include/linux/libfdt.h
index 4c0306c69b4..a0c3bf6c9ed 100644
--- a/include/linux/libfdt.h
+++ b/include/linux/libfdt.h
@@ -2,7 +2,7 @@
 #define _INCLUDE_LIBFDT_H_
 
 #include <linux/libfdt_env.h>
-#include "../../scripts/dtc/libfdt/fdt.h"
-#include "../../scripts/dtc/libfdt/libfdt.h"
+#include <>
+#include <>
 
 #endif /* _INCLUDE_LIBFDT_H_ */
diff --git a/include/linux/netfilter/nf_conntrack_h323_asn1.h b/include/linux/netfilter/nf_conntrack_h323_asn1.h
index 8dab5968fc7..3176a277eed 100644
--- a/include/linux/netfilter/nf_conntrack_h323_asn1.h
+++ b/include/linux/netfilter/nf_conntrack_h323_asn1.h
@@ -40,7 +40,7 @@
 /*****************************************************************************
  * H.323 Types
  ****************************************************************************/
-#include "nf_conntrack_h323_types.h"
+#include <linux/netfilter/nf_conntrack_h323_types.h>
 
 typedef struct {
 	enum {
diff --git a/include/linux/pinctrl/consumer.h b/include/linux/pinctrl/consumer.h
index e9b7f435084..cbb07f85079 100644
--- a/include/linux/pinctrl/consumer.h
+++ b/include/linux/pinctrl/consumer.h
@@ -15,7 +15,7 @@
 #include <linux/err.h>
 #include <linux/list.h>
 #include <linux/seq_file.h>
-#include "pinctrl-state.h"
+#include <linux/pinctrl/pinctrl-state.h>
 
 /* This struct is private to the core and should be regarded as a cookie */
 struct pinctrl;
diff --git a/include/linux/pinctrl/machine.h b/include/linux/pinctrl/machine.h
index 7d22ab00343..e5b1716f98c 100644
--- a/include/linux/pinctrl/machine.h
+++ b/include/linux/pinctrl/machine.h
@@ -14,7 +14,7 @@
 
 #include <linux/bug.h>
 
-#include "pinctrl-state.h"
+#include <linux/pinctrl/pinctrl-state.h>
 
 enum pinctrl_map_type {
 	PIN_MAP_TYPE_INVALID,
diff --git a/include/linux/pinctrl/pinctrl.h b/include/linux/pinctrl/pinctrl.h
index 69393a66253..7d087f03e91 100644
--- a/include/linux/pinctrl/pinctrl.h
+++ b/include/linux/pinctrl/pinctrl.h
@@ -17,7 +17,7 @@
 #include <linux/radix-tree.h>
 #include <linux/list.h>
 #include <linux/seq_file.h>
-#include "pinctrl-state.h"
+#include <linux/pinctrl/pinctrl-state.h>
 
 struct device;
 struct pinctrl_dev;
diff --git a/include/linux/pinctrl/pinmux.h b/include/linux/pinctrl/pinmux.h
index 1818dcbdd9a..c15395031cb 100644
--- a/include/linux/pinctrl/pinmux.h
+++ b/include/linux/pinctrl/pinmux.h
@@ -14,7 +14,7 @@
 
 #include <linux/list.h>
 #include <linux/seq_file.h>
-#include "pinctrl.h"
+#include <linux/pinctrl/pinctrl.h>
 
 #ifdef CONFIG_PINMUX
 
diff --git a/include/scsi/osd_attributes.h b/include/scsi/osd_attributes.h
index 56e920ade32..303ba1118a4 100644
--- a/include/scsi/osd_attributes.h
+++ b/include/scsi/osd_attributes.h
@@ -1,7 +1,7 @@
 #ifndef __OSD_ATTRIBUTES_H__
 #define __OSD_ATTRIBUTES_H__
 
-#include "osd_protocol.h"
+#include <scsi/osd_protocol.h>
 
 /*
  * Contains types and constants that define attribute pages and attribute
diff --git a/include/scsi/osd_initiator.h b/include/scsi/osd_initiator.h
index 572fb549366..b2e85fdd2ae 100644
--- a/include/scsi/osd_initiator.h
+++ b/include/scsi/osd_initiator.h
@@ -14,8 +14,8 @@
 #ifndef __OSD_INITIATOR_H__
 #define __OSD_INITIATOR_H__
 
-#include "osd_protocol.h"
-#include "osd_types.h"
+#include <scsi/osd_protocol.h>
+#include <scsi/osd_types.h>
 
 #include <linux/blkdev.h>
 #include <scsi/scsi_device.h>
diff --git a/include/scsi/osd_sec.h b/include/scsi/osd_sec.h
index 4c09fee8ae1..f96151c9c9e 100644
--- a/include/scsi/osd_sec.h
+++ b/include/scsi/osd_sec.h
@@ -14,8 +14,8 @@
 #ifndef __OSD_SEC_H__
 #define __OSD_SEC_H__
 
-#include "osd_protocol.h"
-#include "osd_types.h"
+#include <scsi/osd_protocol.h>
+#include <scsi/osd_types.h>
 
 /*
  * Contains types and constants of osd capabilities and security
diff --git a/include/sound/ac97_codec.h b/include/sound/ac97_codec.h
index 02cbb50225b..fdeb8dceec0 100644
--- a/include/sound/ac97_codec.h
+++ b/include/sound/ac97_codec.h
@@ -28,9 +28,9 @@
 #include <linux/bitops.h>
 #include <linux/device.h>
 #include <linux/workqueue.h>
-#include "pcm.h"
-#include "control.h"
-#include "info.h"
+#include <sound/pcm.h>
+#include <sound/control.h>
+#include <sound/info.h>
 
 /* maximum number of devices on the AC97 bus */
 #define	AC97_BUS_MAX_DEVICES	4
diff --git a/include/sound/ad1816a.h b/include/sound/ad1816a.h
index d010858c33c..a7d8dc782e7 100644
--- a/include/sound/ad1816a.h
+++ b/include/sound/ad1816a.h
@@ -20,9 +20,9 @@
     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
 */
 
-#include "control.h"
-#include "pcm.h"
-#include "timer.h"
+#include <sound/control.h>
+#include <sound/pcm.h>
+#include <sound/timer.h>
 
 #define AD1816A_REG(r)			(chip->port + r)
 
diff --git a/include/sound/ak4531_codec.h b/include/sound/ak4531_codec.h
index 575296cf798..85ea86ea35b 100644
--- a/include/sound/ak4531_codec.h
+++ b/include/sound/ak4531_codec.h
@@ -25,8 +25,8 @@
  *
  */
 
-#include "info.h"
-#include "control.h"
+#include <sound/info.h>
+#include <sound/control.h>
 
 /*
  *  ASAHI KASEI - AK4531 codec
diff --git a/include/sound/emu10k1_synth.h b/include/sound/emu10k1_synth.h
index 6ef61c42093..9f211e957bf 100644
--- a/include/sound/emu10k1_synth.h
+++ b/include/sound/emu10k1_synth.h
@@ -20,8 +20,8 @@
  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  */
 
-#include "emu10k1.h"
-#include "emux_synth.h"
+#include <sound/emu10k1.h>
+#include <sound/emux_synth.h>
 
 /* sequencer device id */
 #define SNDRV_SEQ_DEV_ID_EMU10K1_SYNTH	"emu10k1-synth"
diff --git a/include/sound/emu8000.h b/include/sound/emu8000.h
index c8f66bde6d9..c321302a914 100644
--- a/include/sound/emu8000.h
+++ b/include/sound/emu8000.h
@@ -21,8 +21,8 @@
  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  */
 
-#include "emux_synth.h"
-#include "seq_kernel.h"
+#include <sound/emux_synth.h>
+#include <sound/seq_kernel.h>
 
 /*
  * Hardware parameters.
diff --git a/include/sound/emux_legacy.h b/include/sound/emux_legacy.h
index 6fe3da2a5e1..baf43fc24d3 100644
--- a/include/sound/emux_legacy.h
+++ b/include/sound/emux_legacy.h
@@ -22,7 +22,7 @@
  *
  */
 
-#include "seq_oss_legacy.h"
+#include <sound/seq_oss_legacy.h>
 
 /*
  * awe hardware controls
diff --git a/include/sound/emux_synth.h b/include/sound/emux_synth.h
index d8cb51b86c2..fb81f3722b6 100644
--- a/include/sound/emux_synth.h
+++ b/include/sound/emux_synth.h
@@ -21,15 +21,15 @@
  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  */
 
-#include "seq_kernel.h"
-#include "seq_device.h"
-#include "soundfont.h"
-#include "seq_midi_emul.h"
+#include <sound/seq_kernel.h>
+#include <sound/seq_device.h>
+#include <sound/soundfont.h>
+#include <sound/seq_midi_emul.h>
 #ifdef CONFIG_SND_SEQUENCER_OSS
-#include "seq_oss.h"
+#include <sound/seq_oss.h>
 #endif
-#include "emux_legacy.h"
-#include "seq_virmidi.h"
+#include <sound/emux_legacy.h>
+#include <sound/seq_virmidi.h>
 
 /*
  * compile flags
diff --git a/include/sound/es1688.h b/include/sound/es1688.h
index f752dd33dfa..1d636a2d889 100644
--- a/include/sound/es1688.h
+++ b/include/sound/es1688.h
@@ -22,8 +22,8 @@
  *
  */
 
-#include "control.h"
-#include "pcm.h"
+#include <sound/control.h>
+#include <sound/pcm.h>
 #include <linux/interrupt.h>
 
 #define ES1688_HW_AUTO		0x0000
diff --git a/include/sound/gus.h b/include/sound/gus.h
index 841bb8df38c..42905d811da 100644
--- a/include/sound/gus.h
+++ b/include/sound/gus.h
@@ -22,11 +22,11 @@
  *
  */
 
-#include "pcm.h"
-#include "rawmidi.h"
-#include "timer.h"
-#include "seq_midi_emul.h"
-#include "seq_device.h"
+#include <sound/pcm.h>
+#include <sound/rawmidi.h>
+#include <sound/timer.h>
+#include <sound/seq_midi_emul.h>
+#include <sound/seq_device.h>
 #include <asm/io.h>
 
 /* IO ports */
diff --git a/include/sound/mpu401.h b/include/sound/mpu401.h
index 20230db00ef..e9420969251 100644
--- a/include/sound/mpu401.h
+++ b/include/sound/mpu401.h
@@ -22,7 +22,7 @@
  *
  */
 
-#include "rawmidi.h"
+#include <sound/rawmidi.h>
 #include <linux/interrupt.h>
 
 #define MPU401_HW_MPU401		1	/* native MPU401 */
diff --git a/include/sound/pcm.h b/include/sound/pcm.h
index cdca2ab1e71..d0711bc8c91 100644
--- a/include/sound/pcm.h
+++ b/include/sound/pcm.h
@@ -35,7 +35,7 @@
 #define snd_pcm_chip(pcm) ((pcm)->private_data)
 
 #if defined(CONFIG_SND_PCM_OSS) || defined(CONFIG_SND_PCM_OSS_MODULE)
-#include "pcm_oss.h"
+#include <sound/pcm_oss.h>
 #endif
 
 /*
diff --git a/include/sound/rawmidi.h b/include/sound/rawmidi.h
index 6b14359d9fe..adf0885153f 100644
--- a/include/sound/rawmidi.h
+++ b/include/sound/rawmidi.h
@@ -30,7 +30,7 @@
 #include <linux/workqueue.h>
 
 #if defined(CONFIG_SND_SEQUENCER) || defined(CONFIG_SND_SEQUENCER_MODULE)
-#include "seq_device.h"
+#include <sound/seq_device.h>
 #endif
 
 /*
diff --git a/include/sound/sb.h b/include/sound/sb.h
index 95353542256..ba396032964 100644
--- a/include/sound/sb.h
+++ b/include/sound/sb.h
@@ -22,8 +22,8 @@
  *
  */
 
-#include "pcm.h"
-#include "rawmidi.h"
+#include <sound/pcm.h>
+#include <sound/rawmidi.h>
 #include <linux/interrupt.h>
 #include <asm/io.h>
 
diff --git a/include/sound/sb16_csp.h b/include/sound/sb16_csp.h
index af1b49e982d..7e950560e59 100644
--- a/include/sound/sb16_csp.h
+++ b/include/sound/sb16_csp.h
@@ -119,8 +119,8 @@ struct snd_sb_csp_info {
 #define SNDRV_SB_CSP_IOCTL_RESTART	_IO('H', 0x16)
 
 #ifdef __KERNEL__
-#include "sb.h"
-#include "hwdep.h"
+#include <sound/sb.h>
+#include <sound/hwdep.h>
 #include <linux/firmware.h>
 
 struct snd_sb_csp;
diff --git a/include/sound/seq_kernel.h b/include/sound/seq_kernel.h
index f352a98ce4f..2398521f099 100644
--- a/include/sound/seq_kernel.h
+++ b/include/sound/seq_kernel.h
@@ -22,7 +22,7 @@
  *
  */
 #include <linux/time.h>
-#include "asequencer.h"
+#include <sound/asequencer.h>
 
 typedef struct snd_seq_real_time snd_seq_real_time_t;
 typedef union snd_seq_timestamp snd_seq_timestamp_t;
diff --git a/include/sound/seq_midi_emul.h b/include/sound/seq_midi_emul.h
index d6c4615901b..8139d8c191e 100644
--- a/include/sound/seq_midi_emul.h
+++ b/include/sound/seq_midi_emul.h
@@ -22,7 +22,7 @@
  *
  */
 
-#include "seq_kernel.h"
+#include <sound/seq_kernel.h>
 
 /*
  * This structure is used to keep track of the current state on each
diff --git a/include/sound/seq_midi_event.h b/include/sound/seq_midi_event.h
index 5efab8b29c5..e40f43e6fc7 100644
--- a/include/sound/seq_midi_event.h
+++ b/include/sound/seq_midi_event.h
@@ -22,7 +22,7 @@
  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  */
 
-#include "asequencer.h"
+#include <sound/asequencer.h>
 
 #define MAX_MIDI_EVENT_BUF	256
 
diff --git a/include/sound/seq_oss.h b/include/sound/seq_oss.h
index 9b060bbd6e0..d0b27ec6f8b 100644
--- a/include/sound/seq_oss.h
+++ b/include/sound/seq_oss.h
@@ -21,8 +21,8 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  */
 
-#include "asequencer.h"
-#include "seq_kernel.h"
+#include <sound/asequencer.h>
+#include <sound/seq_kernel.h>
 
 /*
  * argument structure for synthesizer operations
diff --git a/include/sound/seq_virmidi.h b/include/sound/seq_virmidi.h
index d888433a309..a03acd0d398 100644
--- a/include/sound/seq_virmidi.h
+++ b/include/sound/seq_virmidi.h
@@ -22,8 +22,8 @@
  *
  */
 
-#include "rawmidi.h"
-#include "seq_midi_event.h"
+#include <sound/rawmidi.h>
+#include <sound/seq_midi_event.h>
 
 /*
  * device file instance:
diff --git a/include/sound/snd_wavefront.h b/include/sound/snd_wavefront.h
index fa149ca77e4..35e94b3d1ec 100644
--- a/include/sound/snd_wavefront.h
+++ b/include/sound/snd_wavefront.h
@@ -1,10 +1,10 @@
 #ifndef __SOUND_SND_WAVEFRONT_H__
 #define __SOUND_SND_WAVEFRONT_H__
 
-#include "mpu401.h"
-#include "hwdep.h"
-#include "rawmidi.h"
-#include "wavefront.h"  /* generic OSS/ALSA/user-level wavefront header */
+#include <sound/mpu401.h>
+#include <sound/hwdep.h>
+#include <sound/rawmidi.h>
+#include <sound/wavefront.h>  /* generic OSS/ALSA/user-level wavefront header */
 
 /* MIDI interface */
 
diff --git a/include/sound/soundfont.h b/include/sound/soundfont.h
index 679df057406..7c93efdba90 100644
--- a/include/sound/soundfont.h
+++ b/include/sound/soundfont.h
@@ -22,8 +22,8 @@
  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  */
 
-#include "sfnt_info.h"
-#include "util_mem.h"
+#include <sound/sfnt_info.h>
+#include <sound/util_mem.h>
 
 #define SF_MAX_INSTRUMENTS	128	/* maximum instrument number */
 #define SF_MAX_PRESETS  256	/* drums are mapped from 128 to 256 */
diff --git a/include/sound/tea6330t.h b/include/sound/tea6330t.h
index 51b282b7689..e6beec23d7f 100644
--- a/include/sound/tea6330t.h
+++ b/include/sound/tea6330t.h
@@ -22,7 +22,7 @@
  *
  */
 
-#include "i2c.h"		/* generic i2c support */
+#include <sound/i2c.h>		/* generic i2c support */
 
 int snd_tea6330t_detect(struct snd_i2c_bus *bus, int equalizer);
 int snd_tea6330t_update_mixer(struct snd_card *card, struct snd_i2c_bus *bus,
diff --git a/include/sound/wss.h b/include/sound/wss.h
index fd01f22825c..0c7f034f1e8 100644
--- a/include/sound/wss.h
+++ b/include/sound/wss.h
@@ -22,11 +22,11 @@
  *
  */
 
-#include "control.h"
-#include "pcm.h"
-#include "timer.h"
+#include <sound/control.h>
+#include <sound/pcm.h>
+#include <sound/timer.h>
 
-#include "cs4231-regs.h"
+#include <sound/cs4231-regs.h>
 
 /* defines for codec.mode */
 
diff --git a/include/trace/events/compaction.h b/include/trace/events/compaction.h
index 388bcdd26d4..fde1b3e94c7 100644
--- a/include/trace/events/compaction.h
+++ b/include/trace/events/compaction.h
@@ -6,7 +6,7 @@
 
 #include <linux/types.h>
 #include <linux/tracepoint.h>
-#include "gfpflags.h"
+#include <trace/events/gfpflags.h>
 
 DECLARE_EVENT_CLASS(mm_compaction_isolate_template,
 
diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
index 08fa27244da..6bc943ecb84 100644
--- a/include/trace/events/kmem.h
+++ b/include/trace/events/kmem.h
@@ -6,7 +6,7 @@
 
 #include <linux/types.h>
 #include <linux/tracepoint.h>
-#include "gfpflags.h"
+#include <trace/events/gfpflags.h>
 
 DECLARE_EVENT_CLASS(kmem_alloc,
 
diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
index bab3b87e406..63cfcccaebb 100644
--- a/include/trace/events/vmscan.h
+++ b/include/trace/events/vmscan.h
@@ -8,7 +8,7 @@
 #include <linux/tracepoint.h>
 #include <linux/mm.h>
 #include <linux/memcontrol.h>
-#include "gfpflags.h"
+#include <trace/events/gfpflags.h>
 
 #define RECLAIM_WB_ANON		0x0001u
 #define RECLAIM_WB_FILE		0x0002u
diff --git a/include/xen/interface/callback.h b/include/xen/interface/callback.h
index 2ae3cd24326..8c5fa0e2015 100644
--- a/include/xen/interface/callback.h
+++ b/include/xen/interface/callback.h
@@ -27,7 +27,7 @@
 #ifndef __XEN_PUBLIC_CALLBACK_H__
 #define __XEN_PUBLIC_CALLBACK_H__
 
-#include "xen.h"
+#include <xen/interface/xen.h>
 
 /*
  * Prototype for this hypercall is:
diff --git a/include/xen/interface/hvm/params.h b/include/xen/interface/hvm/params.h
index 1b4f923d708..a6c79911e72 100644
--- a/include/xen/interface/hvm/params.h
+++ b/include/xen/interface/hvm/params.h
@@ -21,7 +21,7 @@
 #ifndef __XEN_PUBLIC_HVM_PARAMS_H__
 #define __XEN_PUBLIC_HVM_PARAMS_H__
 
-#include "hvm_op.h"
+#include <xen/interface/hvm/hvm_op.h>
 
 /*
  * Parameter space for HVMOP_{set,get}_param.
diff --git a/include/xen/interface/io/blkif.h b/include/xen/interface/io/blkif.h
index ee338bfde18..01c3d62436e 100644
--- a/include/xen/interface/io/blkif.h
+++ b/include/xen/interface/io/blkif.h
@@ -9,8 +9,8 @@
 #ifndef __XEN_PUBLIC_IO_BLKIF_H__
 #define __XEN_PUBLIC_IO_BLKIF_H__
 
-#include "ring.h"
-#include "../grant_table.h"
+#include <xen/interface/io/ring.h>
+#include <xen/interface/grant_table.h>
 
 /*
  * Front->back notifications: When enqueuing a new request, sending a
diff --git a/include/xen/interface/io/netif.h b/include/xen/interface/io/netif.h
index cb94668f6e9..9dfc1200098 100644
--- a/include/xen/interface/io/netif.h
+++ b/include/xen/interface/io/netif.h
@@ -9,8 +9,8 @@
 #ifndef __XEN_PUBLIC_IO_NETIF_H__
 #define __XEN_PUBLIC_IO_NETIF_H__
 
-#include "ring.h"
-#include "../grant_table.h"
+#include <xen/interface/io/ring.h>
+#include <xen/interface/grant_table.h>
 
 /*
  * Notifications after enqueuing any type of message should be conditional on
diff --git a/include/xen/interface/platform.h b/include/xen/interface/platform.h
index 61fa6616098..52ff8377d3b 100644
--- a/include/xen/interface/platform.h
+++ b/include/xen/interface/platform.h
@@ -27,7 +27,7 @@
 #ifndef __XEN_PUBLIC_PLATFORM_H__
 #define __XEN_PUBLIC_PLATFORM_H__
 
-#include "xen.h"
+#include <xen/interface/xen.h>
 
 #define XENPF_INTERFACE_VERSION 0x03000001
 
diff --git a/include/xen/interface/sched.h b/include/xen/interface/sched.h
index dd55dac340d..9ce083960a2 100644
--- a/include/xen/interface/sched.h
+++ b/include/xen/interface/sched.h
@@ -9,7 +9,7 @@
 #ifndef __XEN_PUBLIC_SCHED_H__
 #define __XEN_PUBLIC_SCHED_H__
 
-#include "event_channel.h"
+#include <xen/interface/event_channel.h>
 
 /*
  * The prototype for this hypercall is:
diff --git a/include/xen/interface/version.h b/include/xen/interface/version.h
index e8b6519d47e..ff372a5ddfe 100644
--- a/include/xen/interface/version.h
+++ b/include/xen/interface/version.h
@@ -55,7 +55,7 @@ struct xen_feature_info {
 };
 
 /* Declares the features reported by XENVER_get_features. */
-#include "features.h"
+#include <xen/interface/features.h>
 
 /* arg == NULL; returns host memory page size. */
 #define XENVER_pagesize 7
-- 
cgit v1.2.3-70-g09d2


From abbf1590de22a6d2240a59383477da50d1402f6a Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 2 Oct 2012 18:01:26 +0100
Subject: UAPI: Partition the header include path sets and add uapi/ header
 directories

Partition the header include path flags into two sets, one for kernelspace
builds and one for userspace builds.

Add the following directories to build after the ordinary include directories
so that #include will pick up the UAPI header directly if the kernel header
has been moved there.

The userspace set (represented by the USERINCLUDE make variable) contains:

	-I $(srctree)/arch/$(hdr-arch)/include/uapi
	-I arch/$(hdr-arch)/include/generated/uapi
	-I $(srctree)/include/uapi
	-I include/generated/uapi
	-include $(srctree)/include/linux/kconfig.h

and the kernelspace set (represented by the LINUXINCLUDE make variable)
contains:

	-I $(srctree)/arch/$(hdr-arch)/include
	-I arch/$(hdr-arch)/include/generated
	-I $(srctree)/include
	-I include		--- if not building in the source tree

plus everything in the USERINCLUDE set.

Then use USERINCLUDE in building the x86 boot code.

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Acked-by: Dave Jones <davej@redhat.com>
---
 Makefile                          | 18 ++++++++++++++----
 arch/cris/Makefile                |  4 +++-
 arch/um/Makefile                  |  4 +++-
 arch/x86/boot/Makefile            |  4 ++--
 arch/x86/boot/mkcpustr.c          |  2 ++
 arch/x86/include/asm/cpufeature.h |  2 ++
 arch/x86/kernel/cpu/mkcapflags.pl |  5 ++++-
 7 files changed, 30 insertions(+), 9 deletions(-)

(limited to 'arch/x86')

diff --git a/Makefile b/Makefile
index 846dd760785..da1707c3904 100644
--- a/Makefile
+++ b/Makefile
@@ -350,12 +350,22 @@ AFLAGS_KERNEL	=
 CFLAGS_GCOV	= -fprofile-arcs -ftest-coverage
 
 
+# Use USERINCLUDE when you must reference the UAPI directories only.
+USERINCLUDE    := \
+		-I$(srctree)/arch/$(hdr-arch)/include/uapi \
+		-Iarch/$(hdr-arch)/include/generated/uapi \
+		-I$(srctree)/include/uapi \
+		-Iinclude/generated/uapi \
+                -include $(srctree)/include/linux/kconfig.h
+
 # Use LINUXINCLUDE when you must reference the include/ directory.
 # Needed to be compatible with the O= option
-LINUXINCLUDE    := -I$(srctree)/arch/$(hdr-arch)/include \
-                   -Iarch/$(hdr-arch)/include/generated -Iinclude \
-                   $(if $(KBUILD_SRC), -I$(srctree)/include) \
-                   -include $(srctree)/include/linux/kconfig.h
+LINUXINCLUDE    := \
+		-I$(srctree)/arch/$(hdr-arch)/include \
+		-Iarch/$(hdr-arch)/include/generated \
+		$(if $(KBUILD_SRC), -I$(srctree)/include) \
+		-Iinclude \
+		$(USERINCLUDE)
 
 KBUILD_CPPFLAGS := -D__KERNEL__
 
diff --git a/arch/cris/Makefile b/arch/cris/Makefile
index 29c2ceb38a7..39dc7d00083 100644
--- a/arch/cris/Makefile
+++ b/arch/cris/Makefile
@@ -23,7 +23,9 @@ mach-$(CONFIG_ETRAXFS) := fs
 
 ifneq ($(arch-y),)
 SARCH := arch-$(arch-y)
-inc := -Iarch/cris/include/$(SARCH)
+inc := -Iarch/cris/include/uapi/$(SARCH)
+inc += -Iarch/cris/include/$(SARCH)
+inc += -Iarch/cris/include/uapi/$(SARCH)/arch
 inc += -Iarch/cris/include/$(SARCH)/arch
 else
 SARCH :=
diff --git a/arch/um/Makefile b/arch/um/Makefile
index 097091059aa..133f7de2a13 100644
--- a/arch/um/Makefile
+++ b/arch/um/Makefile
@@ -66,7 +66,9 @@ USER_CFLAGS = $(patsubst $(KERNEL_DEFINES),,$(patsubst -D__KERNEL__,,\
 include $(srctree)/$(ARCH_DIR)/Makefile-os-$(OS)
 
 KBUILD_CPPFLAGS += -I$(srctree)/$(HOST_DIR)/include \
-		   -I$(HOST_DIR)/include/generated
+		   -I$(srctree)/$(HOST_DIR)/include/uapi \
+		   -I$(HOST_DIR)/include/generated \
+		   -I$(HOST_DIR)/include/generated/uapi
 
 # -Derrno=kernel_errno - This turns all kernel references to errno into
 # kernel_errno to separate them from the libc errno.  This allows -fno-common
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index f7535bedc33..ce03476d8c8 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -37,7 +37,7 @@ setup-y		+= video-bios.o
 targets		+= $(setup-y)
 hostprogs-y	:= mkcpustr tools/build
 
-HOST_EXTRACFLAGS += -I$(srctree)/tools/include $(LINUXINCLUDE) \
+HOST_EXTRACFLAGS += -I$(srctree)/tools/include $(USERINCLUDE) \
 	            -D__EXPORTED_HEADERS__
 
 $(obj)/cpu.o: $(obj)/cpustr.h
@@ -52,7 +52,7 @@ $(obj)/cpustr.h: $(obj)/mkcpustr FORCE
 
 # How to compile the 16-bit code.  Note we always compile for -march=i386,
 # that way we can complain to the user if the CPU is insufficient.
-KBUILD_CFLAGS	:= $(LINUXINCLUDE) -g -Os -D_SETUP -D__KERNEL__ \
+KBUILD_CFLAGS	:= $(USERINCLUDE) -g -Os -D_SETUP -D__KERNEL__ \
 		   -DDISABLE_BRANCH_PROFILING \
 		   -Wall -Wstrict-prototypes \
 		   -march=i386 -mregparm=3 \
diff --git a/arch/x86/boot/mkcpustr.c b/arch/x86/boot/mkcpustr.c
index 919257f526f..4579eff0ef4 100644
--- a/arch/x86/boot/mkcpustr.c
+++ b/arch/x86/boot/mkcpustr.c
@@ -15,6 +15,8 @@
 
 #include <stdio.h>
 
+#include "../include/asm/required-features.h"
+#include "../include/asm/cpufeature.h"
 #include "../kernel/cpu/capflags.c"
 
 int main(void)
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 16cae425d1f..8c297aa53ee 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -4,7 +4,9 @@
 #ifndef _ASM_X86_CPUFEATURE_H
 #define _ASM_X86_CPUFEATURE_H
 
+#ifndef _ASM_X86_REQUIRED_FEATURES_H
 #include <asm/required-features.h>
+#endif
 
 #define NCAPINTS	10	/* N 32-bit words worth of info */
 
diff --git a/arch/x86/kernel/cpu/mkcapflags.pl b/arch/x86/kernel/cpu/mkcapflags.pl
index c7b3fe2d72e..091972ef49d 100644
--- a/arch/x86/kernel/cpu/mkcapflags.pl
+++ b/arch/x86/kernel/cpu/mkcapflags.pl
@@ -8,7 +8,10 @@
 open(IN, "< $in\0")   or die "$0: cannot open: $in: $!\n";
 open(OUT, "> $out\0") or die "$0: cannot create: $out: $!\n";
 
-print OUT "#include <asm/cpufeature.h>\n\n";
+print OUT "#ifndef _ASM_X86_CPUFEATURE_H\n";
+print OUT "#include <asm/cpufeature.h>\n";
+print OUT "#endif\n";
+print OUT "\n";
 print OUT "const char * const x86_cap_flags[NCAPINTS*32] = {\n";
 
 %features = ();
-- 
cgit v1.2.3-70-g09d2


From 4413e16d9d21673bb5048a2e542f1aaa00015c2e Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 2 Oct 2012 18:01:35 +0100
Subject: UAPI: (Scripted) Set up UAPI Kbuild files

Set up empty UAPI Kbuild files to be populated by the header splitter.

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Acked-by: Dave Jones <davej@redhat.com>
---
 arch/alpha/include/uapi/asm/Kbuild          |  3 +++
 arch/arm/include/uapi/asm/Kbuild            |  3 +++
 arch/arm64/include/uapi/asm/Kbuild          |  3 +++
 arch/avr32/include/uapi/asm/Kbuild          |  3 +++
 arch/blackfin/include/uapi/asm/Kbuild       |  3 +++
 arch/c6x/include/uapi/asm/Kbuild            |  3 +++
 arch/cris/include/uapi/arch-v10/arch/Kbuild |  1 +
 arch/cris/include/uapi/arch-v32/arch/Kbuild |  1 +
 arch/cris/include/uapi/asm/Kbuild           |  5 +++++
 arch/frv/include/uapi/asm/Kbuild            |  3 +++
 arch/h8300/include/uapi/asm/Kbuild          |  3 +++
 arch/hexagon/include/uapi/asm/Kbuild        |  3 +++
 arch/ia64/include/uapi/asm/Kbuild           |  3 +++
 arch/m32r/include/uapi/asm/Kbuild           |  3 +++
 arch/m68k/include/uapi/asm/Kbuild           |  3 +++
 arch/microblaze/include/uapi/asm/Kbuild     |  3 +++
 arch/mips/include/uapi/asm/Kbuild           |  3 +++
 arch/mn10300/include/uapi/asm/Kbuild        |  3 +++
 arch/openrisc/include/uapi/asm/Kbuild       |  3 +++
 arch/parisc/include/uapi/asm/Kbuild         |  3 +++
 arch/powerpc/include/uapi/asm/Kbuild        |  3 +++
 arch/s390/include/uapi/asm/Kbuild           |  3 +++
 arch/score/include/uapi/asm/Kbuild          |  3 +++
 arch/sh/include/uapi/asm/Kbuild             |  3 +++
 arch/sparc/include/uapi/asm/Kbuild          |  5 +++++
 arch/tile/include/uapi/arch/Kbuild          |  1 +
 arch/tile/include/uapi/asm/Kbuild           |  3 +++
 arch/unicore32/include/uapi/asm/Kbuild      |  3 +++
 arch/x86/include/uapi/asm/Kbuild            |  6 ++++++
 arch/xtensa/include/uapi/asm/Kbuild         |  3 +++
 include/uapi/Kbuild                         | 14 ++++++++++++++
 include/uapi/asm-generic/Kbuild             |  1 +
 include/uapi/drm/Kbuild                     |  1 +
 include/uapi/linux/Kbuild                   | 22 ++++++++++++++++++++++
 include/uapi/linux/byteorder/Kbuild         |  1 +
 include/uapi/linux/caif/Kbuild              |  1 +
 include/uapi/linux/can/Kbuild               |  1 +
 include/uapi/linux/dvb/Kbuild               |  1 +
 include/uapi/linux/hdlc/Kbuild              |  1 +
 include/uapi/linux/hsi/Kbuild               |  1 +
 include/uapi/linux/isdn/Kbuild              |  1 +
 include/uapi/linux/mmc/Kbuild               |  1 +
 include/uapi/linux/netfilter/Kbuild         |  2 ++
 include/uapi/linux/netfilter/ipset/Kbuild   |  1 +
 include/uapi/linux/netfilter_arp/Kbuild     |  1 +
 include/uapi/linux/netfilter_bridge/Kbuild  |  1 +
 include/uapi/linux/netfilter_ipv4/Kbuild    |  1 +
 include/uapi/linux/netfilter_ipv6/Kbuild    |  1 +
 include/uapi/linux/nfsd/Kbuild              |  1 +
 include/uapi/linux/raid/Kbuild              |  1 +
 include/uapi/linux/spi/Kbuild               |  1 +
 include/uapi/linux/sunrpc/Kbuild            |  1 +
 include/uapi/linux/tc_act/Kbuild            |  1 +
 include/uapi/linux/tc_ematch/Kbuild         |  1 +
 include/uapi/linux/usb/Kbuild               |  1 +
 include/uapi/linux/wimax/Kbuild             |  1 +
 include/uapi/mtd/Kbuild                     |  1 +
 include/uapi/rdma/Kbuild                    |  1 +
 include/uapi/scsi/Kbuild                    |  2 ++
 include/uapi/scsi/fc/Kbuild                 |  1 +
 include/uapi/sound/Kbuild                   |  1 +
 include/uapi/video/Kbuild                   |  1 +
 include/uapi/xen/Kbuild                     |  1 +
 63 files changed, 160 insertions(+)
 create mode 100644 arch/alpha/include/uapi/asm/Kbuild
 create mode 100644 arch/arm/include/uapi/asm/Kbuild
 create mode 100644 arch/arm64/include/uapi/asm/Kbuild
 create mode 100644 arch/avr32/include/uapi/asm/Kbuild
 create mode 100644 arch/blackfin/include/uapi/asm/Kbuild
 create mode 100644 arch/c6x/include/uapi/asm/Kbuild
 create mode 100644 arch/cris/include/uapi/arch-v10/arch/Kbuild
 create mode 100644 arch/cris/include/uapi/arch-v32/arch/Kbuild
 create mode 100644 arch/cris/include/uapi/asm/Kbuild
 create mode 100644 arch/frv/include/uapi/asm/Kbuild
 create mode 100644 arch/h8300/include/uapi/asm/Kbuild
 create mode 100644 arch/hexagon/include/uapi/asm/Kbuild
 create mode 100644 arch/ia64/include/uapi/asm/Kbuild
 create mode 100644 arch/m32r/include/uapi/asm/Kbuild
 create mode 100644 arch/m68k/include/uapi/asm/Kbuild
 create mode 100644 arch/microblaze/include/uapi/asm/Kbuild
 create mode 100644 arch/mips/include/uapi/asm/Kbuild
 create mode 100644 arch/mn10300/include/uapi/asm/Kbuild
 create mode 100644 arch/openrisc/include/uapi/asm/Kbuild
 create mode 100644 arch/parisc/include/uapi/asm/Kbuild
 create mode 100644 arch/powerpc/include/uapi/asm/Kbuild
 create mode 100644 arch/s390/include/uapi/asm/Kbuild
 create mode 100644 arch/score/include/uapi/asm/Kbuild
 create mode 100644 arch/sh/include/uapi/asm/Kbuild
 create mode 100644 arch/sparc/include/uapi/asm/Kbuild
 create mode 100644 arch/tile/include/uapi/arch/Kbuild
 create mode 100644 arch/tile/include/uapi/asm/Kbuild
 create mode 100644 arch/unicore32/include/uapi/asm/Kbuild
 create mode 100644 arch/x86/include/uapi/asm/Kbuild
 create mode 100644 arch/xtensa/include/uapi/asm/Kbuild
 create mode 100644 include/uapi/Kbuild
 create mode 100644 include/uapi/asm-generic/Kbuild
 create mode 100644 include/uapi/drm/Kbuild
 create mode 100644 include/uapi/linux/Kbuild
 create mode 100644 include/uapi/linux/byteorder/Kbuild
 create mode 100644 include/uapi/linux/caif/Kbuild
 create mode 100644 include/uapi/linux/can/Kbuild
 create mode 100644 include/uapi/linux/dvb/Kbuild
 create mode 100644 include/uapi/linux/hdlc/Kbuild
 create mode 100644 include/uapi/linux/hsi/Kbuild
 create mode 100644 include/uapi/linux/isdn/Kbuild
 create mode 100644 include/uapi/linux/mmc/Kbuild
 create mode 100644 include/uapi/linux/netfilter/Kbuild
 create mode 100644 include/uapi/linux/netfilter/ipset/Kbuild
 create mode 100644 include/uapi/linux/netfilter_arp/Kbuild
 create mode 100644 include/uapi/linux/netfilter_bridge/Kbuild
 create mode 100644 include/uapi/linux/netfilter_ipv4/Kbuild
 create mode 100644 include/uapi/linux/netfilter_ipv6/Kbuild
 create mode 100644 include/uapi/linux/nfsd/Kbuild
 create mode 100644 include/uapi/linux/raid/Kbuild
 create mode 100644 include/uapi/linux/spi/Kbuild
 create mode 100644 include/uapi/linux/sunrpc/Kbuild
 create mode 100644 include/uapi/linux/tc_act/Kbuild
 create mode 100644 include/uapi/linux/tc_ematch/Kbuild
 create mode 100644 include/uapi/linux/usb/Kbuild
 create mode 100644 include/uapi/linux/wimax/Kbuild
 create mode 100644 include/uapi/mtd/Kbuild
 create mode 100644 include/uapi/rdma/Kbuild
 create mode 100644 include/uapi/scsi/Kbuild
 create mode 100644 include/uapi/scsi/fc/Kbuild
 create mode 100644 include/uapi/sound/Kbuild
 create mode 100644 include/uapi/video/Kbuild
 create mode 100644 include/uapi/xen/Kbuild

(limited to 'arch/x86')

diff --git a/arch/alpha/include/uapi/asm/Kbuild b/arch/alpha/include/uapi/asm/Kbuild
new file mode 100644
index 00000000000..baebb3da1d4
--- /dev/null
+++ b/arch/alpha/include/uapi/asm/Kbuild
@@ -0,0 +1,3 @@
+# UAPI Header export list
+include include/uapi/asm-generic/Kbuild.asm
+
diff --git a/arch/arm/include/uapi/asm/Kbuild b/arch/arm/include/uapi/asm/Kbuild
new file mode 100644
index 00000000000..baebb3da1d4
--- /dev/null
+++ b/arch/arm/include/uapi/asm/Kbuild
@@ -0,0 +1,3 @@
+# UAPI Header export list
+include include/uapi/asm-generic/Kbuild.asm
+
diff --git a/arch/arm64/include/uapi/asm/Kbuild b/arch/arm64/include/uapi/asm/Kbuild
new file mode 100644
index 00000000000..baebb3da1d4
--- /dev/null
+++ b/arch/arm64/include/uapi/asm/Kbuild
@@ -0,0 +1,3 @@
+# UAPI Header export list
+include include/uapi/asm-generic/Kbuild.asm
+
diff --git a/arch/avr32/include/uapi/asm/Kbuild b/arch/avr32/include/uapi/asm/Kbuild
new file mode 100644
index 00000000000..baebb3da1d4
--- /dev/null
+++ b/arch/avr32/include/uapi/asm/Kbuild
@@ -0,0 +1,3 @@
+# UAPI Header export list
+include include/uapi/asm-generic/Kbuild.asm
+
diff --git a/arch/blackfin/include/uapi/asm/Kbuild b/arch/blackfin/include/uapi/asm/Kbuild
new file mode 100644
index 00000000000..baebb3da1d4
--- /dev/null
+++ b/arch/blackfin/include/uapi/asm/Kbuild
@@ -0,0 +1,3 @@
+# UAPI Header export list
+include include/uapi/asm-generic/Kbuild.asm
+
diff --git a/arch/c6x/include/uapi/asm/Kbuild b/arch/c6x/include/uapi/asm/Kbuild
new file mode 100644
index 00000000000..baebb3da1d4
--- /dev/null
+++ b/arch/c6x/include/uapi/asm/Kbuild
@@ -0,0 +1,3 @@
+# UAPI Header export list
+include include/uapi/asm-generic/Kbuild.asm
+
diff --git a/arch/cris/include/uapi/arch-v10/arch/Kbuild b/arch/cris/include/uapi/arch-v10/arch/Kbuild
new file mode 100644
index 00000000000..aafaa5aa54d
--- /dev/null
+++ b/arch/cris/include/uapi/arch-v10/arch/Kbuild
@@ -0,0 +1 @@
+# UAPI Header export list
diff --git a/arch/cris/include/uapi/arch-v32/arch/Kbuild b/arch/cris/include/uapi/arch-v32/arch/Kbuild
new file mode 100644
index 00000000000..aafaa5aa54d
--- /dev/null
+++ b/arch/cris/include/uapi/arch-v32/arch/Kbuild
@@ -0,0 +1 @@
+# UAPI Header export list
diff --git a/arch/cris/include/uapi/asm/Kbuild b/arch/cris/include/uapi/asm/Kbuild
new file mode 100644
index 00000000000..f50236ae9ca
--- /dev/null
+++ b/arch/cris/include/uapi/asm/Kbuild
@@ -0,0 +1,5 @@
+# UAPI Header export list
+include include/uapi/asm-generic/Kbuild.asm
+
+header-y += arch-v10/
+header-y += arch-v32/
diff --git a/arch/frv/include/uapi/asm/Kbuild b/arch/frv/include/uapi/asm/Kbuild
new file mode 100644
index 00000000000..baebb3da1d4
--- /dev/null
+++ b/arch/frv/include/uapi/asm/Kbuild
@@ -0,0 +1,3 @@
+# UAPI Header export list
+include include/uapi/asm-generic/Kbuild.asm
+
diff --git a/arch/h8300/include/uapi/asm/Kbuild b/arch/h8300/include/uapi/asm/Kbuild
new file mode 100644
index 00000000000..baebb3da1d4
--- /dev/null
+++ b/arch/h8300/include/uapi/asm/Kbuild
@@ -0,0 +1,3 @@
+# UAPI Header export list
+include include/uapi/asm-generic/Kbuild.asm
+
diff --git a/arch/hexagon/include/uapi/asm/Kbuild b/arch/hexagon/include/uapi/asm/Kbuild
new file mode 100644
index 00000000000..baebb3da1d4
--- /dev/null
+++ b/arch/hexagon/include/uapi/asm/Kbuild
@@ -0,0 +1,3 @@
+# UAPI Header export list
+include include/uapi/asm-generic/Kbuild.asm
+
diff --git a/arch/ia64/include/uapi/asm/Kbuild b/arch/ia64/include/uapi/asm/Kbuild
new file mode 100644
index 00000000000..baebb3da1d4
--- /dev/null
+++ b/arch/ia64/include/uapi/asm/Kbuild
@@ -0,0 +1,3 @@
+# UAPI Header export list
+include include/uapi/asm-generic/Kbuild.asm
+
diff --git a/arch/m32r/include/uapi/asm/Kbuild b/arch/m32r/include/uapi/asm/Kbuild
new file mode 100644
index 00000000000..baebb3da1d4
--- /dev/null
+++ b/arch/m32r/include/uapi/asm/Kbuild
@@ -0,0 +1,3 @@
+# UAPI Header export list
+include include/uapi/asm-generic/Kbuild.asm
+
diff --git a/arch/m68k/include/uapi/asm/Kbuild b/arch/m68k/include/uapi/asm/Kbuild
new file mode 100644
index 00000000000..baebb3da1d4
--- /dev/null
+++ b/arch/m68k/include/uapi/asm/Kbuild
@@ -0,0 +1,3 @@
+# UAPI Header export list
+include include/uapi/asm-generic/Kbuild.asm
+
diff --git a/arch/microblaze/include/uapi/asm/Kbuild b/arch/microblaze/include/uapi/asm/Kbuild
new file mode 100644
index 00000000000..baebb3da1d4
--- /dev/null
+++ b/arch/microblaze/include/uapi/asm/Kbuild
@@ -0,0 +1,3 @@
+# UAPI Header export list
+include include/uapi/asm-generic/Kbuild.asm
+
diff --git a/arch/mips/include/uapi/asm/Kbuild b/arch/mips/include/uapi/asm/Kbuild
new file mode 100644
index 00000000000..baebb3da1d4
--- /dev/null
+++ b/arch/mips/include/uapi/asm/Kbuild
@@ -0,0 +1,3 @@
+# UAPI Header export list
+include include/uapi/asm-generic/Kbuild.asm
+
diff --git a/arch/mn10300/include/uapi/asm/Kbuild b/arch/mn10300/include/uapi/asm/Kbuild
new file mode 100644
index 00000000000..baebb3da1d4
--- /dev/null
+++ b/arch/mn10300/include/uapi/asm/Kbuild
@@ -0,0 +1,3 @@
+# UAPI Header export list
+include include/uapi/asm-generic/Kbuild.asm
+
diff --git a/arch/openrisc/include/uapi/asm/Kbuild b/arch/openrisc/include/uapi/asm/Kbuild
new file mode 100644
index 00000000000..baebb3da1d4
--- /dev/null
+++ b/arch/openrisc/include/uapi/asm/Kbuild
@@ -0,0 +1,3 @@
+# UAPI Header export list
+include include/uapi/asm-generic/Kbuild.asm
+
diff --git a/arch/parisc/include/uapi/asm/Kbuild b/arch/parisc/include/uapi/asm/Kbuild
new file mode 100644
index 00000000000..baebb3da1d4
--- /dev/null
+++ b/arch/parisc/include/uapi/asm/Kbuild
@@ -0,0 +1,3 @@
+# UAPI Header export list
+include include/uapi/asm-generic/Kbuild.asm
+
diff --git a/arch/powerpc/include/uapi/asm/Kbuild b/arch/powerpc/include/uapi/asm/Kbuild
new file mode 100644
index 00000000000..baebb3da1d4
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/Kbuild
@@ -0,0 +1,3 @@
+# UAPI Header export list
+include include/uapi/asm-generic/Kbuild.asm
+
diff --git a/arch/s390/include/uapi/asm/Kbuild b/arch/s390/include/uapi/asm/Kbuild
new file mode 100644
index 00000000000..baebb3da1d4
--- /dev/null
+++ b/arch/s390/include/uapi/asm/Kbuild
@@ -0,0 +1,3 @@
+# UAPI Header export list
+include include/uapi/asm-generic/Kbuild.asm
+
diff --git a/arch/score/include/uapi/asm/Kbuild b/arch/score/include/uapi/asm/Kbuild
new file mode 100644
index 00000000000..baebb3da1d4
--- /dev/null
+++ b/arch/score/include/uapi/asm/Kbuild
@@ -0,0 +1,3 @@
+# UAPI Header export list
+include include/uapi/asm-generic/Kbuild.asm
+
diff --git a/arch/sh/include/uapi/asm/Kbuild b/arch/sh/include/uapi/asm/Kbuild
new file mode 100644
index 00000000000..baebb3da1d4
--- /dev/null
+++ b/arch/sh/include/uapi/asm/Kbuild
@@ -0,0 +1,3 @@
+# UAPI Header export list
+include include/uapi/asm-generic/Kbuild.asm
+
diff --git a/arch/sparc/include/uapi/asm/Kbuild b/arch/sparc/include/uapi/asm/Kbuild
new file mode 100644
index 00000000000..7518ad28696
--- /dev/null
+++ b/arch/sparc/include/uapi/asm/Kbuild
@@ -0,0 +1,5 @@
+# UAPI Header export list
+# User exported sparc header files
+
+include include/uapi/asm-generic/Kbuild.asm
+
diff --git a/arch/tile/include/uapi/arch/Kbuild b/arch/tile/include/uapi/arch/Kbuild
new file mode 100644
index 00000000000..aafaa5aa54d
--- /dev/null
+++ b/arch/tile/include/uapi/arch/Kbuild
@@ -0,0 +1 @@
+# UAPI Header export list
diff --git a/arch/tile/include/uapi/asm/Kbuild b/arch/tile/include/uapi/asm/Kbuild
new file mode 100644
index 00000000000..baebb3da1d4
--- /dev/null
+++ b/arch/tile/include/uapi/asm/Kbuild
@@ -0,0 +1,3 @@
+# UAPI Header export list
+include include/uapi/asm-generic/Kbuild.asm
+
diff --git a/arch/unicore32/include/uapi/asm/Kbuild b/arch/unicore32/include/uapi/asm/Kbuild
new file mode 100644
index 00000000000..baebb3da1d4
--- /dev/null
+++ b/arch/unicore32/include/uapi/asm/Kbuild
@@ -0,0 +1,3 @@
+# UAPI Header export list
+include include/uapi/asm-generic/Kbuild.asm
+
diff --git a/arch/x86/include/uapi/asm/Kbuild b/arch/x86/include/uapi/asm/Kbuild
new file mode 100644
index 00000000000..83b6e9a0dce
--- /dev/null
+++ b/arch/x86/include/uapi/asm/Kbuild
@@ -0,0 +1,6 @@
+# UAPI Header export list
+include include/uapi/asm-generic/Kbuild.asm
+
+genhdr-y += unistd_32.h
+genhdr-y += unistd_64.h
+genhdr-y += unistd_x32.h
diff --git a/arch/xtensa/include/uapi/asm/Kbuild b/arch/xtensa/include/uapi/asm/Kbuild
new file mode 100644
index 00000000000..baebb3da1d4
--- /dev/null
+++ b/arch/xtensa/include/uapi/asm/Kbuild
@@ -0,0 +1,3 @@
+# UAPI Header export list
+include include/uapi/asm-generic/Kbuild.asm
+
diff --git a/include/uapi/Kbuild b/include/uapi/Kbuild
new file mode 100644
index 00000000000..81d2106287f
--- /dev/null
+++ b/include/uapi/Kbuild
@@ -0,0 +1,14 @@
+# UAPI Header export list
+# Top-level Makefile calls into asm-$(ARCH)
+# List only non-arch directories below
+
+
+header-y += asm-generic/
+header-y += linux/
+header-y += sound/
+header-y += mtd/
+header-y += rdma/
+header-y += video/
+header-y += drm/
+header-y += xen/
+header-y += scsi/
diff --git a/include/uapi/asm-generic/Kbuild b/include/uapi/asm-generic/Kbuild
new file mode 100644
index 00000000000..aafaa5aa54d
--- /dev/null
+++ b/include/uapi/asm-generic/Kbuild
@@ -0,0 +1 @@
+# UAPI Header export list
diff --git a/include/uapi/drm/Kbuild b/include/uapi/drm/Kbuild
new file mode 100644
index 00000000000..aafaa5aa54d
--- /dev/null
+++ b/include/uapi/drm/Kbuild
@@ -0,0 +1 @@
+# UAPI Header export list
diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
new file mode 100644
index 00000000000..13a9cf4cb6f
--- /dev/null
+++ b/include/uapi/linux/Kbuild
@@ -0,0 +1,22 @@
+# UAPI Header export list
+header-y += byteorder/
+header-y += can/
+header-y += caif/
+header-y += dvb/
+header-y += hdlc/
+header-y += hsi/
+header-y += isdn/
+header-y += mmc/
+header-y += nfsd/
+header-y += raid/
+header-y += spi/
+header-y += sunrpc/
+header-y += tc_act/
+header-y += tc_ematch/
+header-y += netfilter/
+header-y += netfilter_arp/
+header-y += netfilter_bridge/
+header-y += netfilter_ipv4/
+header-y += netfilter_ipv6/
+header-y += usb/
+header-y += wimax/
diff --git a/include/uapi/linux/byteorder/Kbuild b/include/uapi/linux/byteorder/Kbuild
new file mode 100644
index 00000000000..aafaa5aa54d
--- /dev/null
+++ b/include/uapi/linux/byteorder/Kbuild
@@ -0,0 +1 @@
+# UAPI Header export list
diff --git a/include/uapi/linux/caif/Kbuild b/include/uapi/linux/caif/Kbuild
new file mode 100644
index 00000000000..aafaa5aa54d
--- /dev/null
+++ b/include/uapi/linux/caif/Kbuild
@@ -0,0 +1 @@
+# UAPI Header export list
diff --git a/include/uapi/linux/can/Kbuild b/include/uapi/linux/can/Kbuild
new file mode 100644
index 00000000000..aafaa5aa54d
--- /dev/null
+++ b/include/uapi/linux/can/Kbuild
@@ -0,0 +1 @@
+# UAPI Header export list
diff --git a/include/uapi/linux/dvb/Kbuild b/include/uapi/linux/dvb/Kbuild
new file mode 100644
index 00000000000..aafaa5aa54d
--- /dev/null
+++ b/include/uapi/linux/dvb/Kbuild
@@ -0,0 +1 @@
+# UAPI Header export list
diff --git a/include/uapi/linux/hdlc/Kbuild b/include/uapi/linux/hdlc/Kbuild
new file mode 100644
index 00000000000..aafaa5aa54d
--- /dev/null
+++ b/include/uapi/linux/hdlc/Kbuild
@@ -0,0 +1 @@
+# UAPI Header export list
diff --git a/include/uapi/linux/hsi/Kbuild b/include/uapi/linux/hsi/Kbuild
new file mode 100644
index 00000000000..aafaa5aa54d
--- /dev/null
+++ b/include/uapi/linux/hsi/Kbuild
@@ -0,0 +1 @@
+# UAPI Header export list
diff --git a/include/uapi/linux/isdn/Kbuild b/include/uapi/linux/isdn/Kbuild
new file mode 100644
index 00000000000..aafaa5aa54d
--- /dev/null
+++ b/include/uapi/linux/isdn/Kbuild
@@ -0,0 +1 @@
+# UAPI Header export list
diff --git a/include/uapi/linux/mmc/Kbuild b/include/uapi/linux/mmc/Kbuild
new file mode 100644
index 00000000000..aafaa5aa54d
--- /dev/null
+++ b/include/uapi/linux/mmc/Kbuild
@@ -0,0 +1 @@
+# UAPI Header export list
diff --git a/include/uapi/linux/netfilter/Kbuild b/include/uapi/linux/netfilter/Kbuild
new file mode 100644
index 00000000000..4afbace8e86
--- /dev/null
+++ b/include/uapi/linux/netfilter/Kbuild
@@ -0,0 +1,2 @@
+# UAPI Header export list
+header-y += ipset/
diff --git a/include/uapi/linux/netfilter/ipset/Kbuild b/include/uapi/linux/netfilter/ipset/Kbuild
new file mode 100644
index 00000000000..aafaa5aa54d
--- /dev/null
+++ b/include/uapi/linux/netfilter/ipset/Kbuild
@@ -0,0 +1 @@
+# UAPI Header export list
diff --git a/include/uapi/linux/netfilter_arp/Kbuild b/include/uapi/linux/netfilter_arp/Kbuild
new file mode 100644
index 00000000000..aafaa5aa54d
--- /dev/null
+++ b/include/uapi/linux/netfilter_arp/Kbuild
@@ -0,0 +1 @@
+# UAPI Header export list
diff --git a/include/uapi/linux/netfilter_bridge/Kbuild b/include/uapi/linux/netfilter_bridge/Kbuild
new file mode 100644
index 00000000000..aafaa5aa54d
--- /dev/null
+++ b/include/uapi/linux/netfilter_bridge/Kbuild
@@ -0,0 +1 @@
+# UAPI Header export list
diff --git a/include/uapi/linux/netfilter_ipv4/Kbuild b/include/uapi/linux/netfilter_ipv4/Kbuild
new file mode 100644
index 00000000000..aafaa5aa54d
--- /dev/null
+++ b/include/uapi/linux/netfilter_ipv4/Kbuild
@@ -0,0 +1 @@
+# UAPI Header export list
diff --git a/include/uapi/linux/netfilter_ipv6/Kbuild b/include/uapi/linux/netfilter_ipv6/Kbuild
new file mode 100644
index 00000000000..aafaa5aa54d
--- /dev/null
+++ b/include/uapi/linux/netfilter_ipv6/Kbuild
@@ -0,0 +1 @@
+# UAPI Header export list
diff --git a/include/uapi/linux/nfsd/Kbuild b/include/uapi/linux/nfsd/Kbuild
new file mode 100644
index 00000000000..aafaa5aa54d
--- /dev/null
+++ b/include/uapi/linux/nfsd/Kbuild
@@ -0,0 +1 @@
+# UAPI Header export list
diff --git a/include/uapi/linux/raid/Kbuild b/include/uapi/linux/raid/Kbuild
new file mode 100644
index 00000000000..aafaa5aa54d
--- /dev/null
+++ b/include/uapi/linux/raid/Kbuild
@@ -0,0 +1 @@
+# UAPI Header export list
diff --git a/include/uapi/linux/spi/Kbuild b/include/uapi/linux/spi/Kbuild
new file mode 100644
index 00000000000..aafaa5aa54d
--- /dev/null
+++ b/include/uapi/linux/spi/Kbuild
@@ -0,0 +1 @@
+# UAPI Header export list
diff --git a/include/uapi/linux/sunrpc/Kbuild b/include/uapi/linux/sunrpc/Kbuild
new file mode 100644
index 00000000000..aafaa5aa54d
--- /dev/null
+++ b/include/uapi/linux/sunrpc/Kbuild
@@ -0,0 +1 @@
+# UAPI Header export list
diff --git a/include/uapi/linux/tc_act/Kbuild b/include/uapi/linux/tc_act/Kbuild
new file mode 100644
index 00000000000..aafaa5aa54d
--- /dev/null
+++ b/include/uapi/linux/tc_act/Kbuild
@@ -0,0 +1 @@
+# UAPI Header export list
diff --git a/include/uapi/linux/tc_ematch/Kbuild b/include/uapi/linux/tc_ematch/Kbuild
new file mode 100644
index 00000000000..aafaa5aa54d
--- /dev/null
+++ b/include/uapi/linux/tc_ematch/Kbuild
@@ -0,0 +1 @@
+# UAPI Header export list
diff --git a/include/uapi/linux/usb/Kbuild b/include/uapi/linux/usb/Kbuild
new file mode 100644
index 00000000000..aafaa5aa54d
--- /dev/null
+++ b/include/uapi/linux/usb/Kbuild
@@ -0,0 +1 @@
+# UAPI Header export list
diff --git a/include/uapi/linux/wimax/Kbuild b/include/uapi/linux/wimax/Kbuild
new file mode 100644
index 00000000000..aafaa5aa54d
--- /dev/null
+++ b/include/uapi/linux/wimax/Kbuild
@@ -0,0 +1 @@
+# UAPI Header export list
diff --git a/include/uapi/mtd/Kbuild b/include/uapi/mtd/Kbuild
new file mode 100644
index 00000000000..aafaa5aa54d
--- /dev/null
+++ b/include/uapi/mtd/Kbuild
@@ -0,0 +1 @@
+# UAPI Header export list
diff --git a/include/uapi/rdma/Kbuild b/include/uapi/rdma/Kbuild
new file mode 100644
index 00000000000..aafaa5aa54d
--- /dev/null
+++ b/include/uapi/rdma/Kbuild
@@ -0,0 +1 @@
+# UAPI Header export list
diff --git a/include/uapi/scsi/Kbuild b/include/uapi/scsi/Kbuild
new file mode 100644
index 00000000000..29a87dd26cf
--- /dev/null
+++ b/include/uapi/scsi/Kbuild
@@ -0,0 +1,2 @@
+# UAPI Header export list
+header-y += fc/
diff --git a/include/uapi/scsi/fc/Kbuild b/include/uapi/scsi/fc/Kbuild
new file mode 100644
index 00000000000..aafaa5aa54d
--- /dev/null
+++ b/include/uapi/scsi/fc/Kbuild
@@ -0,0 +1 @@
+# UAPI Header export list
diff --git a/include/uapi/sound/Kbuild b/include/uapi/sound/Kbuild
new file mode 100644
index 00000000000..aafaa5aa54d
--- /dev/null
+++ b/include/uapi/sound/Kbuild
@@ -0,0 +1 @@
+# UAPI Header export list
diff --git a/include/uapi/video/Kbuild b/include/uapi/video/Kbuild
new file mode 100644
index 00000000000..aafaa5aa54d
--- /dev/null
+++ b/include/uapi/video/Kbuild
@@ -0,0 +1 @@
+# UAPI Header export list
diff --git a/include/uapi/xen/Kbuild b/include/uapi/xen/Kbuild
new file mode 100644
index 00000000000..aafaa5aa54d
--- /dev/null
+++ b/include/uapi/xen/Kbuild
@@ -0,0 +1 @@
+# UAPI Header export list
-- 
cgit v1.2.3-70-g09d2


From 584c5ef813c63354ed9e03b732048240c09b86af Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 2 Oct 2012 18:01:56 +0100
Subject: UAPI: x86: Fix the test_get_len tool

Fix the x86 test_get_len tool to have the right include paths in the right
order (it includes a non-exported kernel header directly), otherwise errors
like the following occur:

/data/fs/linux-2.6-hdr/include/linux/types.h:18:26: error: conflicting types for 'fd_set'
/usr/include/sys/select.h:78:5: note: previous declaration of 'fd_set' was here

and

/data/fs/linux-2.6-hdr/include/linux/string.h:42:12: error: expected identifier or '(' before '__extension__'

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Acked-by: Dave Jones <davej@redhat.com>
---
 arch/x86/tools/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/tools/Makefile b/arch/x86/tools/Makefile
index 733057b435b..bae601f900e 100644
--- a/arch/x86/tools/Makefile
+++ b/arch/x86/tools/Makefile
@@ -28,7 +28,7 @@ posttest: $(obj)/test_get_len vmlinux $(obj)/insn_sanity
 hostprogs-y	+= test_get_len insn_sanity
 
 # -I needed for generated C source and C source which in the kernel tree.
-HOSTCFLAGS_test_get_len.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x86/include/ -I$(srctree)/arch/x86/lib/ -I$(srctree)/include/
+HOSTCFLAGS_test_get_len.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x86/include/uapi/ -I$(srctree)/arch/x86/include/ -I$(srctree)/arch/x86/lib/ -I$(srctree)/include/uapi/
 
 HOSTCFLAGS_insn_sanity.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x86/include/ -I$(srctree)/arch/x86/lib/ -I$(srctree)/include/
 
-- 
cgit v1.2.3-70-g09d2


From c0522b6cc1237c935b2cead3fa7b45465df2f839 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 2 Oct 2012 18:01:56 +0100
Subject: UAPI: x86: Fix insn_sanity build failure after UAPI split

Fix a build failure in the x86 insn_sanity program after the UAPI split.  The
problem is that insn_sanity.c #includes arch/x86/lib/insn.c - which uses the
kernel string header.  This leads to conflicts for various definitions against
the /usr/include/ headers.

linux/string.h can be replaced with the normal userspace string.h if __KERNEL__
is not specified.

  HOSTCC  arch/x86/tools/insn_sanity
In file included from /data/fs/linux-2.6-hdr/include/linux/string.h:6:0,
                 from /data/fs/linux-2.6-hdr/arch/x86/lib/insn.c:21,
                 from arch/x86/tools/insn_sanity.c:36:
/data/fs/linux-2.6-hdr/include/linux/types.h:14:26: error: conflicting types for 'fd_set'
/usr/include/sys/select.h:76:5: note: previous declaration of 'fd_set' was here
/data/fs/linux-2.6-hdr/include/linux/types.h:15:25: error: conflicting types for 'dev_t'
/usr/include/sys/types.h:61:17: note: previous declaration of 'dev_t' was here
/data/fs/linux-2.6-hdr/include/linux/types.h:25:26: error: conflicting types for 'timer_t'
/usr/include/time.h:104:19: note: previous declaration of 'timer_t' was here
/data/fs/linux-2.6-hdr/include/linux/types.h:45:26: error: conflicting types for 'loff_t'
/usr/include/sys/types.h:45:18: note: previous declaration of 'loff_t' was here
/data/fs/linux-2.6-hdr/include/linux/types.h:112:17: error: conflicting types for 'u_int64_t'
/usr/include/sys/types.h:204:1: note: previous declaration of 'u_int64_t' was here
/data/fs/linux-2.6-hdr/include/linux/types.h:113:17: error: conflicting types for 'int64_t'
/usr/include/sys/types.h:198:1: note: previous declaration of 'int64_t' was here
/data/fs/linux-2.6-hdr/include/linux/types.h:134:23: error: conflicting types for 'blkcnt_t'
/usr/include/sys/types.h:236:20: note: previous declaration of 'blkcnt_t' was here
In file included from /data/fs/linux-2.6-hdr/arch/x86/lib/insn.c:21:0,
                 from arch/x86/tools/insn_sanity.c:36:
/data/fs/linux-2.6-hdr/include/linux/string.h:38:12: error: expected identifier or '(' before '__extension__'
/data/fs/linux-2.6-hdr/include/linux/string.h:38:12: error: expected identifier or '(' before ')' token
/data/fs/linux-2.6-hdr/include/linux/string.h:41:12: error: expected identifier or '(' before '__extension__'
/data/fs/linux-2.6-hdr/include/linux/string.h:53:15: error: expected identifier or '(' before '__extension__'
/data/fs/linux-2.6-hdr/include/linux/string.h:61:28: error: expected '=', ',', ';', 'asm' or '__attribute__' before 'skip_spaces'
/data/fs/linux-2.6-hdr/include/linux/string.h:65:28: error: expected '=', ',', ';', 'asm' or '__attribute__' before 'char'
/data/fs/linux-2.6-hdr/include/linux/string.h:83:15: error: expected identifier or '(' before '__extension__'
/data/fs/linux-2.6-hdr/include/linux/string.h:83:15: error: expected identifier or '(' before ')' token
/data/fs/linux-2.6-hdr/include/linux/string.h:86:15: error: expected identifier or '(' before '__extension__'
/data/fs/linux-2.6-hdr/include/linux/string.h:86:15: error: expected identifier or '(' before ')' token
/data/fs/linux-2.6-hdr/include/linux/string.h:89:24: error: expected identifier or '(' before '__extension__'
/data/fs/linux-2.6-hdr/include/linux/string.h:89:24: error: expected identifier or '(' before ')' token
/data/fs/linux-2.6-hdr/include/linux/string.h:92:24: error: expected identifier or '(' before '__extension__'
/data/fs/linux-2.6-hdr/include/linux/string.h:92:24: error: expected identifier or '(' before ')' token

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Acked-by: Dave Jones <davej@redhat.com>
---
 arch/x86/lib/insn.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c
index b1e6c4b2e8e..54fcffed28e 100644
--- a/arch/x86/lib/insn.c
+++ b/arch/x86/lib/insn.c
@@ -18,7 +18,11 @@
  * Copyright (C) IBM Corporation, 2002, 2004, 2009
  */
 
+#ifdef __KERNEL__
 #include <linux/string.h>
+#else
+#include <string.h>
+#endif
 #include <asm/inat.h>
 #include <asm/insn.h>
 
-- 
cgit v1.2.3-70-g09d2


From ec28b7f250b19f31e14b69b015d61d0818bf43a0 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 2 Oct 2012 18:01:56 +0100
Subject: UAPI: x86: Differentiate the generated UAPI and internal headers

Differentiate the generated UAPI and internal headers during generation such
that the UAPI headers can be installed elsewhere.

A later patch will use this to move the UAPI headers to:

	arch/x86/include/generated/uapi/asm/

to make them easier to handle.

A previous patch added a -I for this path.

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Acked-by: Dave Jones <davej@redhat.com>
---
 arch/x86/syscalls/Makefile | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/syscalls/Makefile b/arch/x86/syscalls/Makefile
index 3236aebc828..174b0324841 100644
--- a/arch/x86/syscalls/Makefile
+++ b/arch/x86/syscalls/Makefile
@@ -1,7 +1,9 @@
 out := $(obj)/../include/generated/asm
+uapi := $(obj)/../include/generated/asm
 
 # Create output directory if not already present
-_dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)')
+_dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)') \
+	  $(shell [ -d '$(uapi)' ] || mkdir -p '$(uapi)')
 
 syscall32 := $(srctree)/$(src)/syscall_32.tbl
 syscall64 := $(srctree)/$(src)/syscall_64.tbl
@@ -18,7 +20,7 @@ quiet_cmd_systbl = SYSTBL  $@
       cmd_systbl = $(CONFIG_SHELL) '$(systbl)' $< $@
 
 syshdr_abi_unistd_32 := i386
-$(out)/unistd_32.h: $(syscall32) $(syshdr)
+$(uapi)/unistd_32.h: $(syscall32) $(syshdr)
 	$(call if_changed,syshdr)
 
 syshdr_abi_unistd_32_ia32 := i386
@@ -28,11 +30,11 @@ $(out)/unistd_32_ia32.h: $(syscall32) $(syshdr)
 
 syshdr_abi_unistd_x32 := common,x32
 syshdr_offset_unistd_x32 := __X32_SYSCALL_BIT
-$(out)/unistd_x32.h: $(syscall64) $(syshdr)
+$(uapi)/unistd_x32.h: $(syscall64) $(syshdr)
 	$(call if_changed,syshdr)
 
 syshdr_abi_unistd_64 := common,64
-$(out)/unistd_64.h: $(syscall64) $(syshdr)
+$(uapi)/unistd_64.h: $(syscall64) $(syshdr)
 	$(call if_changed,syshdr)
 
 syshdr_abi_unistd_64_x32 := x32
@@ -45,11 +47,12 @@ $(out)/syscalls_32.h: $(syscall32) $(systbl)
 $(out)/syscalls_64.h: $(syscall64) $(systbl)
 	$(call if_changed,systbl)
 
-syshdr-y			+= unistd_32.h unistd_64.h unistd_x32.h
+uapisyshdr-y			+= unistd_32.h unistd_64.h unistd_x32.h
 syshdr-y			+= syscalls_32.h
 syshdr-$(CONFIG_X86_64)		+= unistd_32_ia32.h unistd_64_x32.h
 syshdr-$(CONFIG_X86_64)		+= syscalls_64.h
 
-targets	+= $(syshdr-y)
+targets	+= $(uapisyshdr-y) $(syshdr-y)
 
-all: $(addprefix $(out)/,$(targets))
+all: $(addprefix $(uapi)/,$(uapisyshdr-y))
+all: $(addprefix $(out)/,$(syshdr-y))
-- 
cgit v1.2.3-70-g09d2


From 10b63956fce7f369cc37fd4d994f09bd5203efe4 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 2 Oct 2012 18:01:57 +0100
Subject: UAPI: Plumb the UAPI Kbuilds into the user header installation and
 checking

Plumb the UAPI Kbuilds into the user header installation and checking system.
As the headers are split the entries will be transferred across from the old
Kbuild files to the UAPI Kbuild files.

The changes made in this commit are:

 (1) Exported generated files (of which there are currently four) are moved to
     uapi/ directories under the appropriate generated/ directory, thus we
     get:

	include/generated/uapi/linux/version.h
	arch/x86/include/generated/uapi/asm/unistd_32.h
	arch/x86/include/generated/uapi/asm/unistd_64.h
	arch/x86/include/generated/uapi/asm/unistd_x32.h

     These paths were added to the build as -I flags in a previous patch.

 (2) scripts/Makefile.headersinst is now given the UAPI path to install from
     rather than the old path.

     It then determines the old path from that and includes that Kbuild also
     if it exists, thus permitting the headers to exist in either directory
     during the changeover.

     I also renamed the "install" variable to "installdir" as it refers to a
     directory not the install program.

 (3) scripts/headers_install.pl is altered to take a list of source file paths
     instead of just their names so that the makefile can tell it exactly
     where to find each file.

     For the moment, files can be obtained from one of four places for each
     output directory:

	.../include/uapi/foo/
	.../include/generated/uapi/foo/
	.../include/foo/
	.../include/generated/foo/

     The non-UAPI paths will be dropped later.

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Acked-by: Dave Jones <davej@redhat.com>
---
 Makefile                     | 14 +++++++-------
 arch/x86/include/asm/Kbuild  |  4 ----
 arch/x86/syscalls/Makefile   |  2 +-
 include/linux/Kbuild         |  2 --
 include/uapi/linux/Kbuild    |  2 ++
 scripts/Makefile.headersinst | 45 ++++++++++++++++++++++++++++++--------------
 scripts/headers_install.pl   | 14 ++++++++------
 7 files changed, 49 insertions(+), 34 deletions(-)

(limited to 'arch/x86')

diff --git a/Makefile b/Makefile
index 1a5d315e284..86eb6acb397 100644
--- a/Makefile
+++ b/Makefile
@@ -447,7 +447,7 @@ asm-generic:
 # Detect when mixed targets is specified, and make a second invocation
 # of make so .config is not included in this case either (for *config).
 
-version_h := include/generated/linux/version.h
+version_h := include/generated/uapi/linux/version.h
 
 no-dot-config-targets := clean mrproper distclean \
 			 cscope gtags TAGS tags help %docs check% coccicheck \
@@ -908,10 +908,10 @@ headers_install_all:
 
 PHONY += headers_install
 headers_install: __headers
-	$(if $(wildcard $(srctree)/arch/$(hdr-arch)/include/asm/Kbuild),, \
-	$(error Headers not exportable for the $(SRCARCH) architecture))
-	$(Q)$(MAKE) $(hdr-inst)=include
-	$(Q)$(MAKE) $(hdr-inst)=arch/$(hdr-arch)/include/asm $(hdr-dst)
+	$(if $(wildcard $(srctree)/arch/$(hdr-arch)/include/uapi/asm/Kbuild),, \
+	  $(error Headers not exportable for the $(SRCARCH) architecture))
+	$(Q)$(MAKE) $(hdr-inst)=include/uapi
+	$(Q)$(MAKE) $(hdr-inst)=arch/$(hdr-arch)/include/uapi/asm $(hdr-dst)
 
 PHONY += headers_check_all
 headers_check_all: headers_install_all
@@ -919,8 +919,8 @@ headers_check_all: headers_install_all
 
 PHONY += headers_check
 headers_check: headers_install
-	$(Q)$(MAKE) $(hdr-inst)=include HDRCHECK=1
-	$(Q)$(MAKE) $(hdr-inst)=arch/$(hdr-arch)/include/asm $(hdr-dst) HDRCHECK=1
+	$(Q)$(MAKE) $(hdr-inst)=include/uapi HDRCHECK=1
+	$(Q)$(MAKE) $(hdr-inst)=arch/$(hdr-arch)/include/uapi/asm $(hdr-dst) HDRCHECK=1
 
 # ---------------------------------------------------------------------------
 # Modules
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild
index f9c0d3ba9e8..1595d681343 100644
--- a/arch/x86/include/asm/Kbuild
+++ b/arch/x86/include/asm/Kbuild
@@ -22,7 +22,3 @@ header-y += sigcontext32.h
 header-y += ucontext.h
 header-y += vm86.h
 header-y += vsyscall.h
-
-genhdr-y += unistd_32.h
-genhdr-y += unistd_64.h
-genhdr-y += unistd_x32.h
diff --git a/arch/x86/syscalls/Makefile b/arch/x86/syscalls/Makefile
index 174b0324841..f325af26107 100644
--- a/arch/x86/syscalls/Makefile
+++ b/arch/x86/syscalls/Makefile
@@ -1,5 +1,5 @@
 out := $(obj)/../include/generated/asm
-uapi := $(obj)/../include/generated/asm
+uapi := $(obj)/../include/generated/uapi/asm
 
 # Create output directory if not already present
 _dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)') \
diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index 0236a3b346f..3f4c207a93b 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -20,8 +20,6 @@ header-y += netfilter_ipv6/
 header-y += usb/
 header-y += wimax/
 
-genhdr-y += version.h
-
 ifneq ($(wildcard $(srctree)/arch/$(SRCARCH)/include/asm/a.out.h \
 		  $(srctree)/include/asm-$(SRCARCH)/a.out.h \
 		  $(INSTALL_HDR_PATH)/include/asm-*/a.out.h),)
diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
index 13a9cf4cb6f..b0fd4d03499 100644
--- a/include/uapi/linux/Kbuild
+++ b/include/uapi/linux/Kbuild
@@ -20,3 +20,5 @@ header-y += netfilter_ipv4/
 header-y += netfilter_ipv6/
 header-y += usb/
 header-y += wimax/
+
+genhdr-y += version.h
diff --git a/scripts/Makefile.headersinst b/scripts/Makefile.headersinst
index 463b95acc15..06ba4a70bd4 100644
--- a/scripts/Makefile.headersinst
+++ b/scripts/Makefile.headersinst
@@ -8,7 +8,7 @@
 # ==========================================================================
 
 # called may set destination dir (when installing to asm/)
-_dst := $(if $(dst),$(dst),$(obj))
+_dst := $(or $(destination-y),$(dst),$(obj))
 
 # generated header directory
 gen := $(if $(gen),$(gen),$(subst include/,include/generated/,$(obj)))
@@ -16,47 +16,64 @@ gen := $(if $(gen),$(gen),$(subst include/,include/generated/,$(obj)))
 kbuild-file := $(srctree)/$(obj)/Kbuild
 include $(kbuild-file)
 
-_dst := $(if $(destination-y),$(destination-y),$(_dst))
+old-kbuild-file := $(srctree)/$(subst uapi/,,$(obj))/Kbuild
+ifneq ($(wildcard $(old-kbuild-file)),)
+include $(old-kbuild-file)
+endif
 
 include scripts/Kbuild.include
 
-install       := $(INSTALL_HDR_PATH)/$(_dst)
+installdir    := $(INSTALL_HDR_PATH)/$(subst uapi/,,$(_dst))
 
 header-y      := $(sort $(header-y))
 subdirs       := $(patsubst %/,%,$(filter %/, $(header-y)))
 header-y      := $(filter-out %/, $(header-y))
 
 # files used to track state of install/check
-install-file  := $(install)/.install
-check-file    := $(install)/.check
+install-file  := $(installdir)/.install
+check-file    := $(installdir)/.check
 
 # generic-y list all files an architecture uses from asm-generic
 # Use this to build a list of headers which require a wrapper
 wrapper-files := $(filter $(header-y), $(generic-y))
 
+srcdir        := $(srctree)/$(obj)
+gendir        := $(objtree)/$(gen)
+
+oldsrcdir     := $(srctree)/$(subst /uapi,,$(obj))
+
 # all headers files for this dir
 header-y      := $(filter-out $(generic-y), $(header-y))
 all-files     := $(header-y) $(genhdr-y) $(wrapper-files)
-input-files   := $(addprefix $(srctree)/$(obj)/,$(header-y)) \
-                 $(addprefix $(objtree)/$(gen)/,$(genhdr-y))
-output-files  := $(addprefix $(install)/, $(all-files))
+output-files  := $(addprefix $(installdir)/, $(all-files))
+
+input-files   := $(foreach hdr, $(header-y), \
+		   $(or \
+			$(wildcard $(srcdir)/$(hdr)), \
+			$(wildcard $(oldsrcdir)/$(hdr)), \
+			$(error Missing UAPI file $(srcdir)/$(hdr)) \
+		   )) \
+		 $(foreach hdr, $(genhdr-y), \
+		   $(or \
+			$(wildcard $(gendir)/$(hdr)), \
+			$(error Missing generated UAPI file $(gendir)/$(hdr)) \
+		   ))
 
 # Work out what needs to be removed
-oldheaders    := $(patsubst $(install)/%,%,$(wildcard $(install)/*.h))
+oldheaders    := $(patsubst $(installdir)/%,%,$(wildcard $(installdir)/*.h))
 unwanted      := $(filter-out $(all-files),$(oldheaders))
 
 # Prefix unwanted with full paths to $(INSTALL_HDR_PATH)
-unwanted-file := $(addprefix $(install)/, $(unwanted))
+unwanted-file := $(addprefix $(installdir)/, $(unwanted))
 
 printdir = $(patsubst $(INSTALL_HDR_PATH)/%/,%,$(dir $@))
 
 quiet_cmd_install = INSTALL $(printdir) ($(words $(all-files))\
                             file$(if $(word 2, $(all-files)),s))
       cmd_install = \
-        $(PERL) $< $(srctree)/$(obj) $(install) $(SRCARCH) $(header-y); \
-        $(PERL) $< $(objtree)/$(gen) $(install) $(SRCARCH) $(genhdr-y); \
+        $(PERL) $< $(installdir) $(SRCARCH) $(input-files); \
         for F in $(wrapper-files); do                                   \
-                echo "\#include <asm-generic/$$F>" > $(install)/$$F;    \
+                echo "\#include <asm-generic/$$F>" > $(installdir)/$$F;    \
         done;                                                           \
         touch $@
 
@@ -67,7 +84,7 @@ quiet_cmd_check = CHECK   $(printdir) ($(words $(all-files)) files)
 # Headers list can be pretty long, xargs helps to avoid
 # the "Argument list too long" error.
       cmd_check = for f in $(all-files); do                          \
-                  echo "$(install)/$${f}"; done                      \
+                  echo "$(installdir)/$${f}"; done                      \
                   | xargs                                            \
                   $(PERL) $< $(INSTALL_HDR_PATH)/include $(SRCARCH); \
 	          touch $@
diff --git a/scripts/headers_install.pl b/scripts/headers_install.pl
index 48462be328b..239d22d4207 100644
--- a/scripts/headers_install.pl
+++ b/scripts/headers_install.pl
@@ -4,8 +4,7 @@
 # user space and copy the files to their destination.
 #
 # Usage: headers_install.pl readdir installdir arch [files...]
-# readdir:    dir to open files
-# installdir: dir to install the files
+# installdir: dir to install the files to
 # arch:       current architecture
 #             arch is used to force a reinstallation when the arch
 #             changes because kbuild then detect a command line change.
@@ -18,15 +17,18 @@
 
 use strict;
 
-my ($readdir, $installdir, $arch, @files) = @ARGV;
+my ($installdir, $arch, @files) = @ARGV;
 
 my $unifdef = "scripts/unifdef -U__KERNEL__ -D__EXPORTED_HEADERS__";
 
-foreach my $file (@files) {
+foreach my $filename (@files) {
+	my $file = $filename;
+	$file =~ s!^.*/!!;
+
 	my $tmpfile = "$installdir/$file.tmp";
 
-	open(my $in, '<', "$readdir/$file")
-	    or die "$readdir/$file: $!\n";
+	open(my $in, '<', $filename)
+	    or die "$filename: $!\n";
 	open(my $out, '>', $tmpfile)
 	    or die "$tmpfile: $!\n";
 	while (my $line = <$in>) {
-- 
cgit v1.2.3-70-g09d2


From b1e0d8b70fa31821ebca3965f2ef8619d7c5e316 Mon Sep 17 00:00:00 2001
From: Jean Delvare <jdelvare@suse.de>
Date: Tue, 2 Oct 2012 16:42:36 +0200
Subject: kbuild: Fix gcc -x syntax

The correct syntax for gcc -x is "gcc -x assembler", not
"gcc -xassembler". Even though the latter happens to work, the former
is what is documented in the manual page and thus what gcc wrappers
such as icecream do expect.

This isn't a cosmetic change. The missing space prevents icecream from
recognizing compilation tasks it can't handle, leading to silent kernel
miscompilations.

Besides me, credits go to Michael Matz and Dirk Mueller for
investigating the miscompilation issue and tracking it down to this
incorrect -x parameter syntax.

Signed-off-by: Jean Delvare <jdelvare@suse.de>
Acked-by: Ingo Molnar <mingo@kernel.org>
Cc: stable@vger.kernel.org
Cc: Bernhard Walle <bernhard@bwalle.de>
Cc: Michal Marek <mmarek@suse.cz>
Cc: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Michal Marek <mmarek@suse.cz>
---
 arch/mips/Makefile                         |  2 +-
 arch/mips/kernel/Makefile                  |  2 +-
 arch/x86/Makefile                          |  2 +-
 scripts/Kbuild.include                     | 12 ++++++------
 scripts/gcc-version.sh                     |  6 +++---
 scripts/gcc-x86_32-has-stack-protector.sh  |  2 +-
 scripts/gcc-x86_64-has-stack-protector.sh  |  2 +-
 scripts/kconfig/check.sh                   |  2 +-
 scripts/kconfig/lxdialog/check-lxdialog.sh |  2 +-
 tools/perf/Makefile                        |  2 +-
 tools/power/cpupower/Makefile              |  2 +-
 11 files changed, 18 insertions(+), 18 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/mips/Makefile b/arch/mips/Makefile
index 764e37a9dbb..654b1ad39f0 100644
--- a/arch/mips/Makefile
+++ b/arch/mips/Makefile
@@ -225,7 +225,7 @@ KBUILD_CPPFLAGS += -DDATAOFFSET=$(if $(dataoffset-y),$(dataoffset-y),0)
 LDFLAGS			+= -m $(ld-emul)
 
 ifdef CONFIG_MIPS
-CHECKFLAGS += $(shell $(CC) $(KBUILD_CFLAGS) -dM -E -xc /dev/null | \
+CHECKFLAGS += $(shell $(CC) $(KBUILD_CFLAGS) -dM -E -x c /dev/null | \
 	egrep -vw '__GNUC_(|MINOR_|PATCHLEVEL_)_' | \
 	sed -e "s/^\#define /-D'/" -e "s/ /'='/" -e "s/$$/'/")
 ifdef CONFIG_64BIT
diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile
index fdaf65e1a99..c6136cb4cd4 100644
--- a/arch/mips/kernel/Makefile
+++ b/arch/mips/kernel/Makefile
@@ -104,7 +104,7 @@ obj-$(CONFIG_MIPS_MACHINE)	+= mips_machine.o
 
 obj-$(CONFIG_OF)		+= prom.o
 
-CFLAGS_cpu-bugs64.o	= $(shell if $(CC) $(KBUILD_CFLAGS) -Wa,-mdaddi -c -o /dev/null -xc /dev/null >/dev/null 2>&1; then echo "-DHAVE_AS_SET_DADDI"; fi)
+CFLAGS_cpu-bugs64.o	= $(shell if $(CC) $(KBUILD_CFLAGS) -Wa,-mdaddi -c -o /dev/null -x c /dev/null >/dev/null 2>&1; then echo "-DHAVE_AS_SET_DADDI"; fi)
 
 obj-$(CONFIG_HAVE_STD_PC_SERIAL_PORT)	+= 8250-platform.o
 
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index b0c5276861e..cb8ac935df8 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -88,7 +88,7 @@ endif
 ifdef CONFIG_X86_X32
 	x32_ld_ok := $(call try-run,\
 			/bin/echo -e '1: .quad 1b' | \
-			$(CC) $(KBUILD_AFLAGS) -c -xassembler -o "$$TMP" - && \
+			$(CC) $(KBUILD_AFLAGS) -c -x assembler -o "$$TMP" - && \
 			$(OBJCOPY) -O elf32-x86-64 "$$TMP" "$$TMPO" && \
 			$(LD) -m elf32_x86_64 "$$TMPO" -o "$$TMP",y,n)
         ifeq ($(x32_ld_ok),y)
diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include
index 6a3ee981931..8bb8d3a9f01 100644
--- a/scripts/Kbuild.include
+++ b/scripts/Kbuild.include
@@ -98,24 +98,24 @@ try-run = $(shell set -e;		\
 # Usage: cflags-y += $(call as-option,-Wa$(comma)-isa=foo,)
 
 as-option = $(call try-run,\
-	$(CC) $(KBUILD_CFLAGS) $(1) -c -xassembler /dev/null -o "$$TMP",$(1),$(2))
+	$(CC) $(KBUILD_CFLAGS) $(1) -c -x assembler /dev/null -o "$$TMP",$(1),$(2))
 
 # as-instr
 # Usage: cflags-y += $(call as-instr,instr,option1,option2)
 
 as-instr = $(call try-run,\
-	printf "%b\n" "$(1)" | $(CC) $(KBUILD_AFLAGS) -c -xassembler -o "$$TMP" -,$(2),$(3))
+	printf "%b\n" "$(1)" | $(CC) $(KBUILD_AFLAGS) -c -x assembler -o "$$TMP" -,$(2),$(3))
 
 # cc-option
 # Usage: cflags-y += $(call cc-option,-march=winchip-c6,-march=i586)
 
 cc-option = $(call try-run,\
-	$(CC) $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) $(1) -c -xc /dev/null -o "$$TMP",$(1),$(2))
+	$(CC) $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) $(1) -c -x c /dev/null -o "$$TMP",$(1),$(2))
 
 # cc-option-yn
 # Usage: flag := $(call cc-option-yn,-march=winchip-c6)
 cc-option-yn = $(call try-run,\
-	$(CC) $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) $(1) -c -xc /dev/null -o "$$TMP",y,n)
+	$(CC) $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) $(1) -c -x c /dev/null -o "$$TMP",y,n)
 
 # cc-option-align
 # Prefix align with either -falign or -malign
@@ -125,7 +125,7 @@ cc-option-align = $(subst -functions=0,,\
 # cc-disable-warning
 # Usage: cflags-y += $(call cc-disable-warning,unused-but-set-variable)
 cc-disable-warning = $(call try-run,\
-	$(CC) $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) -W$(strip $(1)) -c -xc /dev/null -o "$$TMP",-Wno-$(strip $(1)))
+	$(CC) $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) -W$(strip $(1)) -c -x c /dev/null -o "$$TMP",-Wno-$(strip $(1)))
 
 # cc-version
 # Usage gcc-ver := $(call cc-version)
@@ -143,7 +143,7 @@ cc-ifversion = $(shell [ $(call cc-version, $(CC)) $(1) $(2) ] && echo $(3))
 # cc-ldoption
 # Usage: ldflags += $(call cc-ldoption, -Wl$(comma)--hash-style=both)
 cc-ldoption = $(call try-run,\
-	$(CC) $(1) -nostdlib -xc /dev/null -o "$$TMP",$(1),$(2))
+	$(CC) $(1) -nostdlib -x c /dev/null -o "$$TMP",$(1),$(2))
 
 # ld-option
 # Usage: LDFLAGS += $(call ld-option, -X)
diff --git a/scripts/gcc-version.sh b/scripts/gcc-version.sh
index debecb5561c..7f2126df91f 100644
--- a/scripts/gcc-version.sh
+++ b/scripts/gcc-version.sh
@@ -22,10 +22,10 @@ if [ ${#compiler} -eq 0 ]; then
 	exit 1
 fi
 
-MAJOR=$(echo __GNUC__ | $compiler -E -xc - | tail -n 1)
-MINOR=$(echo __GNUC_MINOR__ | $compiler -E -xc - | tail -n 1)
+MAJOR=$(echo __GNUC__ | $compiler -E -x c - | tail -n 1)
+MINOR=$(echo __GNUC_MINOR__ | $compiler -E -x c - | tail -n 1)
 if [ "x$with_patchlevel" != "x" ] ; then
-	PATCHLEVEL=$(echo __GNUC_PATCHLEVEL__ | $compiler -E -xc - | tail -n 1)
+	PATCHLEVEL=$(echo __GNUC_PATCHLEVEL__ | $compiler -E -x c - | tail -n 1)
 	printf "%02d%02d%02d\\n" $MAJOR $MINOR $PATCHLEVEL
 else
 	printf "%02d%02d\\n" $MAJOR $MINOR
diff --git a/scripts/gcc-x86_32-has-stack-protector.sh b/scripts/gcc-x86_32-has-stack-protector.sh
index 29493dc4528..12dbd0b11ea 100644
--- a/scripts/gcc-x86_32-has-stack-protector.sh
+++ b/scripts/gcc-x86_32-has-stack-protector.sh
@@ -1,6 +1,6 @@
 #!/bin/sh
 
-echo "int foo(void) { char X[200]; return 3; }" | $* -S -xc -c -O0 -fstack-protector - -o - 2> /dev/null | grep -q "%gs"
+echo "int foo(void) { char X[200]; return 3; }" | $* -S -x c -c -O0 -fstack-protector - -o - 2> /dev/null | grep -q "%gs"
 if [ "$?" -eq "0" ] ; then
 	echo y
 else
diff --git a/scripts/gcc-x86_64-has-stack-protector.sh b/scripts/gcc-x86_64-has-stack-protector.sh
index afaec618b39..973e8c14156 100644
--- a/scripts/gcc-x86_64-has-stack-protector.sh
+++ b/scripts/gcc-x86_64-has-stack-protector.sh
@@ -1,6 +1,6 @@
 #!/bin/sh
 
-echo "int foo(void) { char X[200]; return 3; }" | $* -S -xc -c -O0 -mcmodel=kernel -fstack-protector - -o - 2> /dev/null | grep -q "%gs"
+echo "int foo(void) { char X[200]; return 3; }" | $* -S -x c -c -O0 -mcmodel=kernel -fstack-protector - -o - 2> /dev/null | grep -q "%gs"
 if [ "$?" -eq "0" ] ; then
 	echo y
 else
diff --git a/scripts/kconfig/check.sh b/scripts/kconfig/check.sh
index fa59cbf9d62..854d9c7c675 100755
--- a/scripts/kconfig/check.sh
+++ b/scripts/kconfig/check.sh
@@ -1,6 +1,6 @@
 #!/bin/sh
 # Needed for systems without gettext
-$* -xc -o /dev/null - > /dev/null 2>&1 << EOF
+$* -x c -o /dev/null - > /dev/null 2>&1 << EOF
 #include <libintl.h>
 int main()
 {
diff --git a/scripts/kconfig/lxdialog/check-lxdialog.sh b/scripts/kconfig/lxdialog/check-lxdialog.sh
index e3b12c01041..c8e8a715475 100644
--- a/scripts/kconfig/lxdialog/check-lxdialog.sh
+++ b/scripts/kconfig/lxdialog/check-lxdialog.sh
@@ -38,7 +38,7 @@ trap "rm -f $tmp" 0 1 2 3 15
 
 # Check if we can link to ncurses
 check() {
-        $cc -xc - -o $tmp 2>/dev/null <<'EOF'
+        $cc -x c - -o $tmp 2>/dev/null <<'EOF'
 #include CURSES_LOC
 main() {}
 EOF
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 77f124fe57a..d9776568e8c 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -62,7 +62,7 @@ ifeq ($(ARCH),x86_64)
 	ARCH := x86
 	IS_X86_64 := 0
 	ifeq (, $(findstring m32,$(EXTRA_CFLAGS)))
-		IS_X86_64 := $(shell echo __x86_64__ | ${CC} -E -xc - | tail -n 1)
+		IS_X86_64 := $(shell echo __x86_64__ | ${CC} -E -x c - | tail -n 1)
 	endif
 	ifeq (${IS_X86_64}, 1)
 		RAW_ARCH := x86_64
diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile
index a93e06cfcc2..cf397bd26d0 100644
--- a/tools/power/cpupower/Makefile
+++ b/tools/power/cpupower/Makefile
@@ -111,7 +111,7 @@ GMO_FILES = ${shell for HLANG in ${LANGUAGES}; do echo $(OUTPUT)po/$$HLANG.gmo;
 export CROSS CC AR STRIP RANLIB CFLAGS LDFLAGS LIB_OBJS
 
 # check if compiler option is supported
-cc-supports = ${shell if $(CC) ${1} -S -o /dev/null -xc /dev/null > /dev/null 2>&1; then echo "$(1)"; fi;}
+cc-supports = ${shell if $(CC) ${1} -S -o /dev/null -x c /dev/null > /dev/null 2>&1; then echo "$(1)"; fi;}
 
 # use '-Os' optimization if available, else use -O2
 OPTIMIZATION := $(call cc-supports,-Os,-O2)
-- 
cgit v1.2.3-70-g09d2


From e7a570ff7dff9af6e54ff5e580a61ec7652137a0 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Wed, 5 Sep 2012 12:04:14 +0800
Subject: asm-generic: Add default clkdev.h

Ease the deployment of clkdev by providing a default asm/clkdev.h for
use if the arch does not have an include/asm/clkdev.h.

Due to limitations in Kbuild we manually add clkdev.h to all
architectures that don't have one rather than having the header appear
by default.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Reviewed-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 arch/alpha/include/asm/Kbuild      |  2 ++
 arch/avr32/include/asm/Kbuild      |  2 ++
 arch/cris/include/asm/Kbuild       |  2 ++
 arch/frv/include/asm/Kbuild        |  1 +
 arch/h8300/include/asm/Kbuild      |  2 ++
 arch/hexagon/include/asm/Kbuild    |  1 +
 arch/ia64/include/asm/Kbuild       |  1 +
 arch/m32r/include/asm/Kbuild       |  2 ++
 arch/m68k/include/asm/Kbuild       |  1 +
 arch/microblaze/include/asm/Kbuild |  1 +
 arch/mn10300/include/asm/Kbuild    |  2 ++
 arch/openrisc/include/asm/Kbuild   |  1 +
 arch/parisc/include/asm/Kbuild     |  1 +
 arch/powerpc/include/asm/Kbuild    |  1 +
 arch/s390/include/asm/Kbuild       |  2 ++
 arch/score/include/asm/Kbuild      |  2 ++
 arch/sparc/include/asm/Kbuild      |  1 +
 arch/tile/include/asm/Kbuild       |  1 +
 arch/um/include/asm/Kbuild         |  2 +-
 arch/unicore32/include/asm/Kbuild  |  1 +
 arch/x86/include/asm/Kbuild        |  2 ++
 arch/xtensa/include/asm/Kbuild     |  2 ++
 include/asm-generic/clkdev.h       | 28 ++++++++++++++++++++++++++++
 23 files changed, 60 insertions(+), 1 deletion(-)
 create mode 100644 include/asm-generic/clkdev.h

(limited to 'arch/x86')

diff --git a/arch/alpha/include/asm/Kbuild b/arch/alpha/include/asm/Kbuild
index e423defed91..d97d66334e6 100644
--- a/arch/alpha/include/asm/Kbuild
+++ b/arch/alpha/include/asm/Kbuild
@@ -1,5 +1,7 @@
 include include/asm-generic/Kbuild.asm
 
+generic-y += clkdev.h
+
 header-y += compiler.h
 header-y += console.h
 header-y += fpu.h
diff --git a/arch/avr32/include/asm/Kbuild b/arch/avr32/include/asm/Kbuild
index 3136628ba8d..e3ba7bca06f 100644
--- a/arch/avr32/include/asm/Kbuild
+++ b/arch/avr32/include/asm/Kbuild
@@ -1,3 +1,5 @@
 include include/asm-generic/Kbuild.asm
 
+generic-y	+= clkdev.h
+
 header-y	+= cachectl.h
diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild
index 04d02a51c5e..a8eab26a1ec 100644
--- a/arch/cris/include/asm/Kbuild
+++ b/arch/cris/include/asm/Kbuild
@@ -7,3 +7,5 @@ header-y += ethernet.h
 header-y += etraxgpio.h
 header-y += rs485.h
 header-y += sync_serial.h
+
+generic-y += clkdev.h
diff --git a/arch/frv/include/asm/Kbuild b/arch/frv/include/asm/Kbuild
index 5be6663cfee..13cd044aabd 100644
--- a/arch/frv/include/asm/Kbuild
+++ b/arch/frv/include/asm/Kbuild
@@ -2,3 +2,4 @@ include include/asm-generic/Kbuild.asm
 
 header-y += registers.h
 header-y += termios.h
+generic-y += clkdev.h
diff --git a/arch/h8300/include/asm/Kbuild b/arch/h8300/include/asm/Kbuild
index c68e1680da0..0e152a93c12 100644
--- a/arch/h8300/include/asm/Kbuild
+++ b/arch/h8300/include/asm/Kbuild
@@ -1 +1,3 @@
 include include/asm-generic/Kbuild.asm
+
+generic-y	+= clkdev.h
diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild
index 06906427c0a..3364b6966d2 100644
--- a/arch/hexagon/include/asm/Kbuild
+++ b/arch/hexagon/include/asm/Kbuild
@@ -7,6 +7,7 @@ header-y += user.h
 generic-y += auxvec.h
 generic-y += bug.h
 generic-y += bugs.h
+generic-y += clkdev.h
 generic-y += cputime.h
 generic-y += current.h
 generic-y += device.h
diff --git a/arch/ia64/include/asm/Kbuild b/arch/ia64/include/asm/Kbuild
index d4eb9383f5f..58f3d14a6cd 100644
--- a/arch/ia64/include/asm/Kbuild
+++ b/arch/ia64/include/asm/Kbuild
@@ -13,3 +13,4 @@ header-y += ptrace_offsets.h
 header-y += rse.h
 header-y += ucontext.h
 header-y += ustack.h
+generic-y += clkdev.h
diff --git a/arch/m32r/include/asm/Kbuild b/arch/m32r/include/asm/Kbuild
index c68e1680da0..0e152a93c12 100644
--- a/arch/m32r/include/asm/Kbuild
+++ b/arch/m32r/include/asm/Kbuild
@@ -1 +1,3 @@
 include include/asm-generic/Kbuild.asm
+
+generic-y	+= clkdev.h
diff --git a/arch/m68k/include/asm/Kbuild b/arch/m68k/include/asm/Kbuild
index a74e5d95c38..bfe675f0fae 100644
--- a/arch/m68k/include/asm/Kbuild
+++ b/arch/m68k/include/asm/Kbuild
@@ -2,6 +2,7 @@ include include/asm-generic/Kbuild.asm
 header-y += cachectl.h
 
 generic-y += bitsperlong.h
+generic-y += clkdev.h
 generic-y += cputime.h
 generic-y += device.h
 generic-y += emergency-restart.h
diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild
index db5294c30ca..48510f6cec8 100644
--- a/arch/microblaze/include/asm/Kbuild
+++ b/arch/microblaze/include/asm/Kbuild
@@ -1,3 +1,4 @@
 include include/asm-generic/Kbuild.asm
 
 header-y  += elf.h
+generic-y += clkdev.h
diff --git a/arch/mn10300/include/asm/Kbuild b/arch/mn10300/include/asm/Kbuild
index c68e1680da0..0d20f5526dd 100644
--- a/arch/mn10300/include/asm/Kbuild
+++ b/arch/mn10300/include/asm/Kbuild
@@ -1 +1,3 @@
 include include/asm-generic/Kbuild.asm
+
+generic-y += clkdev.h
diff --git a/arch/openrisc/include/asm/Kbuild b/arch/openrisc/include/asm/Kbuild
index 0922959663a..7140b6b2644 100644
--- a/arch/openrisc/include/asm/Kbuild
+++ b/arch/openrisc/include/asm/Kbuild
@@ -11,6 +11,7 @@ generic-y += bug.h
 generic-y += bugs.h
 generic-y += cacheflush.h
 generic-y += checksum.h
+generic-y += clkdev.h
 generic-y += cmpxchg.h
 generic-y += cmpxchg-local.h
 generic-y += cputime.h
diff --git a/arch/parisc/include/asm/Kbuild b/arch/parisc/include/asm/Kbuild
index 4383707d980..0587f62e5b7 100644
--- a/arch/parisc/include/asm/Kbuild
+++ b/arch/parisc/include/asm/Kbuild
@@ -1,4 +1,5 @@
 include include/asm-generic/Kbuild.asm
 
 header-y += pdc.h
+generic-y += clkdev.h
 generic-y += word-at-a-time.h
diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild
index 7e313f1ed18..ace53dbde2c 100644
--- a/arch/powerpc/include/asm/Kbuild
+++ b/arch/powerpc/include/asm/Kbuild
@@ -35,4 +35,5 @@ header-y += types.h
 header-y += ucontext.h
 header-y += unistd.h
 
+generic-y += clkdev.h
 generic-y += rwsem.h
diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild
index 287d7bbb6d3..f18fc796bee 100644
--- a/arch/s390/include/asm/Kbuild
+++ b/arch/s390/include/asm/Kbuild
@@ -13,3 +13,5 @@ header-y += tape390.h
 header-y += ucontext.h
 header-y += vtoc.h
 header-y += zcrypt.h
+
+generic-y += clkdev.h
diff --git a/arch/score/include/asm/Kbuild b/arch/score/include/asm/Kbuild
index b367abd4620..ec697aeefd0 100644
--- a/arch/score/include/asm/Kbuild
+++ b/arch/score/include/asm/Kbuild
@@ -1,3 +1,5 @@
 include include/asm-generic/Kbuild.asm
 
 header-y +=
+
+generic-y += clkdev.h
diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild
index 67f83e0a0d6..f80ff93f6f7 100644
--- a/arch/sparc/include/asm/Kbuild
+++ b/arch/sparc/include/asm/Kbuild
@@ -17,6 +17,7 @@ header-y += uctx.h
 header-y += utrap.h
 header-y += watchdog.h
 
+generic-y += clkdev.h
 generic-y += div64.h
 generic-y += local64.h
 generic-y += irq_regs.h
diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild
index 5bd71994452..ea2e8ea3eb6 100644
--- a/arch/tile/include/asm/Kbuild
+++ b/arch/tile/include/asm/Kbuild
@@ -8,6 +8,7 @@ header-y += hardwall.h
 
 generic-y += bug.h
 generic-y += bugs.h
+generic-y += clkdev.h
 generic-y += cputime.h
 generic-y += div64.h
 generic-y += emergency-restart.h
diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild
index fff24352255..0f6e7b32826 100644
--- a/arch/um/include/asm/Kbuild
+++ b/arch/um/include/asm/Kbuild
@@ -1,4 +1,4 @@
 generic-y += bug.h cputime.h device.h emergency-restart.h futex.h hardirq.h
 generic-y += hw_irq.h irq_regs.h kdebug.h percpu.h sections.h topology.h xor.h
 generic-y += ftrace.h pci.h io.h param.h delay.h mutex.h current.h exec.h
-generic-y += switch_to.h
+generic-y += switch_to.h clkdev.h
diff --git a/arch/unicore32/include/asm/Kbuild b/arch/unicore32/include/asm/Kbuild
index 34b789b7111..123c59a06c1 100644
--- a/arch/unicore32/include/asm/Kbuild
+++ b/arch/unicore32/include/asm/Kbuild
@@ -4,6 +4,7 @@ generic-y += atomic.h
 generic-y += auxvec.h
 generic-y += bitsperlong.h
 generic-y += bugs.h
+generic-y += clkdev.h
 generic-y += cputime.h
 generic-y += current.h
 generic-y += device.h
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild
index f9c0d3ba9e8..66e5f0ef052 100644
--- a/arch/x86/include/asm/Kbuild
+++ b/arch/x86/include/asm/Kbuild
@@ -26,3 +26,5 @@ header-y += vsyscall.h
 genhdr-y += unistd_32.h
 genhdr-y += unistd_64.h
 genhdr-y += unistd_x32.h
+
+generic-y += clkdev.h
diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild
index c68e1680da0..0d20f5526dd 100644
--- a/arch/xtensa/include/asm/Kbuild
+++ b/arch/xtensa/include/asm/Kbuild
@@ -1 +1,3 @@
 include include/asm-generic/Kbuild.asm
+
+generic-y += clkdev.h
diff --git a/include/asm-generic/clkdev.h b/include/asm-generic/clkdev.h
new file mode 100644
index 00000000000..90a32a61dd2
--- /dev/null
+++ b/include/asm-generic/clkdev.h
@@ -0,0 +1,28 @@
+/*
+ *  include/asm-generic/clkdev.h
+ *
+ * Based on the ARM clkdev.h:
+ *  Copyright (C) 2008 Russell King.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Helper for the clk API to assist looking up a struct clk.
+ */
+#ifndef __ASM_CLKDEV_H
+#define __ASM_CLKDEV_H
+
+#include <linux/slab.h>
+
+struct clk;
+
+static inline int __clk_get(struct clk *clk) { return 1; }
+static inline void __clk_put(struct clk *clk) { }
+
+static inline struct clk_lookup_alloc *__clkdev_alloc(size_t size)
+{
+	return kzalloc(size, GFP_KERNEL);
+}
+
+#endif
-- 
cgit v1.2.3-70-g09d2


From c9f97a27ceee84998999bf3341e6d5d207b05539 Mon Sep 17 00:00:00 2001
From: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
Date: Wed, 19 Sep 2012 09:40:30 +0300
Subject: crypto: x86/glue_helper - fix storing of new IV in CBC encryption

Glue_helper incorrectly XORs new IV over old IV at end of CBC encryption
function when it should store. This causes CBC encryption to give
incorrect output on multi-page encryption requests.

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/x86/crypto/glue_helper.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/crypto/glue_helper.c b/arch/x86/crypto/glue_helper.c
index 4854f0f31e4..30b3927bd73 100644
--- a/arch/x86/crypto/glue_helper.c
+++ b/arch/x86/crypto/glue_helper.c
@@ -110,7 +110,7 @@ static unsigned int __glue_cbc_encrypt_128bit(const common_glue_func_t fn,
 		nbytes -= bsize;
 	} while (nbytes >= bsize);
 
-	u128_xor((u128 *)walk->iv, (u128 *)walk->iv, iv);
+	*(u128 *)walk->iv = *iv;
 	return nbytes;
 }
 
-- 
cgit v1.2.3-70-g09d2


From e717bf4e4fe8adc519f25c4ff93ee50ed0a36710 Mon Sep 17 00:00:00 2001
From: Vince Weaver <vincent.weaver@maine.edu>
Date: Wed, 26 Sep 2012 14:12:52 -0400
Subject: perf/x86: Add support for Intel Xeon-Phi Knights Corner PMU

The following patch adds perf_event support for the Xeon-Phi
PMU, as documented in the "Intel Xeon Phi Coprocessor (codename:
Knights Corner) Performance Monitoring Units" manual.

Even though it is a co-processor, a Phi runs a full Linux
environment and can support performance counters.

This is just barebones support, it does not add support for
interesting new features such as the SPFLT intruction that
allows starting/stopping events without entering the kernel.

The PMU internally is just like that of an original Pentium, but
a "P6-like" MSR interface is provided.  The interface is
different enough from a real P6 that it's not easy (or
practical) to re-use the code in  perf_event_p6.c

Acked-by: Lawrence F Meadows <lawrence.f.meadows@intel.com>
Acked-by: Cyrill Gorcunov <gorcunov@openvz.org>
Signed-off-by: Vince Weaver <vincent.weaver@maine.edu>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Cc: eranian@gmail.com
Cc: Lawrence F <lawrence.f.meadows@intel.com>
Link: http://lkml.kernel.org/r/alpine.DEB.2.02.1209261405320.8398@vincent-weaver-1.um.maine.edu
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/msr-index.h       |   5 +
 arch/x86/kernel/cpu/Makefile           |   2 +-
 arch/x86/kernel/cpu/perf_event.h       |   2 +
 arch/x86/kernel/cpu/perf_event_intel.c |   2 +
 arch/x86/kernel/cpu/perf_event_knc.c   | 248 +++++++++++++++++++++++++++++++++
 arch/x86/kernel/cpu/perfctr-watchdog.c |   4 +
 6 files changed, 262 insertions(+), 1 deletion(-)
 create mode 100644 arch/x86/kernel/cpu/perf_event_knc.c

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 957ec87385a..07f96cb5cdb 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -121,6 +121,11 @@
 #define MSR_P6_EVNTSEL0			0x00000186
 #define MSR_P6_EVNTSEL1			0x00000187
 
+#define MSR_KNC_PERFCTR0               0x00000020
+#define MSR_KNC_PERFCTR1               0x00000021
+#define MSR_KNC_EVNTSEL0               0x00000028
+#define MSR_KNC_EVNTSEL1               0x00000029
+
 /* AMD64 MSRs. Not complete. See the architecture manual for a more
    complete list. */
 
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index d30a6a9a012..a0e067d3d96 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -32,7 +32,7 @@ obj-$(CONFIG_PERF_EVENTS)		+= perf_event.o
 
 ifdef CONFIG_PERF_EVENTS
 obj-$(CONFIG_CPU_SUP_AMD)		+= perf_event_amd.o
-obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_p6.o perf_event_p4.o
+obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_p6.o perf_event_knc.o perf_event_p4.o
 obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o
 obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_intel_uncore.o
 endif
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 8b6defe7eef..271d2570029 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -626,6 +626,8 @@ int p4_pmu_init(void);
 
 int p6_pmu_init(void);
 
+int knc_pmu_init(void);
+
 #else /* CONFIG_CPU_SUP_INTEL */
 
 static inline void reserve_ds_buffers(void)
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 6bca492b854..324bb523d9d 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1906,6 +1906,8 @@ __init int intel_pmu_init(void)
 		switch (boot_cpu_data.x86) {
 		case 0x6:
 			return p6_pmu_init();
+		case 0xb:
+			return knc_pmu_init();
 		case 0xf:
 			return p4_pmu_init();
 		}
diff --git a/arch/x86/kernel/cpu/perf_event_knc.c b/arch/x86/kernel/cpu/perf_event_knc.c
new file mode 100644
index 00000000000..7c46bfdbc37
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_knc.c
@@ -0,0 +1,248 @@
+/* Driver for Intel Xeon Phi "Knights Corner" PMU */
+
+#include <linux/perf_event.h>
+#include <linux/types.h>
+
+#include "perf_event.h"
+
+static const u64 knc_perfmon_event_map[] =
+{
+  [PERF_COUNT_HW_CPU_CYCLES]		= 0x002a,
+  [PERF_COUNT_HW_INSTRUCTIONS]		= 0x0016,
+  [PERF_COUNT_HW_CACHE_REFERENCES]	= 0x0028,
+  [PERF_COUNT_HW_CACHE_MISSES]		= 0x0029,
+  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x0012,
+  [PERF_COUNT_HW_BRANCH_MISSES]		= 0x002b,
+};
+
+static __initconst u64 knc_hw_cache_event_ids
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D) ] = {
+	[ C(OP_READ) ] = {
+		/* On Xeon Phi event "0" is a valid DATA_READ          */
+		/*   (L1 Data Cache Reads) Instruction.                */
+		/* We code this as ARCH_PERFMON_EVENTSEL_INT as this   */
+		/* bit will always be set in x86_pmu_hw_config().      */
+		[ C(RESULT_ACCESS) ] = ARCH_PERFMON_EVENTSEL_INT,
+						/* DATA_READ           */
+		[ C(RESULT_MISS)   ] = 0x0003,	/* DATA_READ_MISS      */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0001,	/* DATA_WRITE          */
+		[ C(RESULT_MISS)   ] = 0x0004,	/* DATA_WRITE_MISS     */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0011,	/* L1_DATA_PF1         */
+		[ C(RESULT_MISS)   ] = 0x001c,	/* L1_DATA_PF1_MISS    */
+	},
+ },
+ [ C(L1I ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x000c,	/* CODE_READ          */
+		[ C(RESULT_MISS)   ] = 0x000e,	/* CODE_CACHE_MISS    */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0,
+		[ C(RESULT_MISS)   ] = 0x0,
+	},
+ },
+ [ C(LL  ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0x10cb,	/* L2_READ_MISS */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x10cc,	/* L2_WRITE_HIT */
+		[ C(RESULT_MISS)   ] = 0,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x10fc,	/* L2_DATA_PF2      */
+		[ C(RESULT_MISS)   ] = 0x10fe,	/* L2_DATA_PF2_MISS */
+	},
+ },
+ [ C(DTLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = ARCH_PERFMON_EVENTSEL_INT,
+						/* DATA_READ */
+						/* see note on L1 OP_READ */
+		[ C(RESULT_MISS)   ] = 0x0002,	/* DATA_PAGE_WALK */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0001,	/* DATA_WRITE */
+		[ C(RESULT_MISS)   ] = 0x0002,	/* DATA_PAGE_WALK */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0,
+		[ C(RESULT_MISS)   ] = 0x0,
+	},
+ },
+ [ C(ITLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x000c,	/* CODE_READ */
+		[ C(RESULT_MISS)   ] = 0x000d,	/* CODE_PAGE_WALK */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
+ [ C(BPU ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0012,	/* BRANCHES */
+		[ C(RESULT_MISS)   ] = 0x002b,	/* BRANCHES_MISPREDICTED */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
+};
+
+
+static u64 knc_pmu_event_map(int hw_event)
+{
+	return knc_perfmon_event_map[hw_event];
+}
+
+static struct event_constraint knc_event_constraints[] =
+{
+	INTEL_EVENT_CONSTRAINT(0xc3, 0x1),	/* HWP_L2HIT */
+	INTEL_EVENT_CONSTRAINT(0xc4, 0x1),	/* HWP_L2MISS */
+	INTEL_EVENT_CONSTRAINT(0xc8, 0x1),	/* L2_READ_HIT_E */
+	INTEL_EVENT_CONSTRAINT(0xc9, 0x1),	/* L2_READ_HIT_M */
+	INTEL_EVENT_CONSTRAINT(0xca, 0x1),	/* L2_READ_HIT_S */
+	INTEL_EVENT_CONSTRAINT(0xcb, 0x1),	/* L2_READ_MISS */
+	INTEL_EVENT_CONSTRAINT(0xcc, 0x1),	/* L2_WRITE_HIT */
+	INTEL_EVENT_CONSTRAINT(0xce, 0x1),	/* L2_STRONGLY_ORDERED_STREAMING_VSTORES_MISS */
+	INTEL_EVENT_CONSTRAINT(0xcf, 0x1),	/* L2_WEAKLY_ORDERED_STREAMING_VSTORE_MISS */
+	INTEL_EVENT_CONSTRAINT(0xd7, 0x1),	/* L2_VICTIM_REQ_WITH_DATA */
+	INTEL_EVENT_CONSTRAINT(0xe3, 0x1),	/* SNP_HITM_BUNIT */
+	INTEL_EVENT_CONSTRAINT(0xe6, 0x1),	/* SNP_HIT_L2 */
+	INTEL_EVENT_CONSTRAINT(0xe7, 0x1),	/* SNP_HITM_L2 */
+	INTEL_EVENT_CONSTRAINT(0xf1, 0x1),	/* L2_DATA_READ_MISS_CACHE_FILL */
+	INTEL_EVENT_CONSTRAINT(0xf2, 0x1),	/* L2_DATA_WRITE_MISS_CACHE_FILL */
+	INTEL_EVENT_CONSTRAINT(0xf6, 0x1),	/* L2_DATA_READ_MISS_MEM_FILL */
+	INTEL_EVENT_CONSTRAINT(0xf7, 0x1),	/* L2_DATA_WRITE_MISS_MEM_FILL */
+	INTEL_EVENT_CONSTRAINT(0xfc, 0x1),	/* L2_DATA_PF2 */
+	INTEL_EVENT_CONSTRAINT(0xfd, 0x1),	/* L2_DATA_PF2_DROP */
+	INTEL_EVENT_CONSTRAINT(0xfe, 0x1),	/* L2_DATA_PF2_MISS */
+	INTEL_EVENT_CONSTRAINT(0xff, 0x1),	/* L2_DATA_HIT_INFLIGHT_PF2 */
+	EVENT_CONSTRAINT_END
+};
+
+#define MSR_KNC_IA32_PERF_GLOBAL_STATUS		0x0000002d
+#define MSR_KNC_IA32_PERF_GLOBAL_OVF_CONTROL	0x0000002e
+#define MSR_KNC_IA32_PERF_GLOBAL_CTRL		0x0000002f
+
+#define KNC_ENABLE_COUNTER0			0x00000001
+#define KNC_ENABLE_COUNTER1			0x00000002
+
+static void knc_pmu_disable_all(void)
+{
+	u64 val;
+
+	rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
+	val &= ~(KNC_ENABLE_COUNTER0|KNC_ENABLE_COUNTER1);
+	wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
+}
+
+static void knc_pmu_enable_all(int added)
+{
+	u64 val;
+
+	rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
+	val |= (KNC_ENABLE_COUNTER0|KNC_ENABLE_COUNTER1);
+	wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
+}
+
+static inline void
+knc_pmu_disable_event(struct perf_event *event)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+	u64 val;
+
+	val = hwc->config;
+	if (cpuc->enabled)
+		val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
+
+	(void)wrmsrl_safe(hwc->config_base + hwc->idx, val);
+}
+
+static void knc_pmu_enable_event(struct perf_event *event)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+	u64 val;
+
+	val = hwc->config;
+	if (cpuc->enabled)
+		val |= ARCH_PERFMON_EVENTSEL_ENABLE;
+
+	(void)wrmsrl_safe(hwc->config_base + hwc->idx, val);
+}
+
+PMU_FORMAT_ATTR(event,	"config:0-7"	);
+PMU_FORMAT_ATTR(umask,	"config:8-15"	);
+PMU_FORMAT_ATTR(edge,	"config:18"	);
+PMU_FORMAT_ATTR(inv,	"config:23"	);
+PMU_FORMAT_ATTR(cmask,	"config:24-31"	);
+
+static struct attribute *intel_knc_formats_attr[] = {
+	&format_attr_event.attr,
+	&format_attr_umask.attr,
+	&format_attr_edge.attr,
+	&format_attr_inv.attr,
+	&format_attr_cmask.attr,
+	NULL,
+};
+
+static __initconst struct x86_pmu knc_pmu = {
+	.name			= "knc",
+	.handle_irq		= x86_pmu_handle_irq,
+	.disable_all		= knc_pmu_disable_all,
+	.enable_all		= knc_pmu_enable_all,
+	.enable			= knc_pmu_enable_event,
+	.disable		= knc_pmu_disable_event,
+	.hw_config		= x86_pmu_hw_config,
+	.schedule_events	= x86_schedule_events,
+	.eventsel		= MSR_KNC_EVNTSEL0,
+	.perfctr		= MSR_KNC_PERFCTR0,
+	.event_map		= knc_pmu_event_map,
+	.max_events             = ARRAY_SIZE(knc_perfmon_event_map),
+	.apic			= 1,
+	.max_period		= (1ULL << 31) - 1,
+	.version		= 0,
+	.num_counters		= 2,
+	/* in theory 40 bits, early silicon is buggy though */
+	.cntval_bits		= 32,
+	.cntval_mask		= (1ULL << 32) - 1,
+	.get_event_constraints	= x86_get_event_constraints,
+	.event_constraints	= knc_event_constraints,
+	.format_attrs		= intel_knc_formats_attr,
+};
+
+__init int knc_pmu_init(void)
+{
+	x86_pmu = knc_pmu;
+
+	memcpy(hw_cache_event_ids, knc_hw_cache_event_ids, 
+		sizeof(hw_cache_event_ids));
+
+	return 0;
+}
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index 966512b2cac..2e8caf03f59 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -56,6 +56,8 @@ static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
 		switch (boot_cpu_data.x86) {
 		case 6:
 			return msr - MSR_P6_PERFCTR0;
+		case 11:
+			return msr - MSR_KNC_PERFCTR0;
 		case 15:
 			return msr - MSR_P4_BPU_PERFCTR0;
 		}
@@ -82,6 +84,8 @@ static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
 		switch (boot_cpu_data.x86) {
 		case 6:
 			return msr - MSR_P6_EVNTSEL0;
+		case 11:
+			return msr - MSR_KNC_EVNTSEL0;
 		case 15:
 			return msr - MSR_P4_BSU_ESCR0;
 		}
-- 
cgit v1.2.3-70-g09d2


From 2e132b12f78d88672711ae1d87624951de1089ca Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Wed, 12 Sep 2012 12:59:44 +0200
Subject: perf/AMD/IBS: Add sysfs support

Add sysfs format entries for AMD IBS PMUs:

 # find /sys/bus/event_source/devices/ibs_*/format
 /sys/bus/event_source/devices/ibs_fetch/format
 /sys/bus/event_source/devices/ibs_fetch/format/rand_en
 /sys/bus/event_source/devices/ibs_op/format
 /sys/bus/event_source/devices/ibs_op/format/cnt_ctl

This allows to specify following IBS options:

 $ perf record -e ibs_fetch/rand_en=1/GH ...
 $ perf record -e ibs_op/cnt_ctl=1/GH ...

Option cnt_ctl is only enabled if the IBS_CAPS_OPCNT bit is set in IBS
cpuid feature flags (AMD family 10h RevC and above).

Signed-off-by: Robert Richter <robert.richter@amd.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1347447584-28405-1-git-send-email-robert.richter@amd.com
[ Added small readability improvements. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event_amd_ibs.c | 61 +++++++++++++++++++++++++-------
 1 file changed, 49 insertions(+), 12 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
index eebd5ffe1bb..6336bcbd061 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c
+++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
@@ -41,17 +41,22 @@ struct cpu_perf_ibs {
 };
 
 struct perf_ibs {
-	struct pmu	pmu;
-	unsigned int	msr;
-	u64		config_mask;
-	u64		cnt_mask;
-	u64		enable_mask;
-	u64		valid_mask;
-	u64		max_period;
-	unsigned long	offset_mask[1];
-	int		offset_max;
-	struct cpu_perf_ibs __percpu *pcpu;
-	u64		(*get_count)(u64 config);
+	struct pmu			pmu;
+	unsigned int			msr;
+	u64				config_mask;
+	u64				cnt_mask;
+	u64				enable_mask;
+	u64				valid_mask;
+	u64				max_period;
+	unsigned long			offset_mask[1];
+	int				offset_max;
+	struct cpu_perf_ibs __percpu	*pcpu;
+
+	struct attribute		**format_attrs;
+	struct attribute_group		format_group;
+	const struct attribute_group	*attr_groups[2];
+
+	u64				(*get_count)(u64 config);
 };
 
 struct perf_ibs_data {
@@ -446,6 +451,19 @@ static void perf_ibs_del(struct perf_event *event, int flags)
 
 static void perf_ibs_read(struct perf_event *event) { }
 
+PMU_FORMAT_ATTR(rand_en,	"config:57");
+PMU_FORMAT_ATTR(cnt_ctl,	"config:19");
+
+static struct attribute *ibs_fetch_format_attrs[] = {
+	&format_attr_rand_en.attr,
+	NULL,
+};
+
+static struct attribute *ibs_op_format_attrs[] = {
+	NULL,	/* &format_attr_cnt_ctl.attr if IBS_CAPS_OPCNT */
+	NULL,
+};
+
 static struct perf_ibs perf_ibs_fetch = {
 	.pmu = {
 		.task_ctx_nr	= perf_invalid_context,
@@ -465,6 +483,7 @@ static struct perf_ibs perf_ibs_fetch = {
 	.max_period		= IBS_FETCH_MAX_CNT << 4,
 	.offset_mask		= { MSR_AMD64_IBSFETCH_REG_MASK },
 	.offset_max		= MSR_AMD64_IBSFETCH_REG_COUNT,
+	.format_attrs		= ibs_fetch_format_attrs,
 
 	.get_count		= get_ibs_fetch_count,
 };
@@ -488,6 +507,7 @@ static struct perf_ibs perf_ibs_op = {
 	.max_period		= IBS_OP_MAX_CNT << 4,
 	.offset_mask		= { MSR_AMD64_IBSOP_REG_MASK },
 	.offset_max		= MSR_AMD64_IBSOP_REG_COUNT,
+	.format_attrs		= ibs_op_format_attrs,
 
 	.get_count		= get_ibs_op_count,
 };
@@ -597,6 +617,17 @@ static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name)
 
 	perf_ibs->pcpu = pcpu;
 
+	/* register attributes */
+	if (perf_ibs->format_attrs[0]) {
+		memset(&perf_ibs->format_group, 0, sizeof(perf_ibs->format_group));
+		perf_ibs->format_group.name	= "format";
+		perf_ibs->format_group.attrs	= perf_ibs->format_attrs;
+
+		memset(&perf_ibs->attr_groups, 0, sizeof(perf_ibs->attr_groups));
+		perf_ibs->attr_groups[0]	= &perf_ibs->format_group;
+		perf_ibs->pmu.attr_groups	= perf_ibs->attr_groups;
+	}
+
 	ret = perf_pmu_register(&perf_ibs->pmu, name, -1);
 	if (ret) {
 		perf_ibs->pcpu = NULL;
@@ -608,13 +639,19 @@ static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name)
 
 static __init int perf_event_ibs_init(void)
 {
+	struct attribute **attr = ibs_op_format_attrs;
+
 	if (!ibs_caps)
 		return -ENODEV;	/* ibs not supported by the cpu */
 
 	perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch");
-	if (ibs_caps & IBS_CAPS_OPCNT)
+
+	if (ibs_caps & IBS_CAPS_OPCNT) {
 		perf_ibs_op.config_mask |= IBS_OP_CNT_CTL;
+		*attr++ = &format_attr_cnt_ctl.attr;
+	}
 	perf_ibs_pmu_init(&perf_ibs_op, "ibs_op");
+
 	register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs");
 	printk(KERN_INFO "perf: AMD IBS detected (0x%08x)\n", ibs_caps);
 
-- 
cgit v1.2.3-70-g09d2


From 75fdd155eaf755aa183ca9844a9a178b7a0e3959 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Thu, 4 Oct 2012 17:11:42 -0700
Subject: sections: fix section conflicts in arch/x86

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/include/asm/apic.h          | 2 +-
 arch/x86/kernel/apic/apic_numachip.c | 4 ++--
 arch/x86/kernel/rtc.c                | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index f34261296ff..33880342223 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -409,7 +409,7 @@ extern struct apic *apic;
  * to enforce the order with in them.
  */
 #define apic_driver(sym)					\
-	static struct apic *__apicdrivers_##sym __used		\
+	static const struct apic *__apicdrivers_##sym __used		\
 	__aligned(sizeof(struct apic *))			\
 	__section(.apicdrivers) = { &sym }
 
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
index bc552cff257..a65829ac2b9 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -30,7 +30,7 @@
 
 static int numachip_system __read_mostly;
 
-static struct apic apic_numachip __read_mostly;
+static const struct apic apic_numachip __read_mostly;
 
 static unsigned int get_apic_id(unsigned long x)
 {
@@ -199,7 +199,7 @@ static int numachip_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 	return 0;
 }
 
-static struct apic apic_numachip __refconst = {
+static const struct apic apic_numachip __refconst = {
 
 	.name				= "NumaConnect system",
 	.probe				= numachip_probe,
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c
index af6db6ec5b2..4929c1be0ac 100644
--- a/arch/x86/kernel/rtc.c
+++ b/arch/x86/kernel/rtc.c
@@ -225,7 +225,7 @@ static struct platform_device rtc_device = {
 static __init int add_rtc_cmos(void)
 {
 #ifdef CONFIG_PNP
-	static const char *ids[] __initconst =
+	static const char * const  const ids[] __initconst =
 	    { "PNP0b00", "PNP0b01", "PNP0b02", };
 	struct pnp_dev *dev;
 	struct pnp_id *id;
-- 
cgit v1.2.3-70-g09d2


From 751f409db6216ebd134a94f6dcd97779933a5106 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Thu, 4 Oct 2012 17:15:31 -0700
Subject: compat: move compat_siginfo_t definition to asm/compat.h

This is a preparatory patch for the introduction of NT_SIGINFO elf note.

Make the location of compat_siginfo_t uniform across eight architectures
which have it.  Now it can be pulled in by including asm/compat.h or
linux/compat.h.

Most of the copies are verbatim.  compat_uid[32]_t had to be replaced by
__compat_uid[32]_t.  compat_uptr_t had to be moved up before
compat_siginfo_t in asm/compat.h on a several architectures (tile already
had it moved up).  compat_sigval_t had to be relocated from linux/compat.h
to asm/compat.h.

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Amerigo Wang <amwang@redhat.com>
Cc: "Jonathan M. Foote" <jmfoote@cert.org>
Cc: Roland McGrath <roland@hack.frob.com>
Cc: Pedro Alves <palves@redhat.com>
Cc: Fengguang Wu <fengguang.wu@intel.com>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm64/include/asm/compat.h       | 60 +++++++++++++++++++++++++++-
 arch/arm64/kernel/signal32.c          | 53 -------------------------
 arch/mips/include/asm/compat-signal.h | 62 -----------------------------
 arch/mips/include/asm/compat.h        | 69 +++++++++++++++++++++++++++++++-
 arch/parisc/include/asm/compat.h      | 59 ++++++++++++++++++++++++++-
 arch/parisc/kernel/signal32.h         | 52 ------------------------
 arch/powerpc/include/asm/compat.h     | 60 +++++++++++++++++++++++++++-
 arch/powerpc/include/asm/siginfo.h    |  1 -
 arch/powerpc/kernel/ppc32.h           | 51 ------------------------
 arch/s390/include/asm/compat.h        | 75 ++++++++++++++++++++++++++++++++++-
 arch/s390/kernel/compat_linux.h       | 68 -------------------------------
 arch/sparc/include/asm/compat.h       | 61 +++++++++++++++++++++++++++-
 arch/sparc/include/asm/siginfo.h      |  1 -
 arch/sparc/kernel/signal32.c          | 52 ------------------------
 arch/tile/include/asm/compat.h        | 62 +++++++++++++++++++++++++++++
 arch/tile/kernel/compat_signal.c      | 57 --------------------------
 arch/x86/include/asm/compat.h         | 74 +++++++++++++++++++++++++++++++++-
 arch/x86/include/asm/ia32.h           | 67 -------------------------------
 include/linux/compat.h                |  5 ---
 19 files changed, 513 insertions(+), 476 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h
index a670a33ad73..37e610dc084 100644
--- a/arch/arm64/include/asm/compat.h
+++ b/arch/arm64/include/asm/compat.h
@@ -55,6 +55,7 @@ typedef s64		compat_s64;
 typedef u32		compat_uint_t;
 typedef u32		compat_ulong_t;
 typedef u64		compat_u64;
+typedef u32		compat_uptr_t;
 
 struct compat_timespec {
 	compat_time_t	tv_sec;
@@ -130,6 +131,64 @@ typedef u32		compat_old_sigset_t;
 
 typedef u32		compat_sigset_word;
 
+typedef union compat_sigval {
+	compat_int_t	sival_int;
+	compat_uptr_t	sival_ptr;
+} compat_sigval_t;
+
+typedef struct compat_siginfo {
+	int si_signo;
+	int si_errno;
+	int si_code;
+
+	union {
+		/* The padding is the same size as AArch64. */
+		int _pad[128/sizeof(int) - 3];
+
+		/* kill() */
+		struct {
+			compat_pid_t _pid;	/* sender's pid */
+			__compat_uid32_t _uid;	/* sender's uid */
+		} _kill;
+
+		/* POSIX.1b timers */
+		struct {
+			compat_timer_t _tid;	/* timer id */
+			int _overrun;		/* overrun count */
+			compat_sigval_t _sigval;	/* same as below */
+			int _sys_private;       /* not to be passed to user */
+		} _timer;
+
+		/* POSIX.1b signals */
+		struct {
+			compat_pid_t _pid;	/* sender's pid */
+			__compat_uid32_t _uid;	/* sender's uid */
+			compat_sigval_t _sigval;
+		} _rt;
+
+		/* SIGCHLD */
+		struct {
+			compat_pid_t _pid;	/* which child */
+			__compat_uid32_t _uid;	/* sender's uid */
+			int _status;		/* exit code */
+			compat_clock_t _utime;
+			compat_clock_t _stime;
+		} _sigchld;
+
+		/* SIGILL, SIGFPE, SIGSEGV, SIGBUS */
+		struct {
+			compat_uptr_t _addr; /* faulting insn/memory ref. */
+			short _addr_lsb; /* LSB of the reported address */
+		} _sigfault;
+
+		/* SIGPOLL */
+		struct {
+			compat_long_t _band;	/* POLL_IN, POLL_OUT, POLL_MSG */
+			int _fd;
+		} _sigpoll;
+	} _sifields;
+} compat_siginfo_t;
+
 #define COMPAT_OFF_T_MAX	0x7fffffff
 #define COMPAT_LOFF_T_MAX	0x7fffffffffffffffL
 
@@ -139,7 +198,6 @@ typedef u32		compat_sigset_word;
  * as pointers because the syscall entry code will have
  * appropriately converted them already.
  */
-typedef	u32		compat_uptr_t;
 
 static inline void __user *compat_ptr(compat_uptr_t uptr)
 {
diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c
index ac74c2f261e..0790a87a434 100644
--- a/arch/arm64/kernel/signal32.c
+++ b/arch/arm64/kernel/signal32.c
@@ -30,59 +30,6 @@
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
 
-typedef struct compat_siginfo {
-	int si_signo;
-	int si_errno;
-	int si_code;
-
-	union {
-		/* The padding is the same size as AArch64. */
-		int _pad[SI_PAD_SIZE];
-
-		/* kill() */
-		struct {
-			compat_pid_t _pid;	/* sender's pid */
-			__compat_uid32_t _uid;	/* sender's uid */
-		} _kill;
-
-		/* POSIX.1b timers */
-		struct {
-			compat_timer_t _tid;	/* timer id */
-			int _overrun;		/* overrun count */
-			compat_sigval_t _sigval;	/* same as below */
-			int _sys_private;       /* not to be passed to user */
-		} _timer;
-
-		/* POSIX.1b signals */
-		struct {
-			compat_pid_t _pid;	/* sender's pid */
-			__compat_uid32_t _uid;	/* sender's uid */
-			compat_sigval_t _sigval;
-		} _rt;
-
-		/* SIGCHLD */
-		struct {
-			compat_pid_t _pid;	/* which child */
-			__compat_uid32_t _uid;	/* sender's uid */
-			int _status;		/* exit code */
-			compat_clock_t _utime;
-			compat_clock_t _stime;
-		} _sigchld;
-
-		/* SIGILL, SIGFPE, SIGSEGV, SIGBUS */
-		struct {
-			compat_uptr_t _addr; /* faulting insn/memory ref. */
-			short _addr_lsb; /* LSB of the reported address */
-		} _sigfault;
-
-		/* SIGPOLL */
-		struct {
-			compat_long_t _band;	/* POLL_IN, POLL_OUT, POLL_MSG */
-			int _fd;
-		} _sigpoll;
-	} _sifields;
-} compat_siginfo_t;
-
 struct compat_sigaction {
 	compat_uptr_t			sa_handler;
 	compat_ulong_t			sa_flags;
diff --git a/arch/mips/include/asm/compat-signal.h b/arch/mips/include/asm/compat-signal.h
index 368a99e5c3e..6599a901b63 100644
--- a/arch/mips/include/asm/compat-signal.h
+++ b/arch/mips/include/asm/compat-signal.h
@@ -10,68 +10,6 @@
 
 #include <asm/uaccess.h>
 
-#define SI_PAD_SIZE32   ((SI_MAX_SIZE/sizeof(int)) - 3)
-
-typedef struct compat_siginfo {
-	int si_signo;
-	int si_code;
-	int si_errno;
-
-	union {
-		int _pad[SI_PAD_SIZE32];
-
-		/* kill() */
-		struct {
-			compat_pid_t _pid;	/* sender's pid */
-			compat_uid_t _uid;	/* sender's uid */
-		} _kill;
-
-		/* SIGCHLD */
-		struct {
-			compat_pid_t _pid;	/* which child */
-			compat_uid_t _uid;	/* sender's uid */
-			int _status;		/* exit code */
-			compat_clock_t _utime;
-			compat_clock_t _stime;
-		} _sigchld;
-
-		/* IRIX SIGCHLD */
-		struct {
-			compat_pid_t _pid;	/* which child */
-			compat_clock_t _utime;
-			int _status;		/* exit code */
-			compat_clock_t _stime;
-		} _irix_sigchld;
-
-		/* SIGILL, SIGFPE, SIGSEGV, SIGBUS */
-		struct {
-			s32 _addr; /* faulting insn/memory ref. */
-		} _sigfault;
-
-		/* SIGPOLL, SIGXFSZ (To do ...)  */
-		struct {
-			int _band;	/* POLL_IN, POLL_OUT, POLL_MSG */
-			int _fd;
-		} _sigpoll;
-
-		/* POSIX.1b timers */
-		struct {
-			timer_t _tid;		/* timer id */
-			int _overrun;		/* overrun count */
-			compat_sigval_t _sigval;/* same as below */
-			int _sys_private;       /* not to be passed to user */
-		} _timer;
-
-		/* POSIX.1b signals */
-		struct {
-			compat_pid_t _pid;	/* sender's pid */
-			compat_uid_t _uid;	/* sender's uid */
-			compat_sigval_t _sigval;
-		} _rt;
-
-	} _sifields;
-} compat_siginfo_t;
-
 static inline int __copy_conv_sigset_to_user(compat_sigset_t __user *d,
 	const sigset_t *s)
 {
diff --git a/arch/mips/include/asm/compat.h b/arch/mips/include/asm/compat.h
index b77df0366ee..58277e0e9cd 100644
--- a/arch/mips/include/asm/compat.h
+++ b/arch/mips/include/asm/compat.h
@@ -43,6 +43,7 @@ typedef s64		compat_s64;
 typedef u32		compat_uint_t;
 typedef u32		compat_ulong_t;
 typedef u64		compat_u64;
+typedef u32		compat_uptr_t;
 
 struct compat_timespec {
 	compat_time_t	tv_sec;
@@ -124,6 +125,73 @@ typedef u32		compat_old_sigset_t;	/* at least 32 bits */
 
 typedef u32		compat_sigset_word;
 
+typedef union compat_sigval {
+	compat_int_t	sival_int;
+	compat_uptr_t	sival_ptr;
+} compat_sigval_t;
+
+#define SI_PAD_SIZE32	(128/sizeof(int) - 3)
+
+typedef struct compat_siginfo {
+	int si_signo;
+	int si_code;
+	int si_errno;
+
+	union {
+		int _pad[SI_PAD_SIZE32];
+
+		/* kill() */
+		struct {
+			compat_pid_t _pid;	/* sender's pid */
+			__compat_uid_t _uid;	/* sender's uid */
+		} _kill;
+
+		/* SIGCHLD */
+		struct {
+			compat_pid_t _pid;	/* which child */
+			__compat_uid_t _uid;	/* sender's uid */
+			int _status;		/* exit code */
+			compat_clock_t _utime;
+			compat_clock_t _stime;
+		} _sigchld;
+
+		/* IRIX SIGCHLD */
+		struct {
+			compat_pid_t _pid;	/* which child */
+			compat_clock_t _utime;
+			int _status;		/* exit code */
+			compat_clock_t _stime;
+		} _irix_sigchld;
+
+		/* SIGILL, SIGFPE, SIGSEGV, SIGBUS */
+		struct {
+			s32 _addr; /* faulting insn/memory ref. */
+		} _sigfault;
+
+		/* SIGPOLL, SIGXFSZ (To do ...)  */
+		struct {
+			int _band;	/* POLL_IN, POLL_OUT, POLL_MSG */
+			int _fd;
+		} _sigpoll;
+
+		/* POSIX.1b timers */
+		struct {
+			timer_t _tid;		/* timer id */
+			int _overrun;		/* overrun count */
+			compat_sigval_t _sigval;/* same as below */
+			int _sys_private;       /* not to be passed to user */
+		} _timer;
+
+		/* POSIX.1b signals */
+		struct {
+			compat_pid_t _pid;	/* sender's pid */
+			__compat_uid_t _uid;	/* sender's uid */
+			compat_sigval_t _sigval;
+		} _rt;
+
+	} _sifields;
+} compat_siginfo_t;
+
 #define COMPAT_OFF_T_MAX	0x7fffffff
 #define COMPAT_LOFF_T_MAX	0x7fffffffffffffffL
 
@@ -133,7 +201,6 @@ typedef u32		compat_sigset_word;
  * as pointers because the syscall entry code will have
  * appropriately converted them already.
  */
-typedef u32		compat_uptr_t;
 
 static inline void __user *compat_ptr(compat_uptr_t uptr)
 {
diff --git a/arch/parisc/include/asm/compat.h b/arch/parisc/include/asm/compat.h
index 760f331d4fa..db7a662691a 100644
--- a/arch/parisc/include/asm/compat.h
+++ b/arch/parisc/include/asm/compat.h
@@ -36,6 +36,7 @@ typedef s64	compat_s64;
 typedef u32	compat_uint_t;
 typedef u32	compat_ulong_t;
 typedef u64	compat_u64;
+typedef u32	compat_uptr_t;
 
 struct compat_timespec {
 	compat_time_t		tv_sec;
@@ -127,6 +128,63 @@ typedef u32		compat_old_sigset_t;	/* at least 32 bits */
 
 typedef u32		compat_sigset_word;
 
+typedef union compat_sigval {
+	compat_int_t	sival_int;
+	compat_uptr_t	sival_ptr;
+} compat_sigval_t;
+
+typedef struct compat_siginfo {
+	int si_signo;
+	int si_errno;
+	int si_code;
+
+	union {
+		int _pad[128/sizeof(int) - 3];
+
+		/* kill() */
+		struct {
+			unsigned int _pid;      /* sender's pid */
+			unsigned int _uid;      /* sender's uid */
+		} _kill;
+
+		/* POSIX.1b timers */
+		struct {
+			compat_timer_t _tid;            /* timer id */
+			int _overrun;           /* overrun count */
+			char _pad[sizeof(unsigned int) - sizeof(int)];
+			compat_sigval_t _sigval;        /* same as below */
+			int _sys_private;       /* not to be passed to user */
+		} _timer;
+
+		/* POSIX.1b signals */
+		struct {
+			unsigned int _pid;      /* sender's pid */
+			unsigned int _uid;      /* sender's uid */
+			compat_sigval_t _sigval;
+		} _rt;
+
+		/* SIGCHLD */
+		struct {
+			unsigned int _pid;      /* which child */
+			unsigned int _uid;      /* sender's uid */
+			int _status;            /* exit code */
+			compat_clock_t _utime;
+			compat_clock_t _stime;
+		} _sigchld;
+
+		/* SIGILL, SIGFPE, SIGSEGV, SIGBUS */
+		struct {
+			unsigned int _addr;     /* faulting insn/memory ref. */
+		} _sigfault;
+
+		/* SIGPOLL */
+		struct {
+			int _band;      /* POLL_IN, POLL_OUT, POLL_MSG */
+			int _fd;
+		} _sigpoll;
+	} _sifields;
+} compat_siginfo_t;
+
 #define COMPAT_OFF_T_MAX	0x7fffffff
 #define COMPAT_LOFF_T_MAX	0x7fffffffffffffffL
 
@@ -136,7 +194,6 @@ typedef u32		compat_sigset_word;
  * as pointers because the syscall entry code will have
  * appropriately converted them already.
  */
-typedef	u32		compat_uptr_t;
 
 static inline void __user *compat_ptr(compat_uptr_t uptr)
 {
diff --git a/arch/parisc/kernel/signal32.h b/arch/parisc/kernel/signal32.h
index c7800846422..08a88b5349a 100644
--- a/arch/parisc/kernel/signal32.h
+++ b/arch/parisc/kernel/signal32.h
@@ -55,58 +55,6 @@ struct k_sigaction32 {
 	struct compat_sigaction sa;
 };
 
-typedef struct compat_siginfo {
-        int si_signo;
-        int si_errno;
-        int si_code;
-
-        union {
-                int _pad[((128/sizeof(int)) - 3)];
-
-                /* kill() */
-                struct {
-                        unsigned int _pid;      /* sender's pid */
-                        unsigned int _uid;      /* sender's uid */
-                } _kill;
-
-                /* POSIX.1b timers */
-                struct {
-                        compat_timer_t _tid;            /* timer id */
-                        int _overrun;           /* overrun count */
-                        char _pad[sizeof(unsigned int) - sizeof(int)];
-                        compat_sigval_t _sigval;        /* same as below */
-                        int _sys_private;       /* not to be passed to user */
-                } _timer;
-
-                /* POSIX.1b signals */
-                struct {
-                        unsigned int _pid;      /* sender's pid */
-                        unsigned int _uid;      /* sender's uid */
-                        compat_sigval_t _sigval;
-                } _rt;
-
-                /* SIGCHLD */
-                struct {
-                        unsigned int _pid;      /* which child */
-                        unsigned int _uid;      /* sender's uid */
-                        int _status;            /* exit code */
-                        compat_clock_t _utime;
-                        compat_clock_t _stime;
-                } _sigchld;
-
-                /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */
-                struct {
-                        unsigned int _addr;     /* faulting insn/memory ref. */
-                } _sigfault;
-
-                /* SIGPOLL */
-                struct {
-                        int _band;      /* POLL_IN, POLL_OUT, POLL_MSG */
-                        int _fd;
-                } _sigpoll;
-        } _sifields;
-} compat_siginfo_t;
-
 int copy_siginfo_to_user32 (compat_siginfo_t __user *to, siginfo_t *from);
 int copy_siginfo_from_user32 (siginfo_t *to, compat_siginfo_t __user *from);
 
diff --git a/arch/powerpc/include/asm/compat.h b/arch/powerpc/include/asm/compat.h
index 88e602f6430..84fdf6857c3 100644
--- a/arch/powerpc/include/asm/compat.h
+++ b/arch/powerpc/include/asm/compat.h
@@ -38,6 +38,7 @@ typedef s64		compat_s64;
 typedef u32		compat_uint_t;
 typedef u32		compat_ulong_t;
 typedef u64		compat_u64;
+typedef u32		compat_uptr_t;
 
 struct compat_timespec {
 	compat_time_t	tv_sec;
@@ -114,6 +115,64 @@ typedef u32		compat_old_sigset_t;
 
 typedef u32		compat_sigset_word;
 
+typedef union compat_sigval {
+	compat_int_t	sival_int;
+	compat_uptr_t	sival_ptr;
+} compat_sigval_t;
+
+#define SI_PAD_SIZE32	(128/sizeof(int) - 3)
+
+typedef struct compat_siginfo {
+	int si_signo;
+	int si_errno;
+	int si_code;
+
+	union {
+		int _pad[SI_PAD_SIZE32];
+
+		/* kill() */
+		struct {
+			compat_pid_t _pid;		/* sender's pid */
+			__compat_uid_t _uid;		/* sender's uid */
+		} _kill;
+
+		/* POSIX.1b timers */
+		struct {
+			compat_timer_t _tid;		/* timer id */
+			int _overrun;			/* overrun count */
+			compat_sigval_t _sigval;	/* same as below */
+			int _sys_private;	/* not to be passed to user */
+		} _timer;
+
+		/* POSIX.1b signals */
+		struct {
+			compat_pid_t _pid;		/* sender's pid */
+			__compat_uid_t _uid;		/* sender's uid */
+			compat_sigval_t _sigval;
+		} _rt;
+
+		/* SIGCHLD */
+		struct {
+			compat_pid_t _pid;		/* which child */
+			__compat_uid_t _uid;		/* sender's uid */
+			int _status;			/* exit code */
+			compat_clock_t _utime;
+			compat_clock_t _stime;
+		} _sigchld;
+
+		/* SIGILL, SIGFPE, SIGSEGV, SIGBUS, SIGEMT */
+		struct {
+			unsigned int _addr; /* faulting insn/memory ref. */
+		} _sigfault;
+
+		/* SIGPOLL */
+		struct {
+			int _band;	/* POLL_IN, POLL_OUT, POLL_MSG */
+			int _fd;
+		} _sigpoll;
+	} _sifields;
+} compat_siginfo_t;
+
 #define COMPAT_OFF_T_MAX	0x7fffffff
 #define COMPAT_LOFF_T_MAX	0x7fffffffffffffffL
 
@@ -123,7 +182,6 @@ typedef u32		compat_sigset_word;
  * as pointers because the syscall entry code will have
  * appropriately converted them already.
  */
-typedef	u32		compat_uptr_t;
 
 static inline void __user *compat_ptr(compat_uptr_t uptr)
 {
diff --git a/arch/powerpc/include/asm/siginfo.h b/arch/powerpc/include/asm/siginfo.h
index 49495b0534e..ccce3ef5cd8 100644
--- a/arch/powerpc/include/asm/siginfo.h
+++ b/arch/powerpc/include/asm/siginfo.h
@@ -10,7 +10,6 @@
 
 #ifdef __powerpc64__
 #    define __ARCH_SI_PREAMBLE_SIZE	(4 * sizeof(int))
-#    define SI_PAD_SIZE32		((SI_MAX_SIZE/sizeof(int)) - 3)
 #endif
 
 #include <asm-generic/siginfo.h>
diff --git a/arch/powerpc/kernel/ppc32.h b/arch/powerpc/kernel/ppc32.h
index dc16aefe1dd..02fb0ee2609 100644
--- a/arch/powerpc/kernel/ppc32.h
+++ b/arch/powerpc/kernel/ppc32.h
@@ -16,57 +16,6 @@
 
 /* These are here to support 32-bit syscalls on a 64-bit kernel. */
 
-typedef struct compat_siginfo {
-	int si_signo;
-	int si_errno;
-	int si_code;
-
-	union {
-		int _pad[SI_PAD_SIZE32];
-
-		/* kill() */
-		struct {
-			compat_pid_t _pid;		/* sender's pid */
-			compat_uid_t _uid;		/* sender's uid */
-		} _kill;
-
-		/* POSIX.1b timers */
-		struct {
-			compat_timer_t _tid;			/* timer id */
-			int _overrun;			/* overrun count */
-			compat_sigval_t _sigval;		/* same as below */
-			int _sys_private;		/* not to be passed to user */
-		} _timer;
-
-		/* POSIX.1b signals */
-		struct {
-			compat_pid_t _pid;		/* sender's pid */
-			compat_uid_t _uid;		/* sender's uid */
-			compat_sigval_t _sigval;
-		} _rt;
-
-		/* SIGCHLD */
-		struct {
-			compat_pid_t _pid;		/* which child */
-			compat_uid_t _uid;		/* sender's uid */
-			int _status;			/* exit code */
-			compat_clock_t _utime;
-			compat_clock_t _stime;
-		} _sigchld;
-
-		/* SIGILL, SIGFPE, SIGSEGV, SIGBUS, SIGEMT */
-		struct {
-			unsigned int _addr; /* faulting insn/memory ref. */
-		} _sigfault;
-
-		/* SIGPOLL */
-		struct {
-			int _band;	/* POLL_IN, POLL_OUT, POLL_MSG */
-			int _fd;
-		} _sigpoll;
-	} _sifields;
-} compat_siginfo_t;
-
 #define __old_sigaction32	old_sigaction32
 
 struct __old_sigaction32 {
diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h
index 234f1d859ce..a34a9d612fc 100644
--- a/arch/s390/include/asm/compat.h
+++ b/arch/s390/include/asm/compat.h
@@ -65,6 +65,7 @@ typedef s64		compat_s64;
 typedef u32		compat_uint_t;
 typedef u32		compat_ulong_t;
 typedef u64		compat_u64;
+typedef u32		compat_uptr_t;
 
 struct compat_timespec {
 	compat_time_t	tv_sec;
@@ -144,6 +145,79 @@ typedef u32		compat_old_sigset_t;	/* at least 32 bits */
 
 typedef u32		compat_sigset_word;
 
+typedef union compat_sigval {
+	compat_int_t	sival_int;
+	compat_uptr_t	sival_ptr;
+} compat_sigval_t;
+
+typedef struct compat_siginfo {
+	int	si_signo;
+	int	si_errno;
+	int	si_code;
+
+	union {
+		int _pad[128/sizeof(int) - 3];
+
+		/* kill() */
+		struct {
+			pid_t	_pid;	/* sender's pid */
+			uid_t	_uid;	/* sender's uid */
+		} _kill;
+
+		/* POSIX.1b timers */
+		struct {
+			compat_timer_t _tid;		/* timer id */
+			int _overrun;			/* overrun count */
+			compat_sigval_t _sigval;	/* same as below */
+			int _sys_private;	/* not to be passed to user */
+		} _timer;
+
+		/* POSIX.1b signals */
+		struct {
+			pid_t			_pid;	/* sender's pid */
+			uid_t			_uid;	/* sender's uid */
+			compat_sigval_t		_sigval;
+		} _rt;
+
+		/* SIGCHLD */
+		struct {
+			pid_t			_pid;	/* which child */
+			uid_t			_uid;	/* sender's uid */
+			int			_status;/* exit code */
+			compat_clock_t		_utime;
+			compat_clock_t		_stime;
+		} _sigchld;
+
+		/* SIGILL, SIGFPE, SIGSEGV, SIGBUS */
+		struct {
+			__u32	_addr;	/* faulting insn/memory ref. - pointer */
+		} _sigfault;
+
+		/* SIGPOLL */
+		struct {
+			int	_band;	/* POLL_IN, POLL_OUT, POLL_MSG */
+			int	_fd;
+		} _sigpoll;
+	} _sifields;
+} compat_siginfo_t;
+
+/*
+ * How these fields are to be accessed.
+ */
+#define si_pid		_sifields._kill._pid
+#define si_uid		_sifields._kill._uid
+#define si_status	_sifields._sigchld._status
+#define si_utime	_sifields._sigchld._utime
+#define si_stime	_sifields._sigchld._stime
+#define si_value	_sifields._rt._sigval
+#define si_int		_sifields._rt._sigval.sival_int
+#define si_ptr		_sifields._rt._sigval.sival_ptr
+#define si_addr		_sifields._sigfault._addr
+#define si_band		_sifields._sigpoll._band
+#define si_fd		_sifields._sigpoll._fd
+#define si_tid		_sifields._timer._tid
+#define si_overrun	_sifields._timer._overrun
+
 #define COMPAT_OFF_T_MAX	0x7fffffff
 #define COMPAT_LOFF_T_MAX	0x7fffffffffffffffL
 
@@ -153,7 +227,6 @@ typedef u32		compat_sigset_word;
  * as pointers because the syscall entry code will have
  * appropriately converted them already.
  */
-typedef	u32		compat_uptr_t;
 
 static inline void __user *compat_ptr(compat_uptr_t uptr)
 {
diff --git a/arch/s390/kernel/compat_linux.h b/arch/s390/kernel/compat_linux.h
index 9635d759c2b..90887bd98cf 100644
--- a/arch/s390/kernel/compat_linux.h
+++ b/arch/s390/kernel/compat_linux.h
@@ -23,74 +23,6 @@ struct old_sigaction32 {
        __u32			sa_flags;
        __u32			sa_restorer;	/* Another 32 bit pointer */
 };
- 
-typedef struct compat_siginfo {
-	int	si_signo;
-	int	si_errno;
-	int	si_code;
-
-	union {
-		int _pad[((128/sizeof(int)) - 3)];
-
-		/* kill() */
-		struct {
-			pid_t	_pid;	/* sender's pid */
-			uid_t	_uid;	/* sender's uid */
-		} _kill;
-
-		/* POSIX.1b timers */
-		struct {
-			compat_timer_t _tid;		/* timer id */
-			int _overrun;		/* overrun count */
-			compat_sigval_t _sigval;	/* same as below */
-			int _sys_private;       /* not to be passed to user */
-		} _timer;
-
-		/* POSIX.1b signals */
-		struct {
-			pid_t			_pid;	/* sender's pid */
-			uid_t			_uid;	/* sender's uid */
-			compat_sigval_t		_sigval;
-		} _rt;
-
-		/* SIGCHLD */
-		struct {
-			pid_t			_pid;	/* which child */
-			uid_t			_uid;	/* sender's uid */
-			int			_status;/* exit code */
-			compat_clock_t		_utime;
-			compat_clock_t		_stime;
-		} _sigchld;
-
-		/* SIGILL, SIGFPE, SIGSEGV, SIGBUS */
-		struct {
-			__u32	_addr;	/* faulting insn/memory ref. - pointer */
-		} _sigfault;
-                          
-		/* SIGPOLL */
-		struct {
-			int	_band;	/* POLL_IN, POLL_OUT, POLL_MSG */
-			int	_fd;
-		} _sigpoll;
-	} _sifields;
-} compat_siginfo_t;
-
-/*
- * How these fields are to be accessed.
- */
-#define si_pid		_sifields._kill._pid
-#define si_uid		_sifields._kill._uid
-#define si_status	_sifields._sigchld._status
-#define si_utime	_sifields._sigchld._utime
-#define si_stime	_sifields._sigchld._stime
-#define si_value	_sifields._rt._sigval
-#define si_int		_sifields._rt._sigval.sival_int
-#define si_ptr		_sifields._rt._sigval.sival_ptr
-#define si_addr		_sifields._sigfault._addr
-#define si_band		_sifields._sigpoll._band
-#define si_fd		_sifields._sigpoll._fd    
-#define si_tid		_sifields._timer._tid
-#define si_overrun	_sifields._timer._overrun
 
 /* asm/sigcontext.h */
 typedef union
diff --git a/arch/sparc/include/asm/compat.h b/arch/sparc/include/asm/compat.h
index b8be20d42a0..cef99fbc0a2 100644
--- a/arch/sparc/include/asm/compat.h
+++ b/arch/sparc/include/asm/compat.h
@@ -36,6 +36,7 @@ typedef s64		compat_s64;
 typedef u32		compat_uint_t;
 typedef u32		compat_ulong_t;
 typedef u64		compat_u64;
+typedef u32		compat_uptr_t;
 
 struct compat_timespec {
 	compat_time_t	tv_sec;
@@ -147,6 +148,65 @@ typedef u32		compat_old_sigset_t;
 
 typedef u32		compat_sigset_word;
 
+typedef union compat_sigval {
+	compat_int_t	sival_int;
+	compat_uptr_t	sival_ptr;
+} compat_sigval_t;
+
+#define SI_PAD_SIZE32	(128/sizeof(int) - 3)
+
+typedef struct compat_siginfo {
+	int si_signo;
+	int si_errno;
+	int si_code;
+
+	union {
+		int _pad[SI_PAD_SIZE32];
+
+		/* kill() */
+		struct {
+			compat_pid_t _pid;		/* sender's pid */
+			unsigned int _uid;		/* sender's uid */
+		} _kill;
+
+		/* POSIX.1b timers */
+		struct {
+			compat_timer_t _tid;		/* timer id */
+			int _overrun;			/* overrun count */
+			compat_sigval_t _sigval;	/* same as below */
+			int _sys_private;	/* not to be passed to user */
+		} _timer;
+
+		/* POSIX.1b signals */
+		struct {
+			compat_pid_t _pid;		/* sender's pid */
+			unsigned int _uid;		/* sender's uid */
+			compat_sigval_t _sigval;
+		} _rt;
+
+		/* SIGCHLD */
+		struct {
+			compat_pid_t _pid;		/* which child */
+			unsigned int _uid;		/* sender's uid */
+			int _status;			/* exit code */
+			compat_clock_t _utime;
+			compat_clock_t _stime;
+		} _sigchld;
+
+		/* SIGILL, SIGFPE, SIGSEGV, SIGBUS, SIGEMT */
+		struct {
+			u32 _addr; /* faulting insn/memory ref. */
+			int _trapno;
+		} _sigfault;
+
+		/* SIGPOLL */
+		struct {
+			int _band;	/* POLL_IN, POLL_OUT, POLL_MSG */
+			int _fd;
+		} _sigpoll;
+	} _sifields;
+} compat_siginfo_t;
+
 #define COMPAT_OFF_T_MAX	0x7fffffff
 #define COMPAT_LOFF_T_MAX	0x7fffffffffffffffL
 
@@ -156,7 +216,6 @@ typedef u32		compat_sigset_word;
  * as pointers because the syscall entry code will have
  * appropriately converted them already.
  */
-typedef	u32		compat_uptr_t;
 
 static inline void __user *compat_ptr(compat_uptr_t uptr)
 {
diff --git a/arch/sparc/include/asm/siginfo.h b/arch/sparc/include/asm/siginfo.h
index 215900fce21..dbc182c438b 100644
--- a/arch/sparc/include/asm/siginfo.h
+++ b/arch/sparc/include/asm/siginfo.h
@@ -3,7 +3,6 @@
 
 #if defined(__sparc__) && defined(__arch64__)
 
-#define SI_PAD_SIZE32	((SI_MAX_SIZE/sizeof(int)) - 3)
 #define __ARCH_SI_PREAMBLE_SIZE	(4 * sizeof(int))
 #define __ARCH_SI_BAND_T int
 
diff --git a/arch/sparc/kernel/signal32.c b/arch/sparc/kernel/signal32.c
index a53e0a5fd3a..53e48f721ce 100644
--- a/arch/sparc/kernel/signal32.c
+++ b/arch/sparc/kernel/signal32.c
@@ -54,58 +54,6 @@ struct signal_frame32 {
 	/* __siginfo_rwin_t * */u32 rwin_save;
 } __attribute__((aligned(8)));
 
-typedef struct compat_siginfo{
-	int si_signo;
-	int si_errno;
-	int si_code;
-
-	union {
-		int _pad[SI_PAD_SIZE32];
-
-		/* kill() */
-		struct {
-			compat_pid_t _pid;		/* sender's pid */
-			unsigned int _uid;		/* sender's uid */
-		} _kill;
-
-		/* POSIX.1b timers */
-		struct {
-			compat_timer_t _tid;			/* timer id */
-			int _overrun;			/* overrun count */
-			compat_sigval_t _sigval;		/* same as below */
-			int _sys_private;		/* not to be passed to user */
-		} _timer;
-
-		/* POSIX.1b signals */
-		struct {
-			compat_pid_t _pid;		/* sender's pid */
-			unsigned int _uid;		/* sender's uid */
-			compat_sigval_t _sigval;
-		} _rt;
-
-		/* SIGCHLD */
-		struct {
-			compat_pid_t _pid;		/* which child */
-			unsigned int _uid;		/* sender's uid */
-			int _status;			/* exit code */
-			compat_clock_t _utime;
-			compat_clock_t _stime;
-		} _sigchld;
-
-		/* SIGILL, SIGFPE, SIGSEGV, SIGBUS, SIGEMT */
-		struct {
-			u32 _addr; /* faulting insn/memory ref. */
-			int _trapno;
-		} _sigfault;
-
-		/* SIGPOLL */
-		struct {
-			int _band;	/* POLL_IN, POLL_OUT, POLL_MSG */
-			int _fd;
-		} _sigpoll;
-	} _sifields;
-}compat_siginfo_t;
-
 struct rt_signal_frame32 {
 	struct sparc_stackf32	ss;
 	compat_siginfo_t	info;
diff --git a/arch/tile/include/asm/compat.h b/arch/tile/include/asm/compat.h
index 6e74450ff0a..3063e6fc8da 100644
--- a/arch/tile/include/asm/compat.h
+++ b/arch/tile/include/asm/compat.h
@@ -110,6 +110,68 @@ struct compat_flock64 {
 
 typedef u32               compat_sigset_word;
 
+typedef union compat_sigval {
+	compat_int_t	sival_int;
+	compat_uptr_t	sival_ptr;
+} compat_sigval_t;
+
+#define COMPAT_SI_PAD_SIZE	(128/sizeof(int) - 3)
+
+typedef struct compat_siginfo {
+	int si_signo;
+	int si_errno;
+	int si_code;
+
+	union {
+		int _pad[COMPAT_SI_PAD_SIZE];
+
+		/* kill() */
+		struct {
+			unsigned int _pid;	/* sender's pid */
+			unsigned int _uid;	/* sender's uid */
+		} _kill;
+
+		/* POSIX.1b timers */
+		struct {
+			compat_timer_t _tid;	/* timer id */
+			int _overrun;		/* overrun count */
+			compat_sigval_t _sigval;	/* same as below */
+			int _sys_private;	/* not to be passed to user */
+			int _overrun_incr;	/* amount to add to overrun */
+		} _timer;
+
+		/* POSIX.1b signals */
+		struct {
+			unsigned int _pid;	/* sender's pid */
+			unsigned int _uid;	/* sender's uid */
+			compat_sigval_t _sigval;
+		} _rt;
+
+		/* SIGCHLD */
+		struct {
+			unsigned int _pid;	/* which child */
+			unsigned int _uid;	/* sender's uid */
+			int _status;		/* exit code */
+			compat_clock_t _utime;
+			compat_clock_t _stime;
+		} _sigchld;
+
+		/* SIGILL, SIGFPE, SIGSEGV, SIGBUS */
+		struct {
+			unsigned int _addr;	/* faulting insn/memory ref. */
+#ifdef __ARCH_SI_TRAPNO
+			int _trapno;	/* TRAP # which caused the signal */
+#endif
+		} _sigfault;
+
+		/* SIGPOLL */
+		struct {
+			int _band;	/* POLL_IN, POLL_OUT, POLL_MSG */
+			int _fd;
+		} _sigpoll;
+	} _sifields;
+} compat_siginfo_t;
+
 #define COMPAT_OFF_T_MAX	0x7fffffff
 #define COMPAT_LOFF_T_MAX	0x7fffffffffffffffL
 
diff --git a/arch/tile/kernel/compat_signal.c b/arch/tile/kernel/compat_signal.c
index 474571b8408..7bc0859a9f5 100644
--- a/arch/tile/kernel/compat_signal.c
+++ b/arch/tile/kernel/compat_signal.c
@@ -55,63 +55,6 @@ struct compat_ucontext {
 	sigset_t	  uc_sigmask;	/* mask last for extensibility */
 };
 
-#define COMPAT_SI_PAD_SIZE	((SI_MAX_SIZE - 3 * sizeof(int)) / sizeof(int))
-
-struct compat_siginfo {
-	int si_signo;
-	int si_errno;
-	int si_code;
-
-	union {
-		int _pad[COMPAT_SI_PAD_SIZE];
-
-		/* kill() */
-		struct {
-			unsigned int _pid;	/* sender's pid */
-			unsigned int _uid;	/* sender's uid */
-		} _kill;
-
-		/* POSIX.1b timers */
-		struct {
-			compat_timer_t _tid;	/* timer id */
-			int _overrun;		/* overrun count */
-			compat_sigval_t _sigval;	/* same as below */
-			int _sys_private;	/* not to be passed to user */
-			int _overrun_incr;	/* amount to add to overrun */
-		} _timer;
-
-		/* POSIX.1b signals */
-		struct {
-			unsigned int _pid;	/* sender's pid */
-			unsigned int _uid;	/* sender's uid */
-			compat_sigval_t _sigval;
-		} _rt;
-
-		/* SIGCHLD */
-		struct {
-			unsigned int _pid;	/* which child */
-			unsigned int _uid;	/* sender's uid */
-			int _status;		/* exit code */
-			compat_clock_t _utime;
-			compat_clock_t _stime;
-		} _sigchld;
-
-		/* SIGILL, SIGFPE, SIGSEGV, SIGBUS */
-		struct {
-			unsigned int _addr;	/* faulting insn/memory ref. */
-#ifdef __ARCH_SI_TRAPNO
-			int _trapno;	/* TRAP # which caused the signal */
-#endif
-		} _sigfault;
-
-		/* SIGPOLL */
-		struct {
-			int _band;	/* POLL_IN, POLL_OUT, POLL_MSG */
-			int _fd;
-		} _sigpoll;
-	} _sifields;
-};
-
 struct compat_rt_sigframe {
 	unsigned char save_area[C_ABI_SAVE_AREA_SIZE]; /* caller save area */
 	struct compat_siginfo info;
diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h
index fedf32b73e6..59c6c401f79 100644
--- a/arch/x86/include/asm/compat.h
+++ b/arch/x86/include/asm/compat.h
@@ -41,6 +41,7 @@ typedef s64 __attribute__((aligned(4))) compat_s64;
 typedef u32		compat_uint_t;
 typedef u32		compat_ulong_t;
 typedef u64 __attribute__((aligned(4))) compat_u64;
+typedef u32		compat_uptr_t;
 
 struct compat_timespec {
 	compat_time_t	tv_sec;
@@ -124,6 +125,78 @@ typedef u32		compat_old_sigset_t;	/* at least 32 bits */
 
 typedef u32               compat_sigset_word;
 
+typedef union compat_sigval {
+	compat_int_t	sival_int;
+	compat_uptr_t	sival_ptr;
+} compat_sigval_t;
+
+typedef struct compat_siginfo {
+	int si_signo;
+	int si_errno;
+	int si_code;
+
+	union {
+		int _pad[128/sizeof(int) - 3];
+
+		/* kill() */
+		struct {
+			unsigned int _pid;	/* sender's pid */
+			unsigned int _uid;	/* sender's uid */
+		} _kill;
+
+		/* POSIX.1b timers */
+		struct {
+			compat_timer_t _tid;	/* timer id */
+			int _overrun;		/* overrun count */
+			compat_sigval_t _sigval;	/* same as below */
+			int _sys_private;	/* not to be passed to user */
+			int _overrun_incr;	/* amount to add to overrun */
+		} _timer;
+
+		/* POSIX.1b signals */
+		struct {
+			unsigned int _pid;	/* sender's pid */
+			unsigned int _uid;	/* sender's uid */
+			compat_sigval_t _sigval;
+		} _rt;
+
+		/* SIGCHLD */
+		struct {
+			unsigned int _pid;	/* which child */
+			unsigned int _uid;	/* sender's uid */
+			int _status;		/* exit code */
+			compat_clock_t _utime;
+			compat_clock_t _stime;
+		} _sigchld;
+
+		/* SIGCHLD (x32 version) */
+		struct {
+			unsigned int _pid;	/* which child */
+			unsigned int _uid;	/* sender's uid */
+			int _status;		/* exit code */
+			compat_s64 _utime;
+			compat_s64 _stime;
+		} _sigchld_x32;
+
+		/* SIGILL, SIGFPE, SIGSEGV, SIGBUS */
+		struct {
+			unsigned int _addr;	/* faulting insn/memory ref. */
+		} _sigfault;
+
+		/* SIGPOLL */
+		struct {
+			int _band;	/* POLL_IN, POLL_OUT, POLL_MSG */
+			int _fd;
+		} _sigpoll;
+
+		struct {
+			unsigned int _call_addr; /* calling insn */
+			int _syscall;	/* triggering system call number */
+			unsigned int _arch;	/* AUDIT_ARCH_* of syscall */
+		} _sigsys;
+	} _sifields;
+} compat_siginfo_t;
+
 #define COMPAT_OFF_T_MAX	0x7fffffff
 #define COMPAT_LOFF_T_MAX	0x7fffffffffffffffL
 
@@ -209,7 +282,6 @@ typedef struct user_regs_struct32 compat_elf_gregset_t;
  * as pointers because the syscall entry code will have
  * appropriately converted them already.
  */
-typedef	u32		compat_uptr_t;
 
 static inline void __user *compat_ptr(compat_uptr_t uptr)
 {
diff --git a/arch/x86/include/asm/ia32.h b/arch/x86/include/asm/ia32.h
index b04cbdb138c..e6232773ce4 100644
--- a/arch/x86/include/asm/ia32.h
+++ b/arch/x86/include/asm/ia32.h
@@ -86,73 +86,6 @@ struct stat64 {
 	unsigned long long	st_ino;
 } __attribute__((packed));
 
-typedef struct compat_siginfo {
-	int si_signo;
-	int si_errno;
-	int si_code;
-
-	union {
-		int _pad[((128 / sizeof(int)) - 3)];
-
-		/* kill() */
-		struct {
-			unsigned int _pid;	/* sender's pid */
-			unsigned int _uid;	/* sender's uid */
-		} _kill;
-
-		/* POSIX.1b timers */
-		struct {
-			compat_timer_t _tid;	/* timer id */
-			int _overrun;		/* overrun count */
-			compat_sigval_t _sigval;	/* same as below */
-			int _sys_private;	/* not to be passed to user */
-			int _overrun_incr;	/* amount to add to overrun */
-		} _timer;
-
-		/* POSIX.1b signals */
-		struct {
-			unsigned int _pid;	/* sender's pid */
-			unsigned int _uid;	/* sender's uid */
-			compat_sigval_t _sigval;
-		} _rt;
-
-		/* SIGCHLD */
-		struct {
-			unsigned int _pid;	/* which child */
-			unsigned int _uid;	/* sender's uid */
-			int _status;		/* exit code */
-			compat_clock_t _utime;
-			compat_clock_t _stime;
-		} _sigchld;
-
-		/* SIGCHLD (x32 version) */
-		struct {
-			unsigned int _pid;	/* which child */
-			unsigned int _uid;	/* sender's uid */
-			int _status;		/* exit code */
-			compat_s64 _utime;
-			compat_s64 _stime;
-		} _sigchld_x32;
-
-		/* SIGILL, SIGFPE, SIGSEGV, SIGBUS */
-		struct {
-			unsigned int _addr;	/* faulting insn/memory ref. */
-		} _sigfault;
-
-		/* SIGPOLL */
-		struct {
-			int _band;	/* POLL_IN, POLL_OUT, POLL_MSG */
-			int _fd;
-		} _sigpoll;
-
-		struct {
-			unsigned int _call_addr; /* calling insn */
-			int _syscall;	/* triggering system call number */
-			unsigned int _arch;	/* AUDIT_ARCH_* of syscall */
-		} _sigsys;
-	} _sifields;
-} compat_siginfo_t;
-
 #define IA32_STACK_TOP IA32_PAGE_OFFSET
 
 #ifdef __KERNEL__
diff --git a/include/linux/compat.h b/include/linux/compat.h
index fd4e29956d1..3f53d002c7c 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -160,11 +160,6 @@ struct compat_ustat {
 	char			f_fpack[6];
 };
 
-typedef union compat_sigval {
-	compat_int_t	sival_int;
-	compat_uptr_t	sival_ptr;
-} compat_sigval_t;
-
 #define COMPAT_SIGEV_PAD_SIZE	((SIGEV_MAX_SIZE/sizeof(int)) - 3)
 
 typedef struct compat_sigevent {
-- 
cgit v1.2.3-70-g09d2


From af1839eb4bd4fe079a125eb199205fceb6ae19e6 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Mon, 8 Oct 2012 16:28:08 -0700
Subject: Kconfig: clean up the long arch list for the UID16 config option

Introduce HAVE_UID16 config option and select it in corresponding
architecture Kconfig files.  UID16 now only depends on HAVE_UID16.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Mike Frysinger <vapier@gentoo.org>
Cc: Mikael Starvik <starvik@axis.com>
Cc: Jesper Nilsson <jesper.nilsson@axis.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: Richard Weinberger <richard@nod.at>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm/Kconfig       | 1 +
 arch/arm64/Kconfig     | 1 +
 arch/blackfin/Kconfig  | 1 +
 arch/cris/Kconfig      | 1 +
 arch/frv/Kconfig       | 1 +
 arch/h8300/Kconfig     | 1 +
 arch/m68k/Kconfig      | 1 +
 arch/s390/Kconfig      | 1 +
 arch/sh/Kconfig        | 1 +
 arch/sparc/Kconfig     | 2 ++
 arch/um/Kconfig.common | 1 +
 arch/x86/Kconfig       | 2 ++
 init/Kconfig           | 6 ++++--
 13 files changed, 18 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 6d2f7f5c003..5f543967293 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -39,6 +39,7 @@ config ARM
 	select HARDIRQS_SW_RESEND
 	select GENERIC_IRQ_PROBE
 	select GENERIC_IRQ_SHOW
+	select HAVE_UID16
 	select ARCH_WANT_IPC_PARSE_VERSION
 	select HARDIRQS_SW_RESEND
 	select CPU_PM if (SUSPEND || CPU_IDLE)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 767ba568545..e61acae0d89 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -193,6 +193,7 @@ config COMPAT
 	bool "Kernel support for 32-bit EL0"
 	depends on !ARM64_64K_PAGES
 	select COMPAT_BINFMT_ELF
+	select HAVE_UID16
 	help
 	  This option enables support for a 32-bit EL0 running under a 64-bit
 	  kernel at EL1. AArch32-specific components such as system calls,
diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig
index c7092e6057c..2169889c73e 100644
--- a/arch/blackfin/Kconfig
+++ b/arch/blackfin/Kconfig
@@ -33,6 +33,7 @@ config BLACKFIN
 	select HAVE_PERF_EVENTS
 	select ARCH_HAVE_CUSTOM_GPIO_H
 	select ARCH_WANT_OPTIONAL_GPIOLIB
+	select HAVE_UID16
 	select ARCH_WANT_IPC_PARSE_VERSION
 	select HAVE_GENERIC_HARDIRQS
 	select GENERIC_ATOMIC64
diff --git a/arch/cris/Kconfig b/arch/cris/Kconfig
index 72bd5ae50a8..a118163b04e 100644
--- a/arch/cris/Kconfig
+++ b/arch/cris/Kconfig
@@ -42,6 +42,7 @@ config CRIS
 	select HAVE_IDE
 	select GENERIC_ATOMIC64
 	select HAVE_GENERIC_HARDIRQS
+	select HAVE_UID16
 	select ARCH_WANT_IPC_PARSE_VERSION
 	select GENERIC_IRQ_SHOW
 	select GENERIC_IOMAP
diff --git a/arch/frv/Kconfig b/arch/frv/Kconfig
index 971c0a19fac..cc5709d1835 100644
--- a/arch/frv/Kconfig
+++ b/arch/frv/Kconfig
@@ -5,6 +5,7 @@ config FRV
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_IRQ_WORK
 	select HAVE_PERF_EVENTS
+	select HAVE_UID16
 	select HAVE_GENERIC_HARDIRQS
 	select GENERIC_IRQ_SHOW
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig
index 5e8a0d9a09c..90462eb23d0 100644
--- a/arch/h8300/Kconfig
+++ b/arch/h8300/Kconfig
@@ -3,6 +3,7 @@ config H8300
 	default y
 	select HAVE_IDE
 	select HAVE_GENERIC_HARDIRQS
+	select HAVE_UID16
 	select ARCH_WANT_IPC_PARSE_VERSION
 	select GENERIC_IRQ_SHOW
 	select GENERIC_CPU_DEVICES
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index b22df9410dc..3e2b2f66db6 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -6,6 +6,7 @@ config M68K
 	select HAVE_GENERIC_HARDIRQS
 	select GENERIC_IRQ_SHOW
 	select GENERIC_ATOMIC64
+	select HAVE_UID16
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG if RMW_INSNS
 	select GENERIC_CPU_DEVICES
 	select GENERIC_STRNCPY_FROM_USER if MMU
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index c8af429991d..baba37cfcf8 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -126,6 +126,7 @@ config S390
 	select ARCH_INLINE_WRITE_UNLOCK_BH
 	select ARCH_INLINE_WRITE_UNLOCK_IRQ
 	select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE
+	select HAVE_UID16 if 32BIT
 	select ARCH_WANT_IPC_PARSE_VERSION
 	select GENERIC_SMP_IDLE_THREAD
 	select GENERIC_TIME_VSYSCALL
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 36f5141e804..f0c85e42477 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -21,6 +21,7 @@ config SUPERH
 	select HAVE_KERNEL_LZMA
 	select HAVE_KERNEL_XZ
 	select HAVE_KERNEL_LZO
+	select HAVE_UID16
 	select ARCH_WANT_IPC_PARSE_VERSION
 	select HAVE_SYSCALL_TRACEPOINTS
 	select HAVE_REGS_AND_STACK_ACCESS_API
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 67f1f6f5f4e..e66481015d3 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -42,6 +42,7 @@ config SPARC32
 	def_bool !64BIT
 	select GENERIC_ATOMIC64
 	select CLZ_TAB
+	select HAVE_UID16
 
 config SPARC64
 	def_bool 64BIT
@@ -571,6 +572,7 @@ config COMPAT
 	depends on SPARC64
 	default y
 	select COMPAT_BINFMT_ELF
+	select HAVE_UID16
 	select ARCH_WANT_OLD_COMPAT_IPC
 
 config SYSVIPC_COMPAT
diff --git a/arch/um/Kconfig.common b/arch/um/Kconfig.common
index cb837c22392..648121b037d 100644
--- a/arch/um/Kconfig.common
+++ b/arch/um/Kconfig.common
@@ -7,6 +7,7 @@ config UML
 	bool
 	default y
 	select HAVE_GENERIC_HARDIRQS
+	select HAVE_UID16
 	select GENERIC_IRQ_SHOW
 	select GENERIC_CPU_DEVICES
 	select GENERIC_IO
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index b72777ff32a..fd5d7c2c2da 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -10,6 +10,7 @@ config X86_32
 	def_bool y
 	depends on !64BIT
 	select CLKSRC_I8253
+	select HAVE_UID16
 
 config X86_64
 	def_bool y
@@ -2168,6 +2169,7 @@ config IA32_EMULATION
 	bool "IA32 Emulation"
 	depends on X86_64
 	select COMPAT_BINFMT_ELF
+	select HAVE_UID16
 	---help---
 	  Include code to run legacy 32-bit programs under a
 	  64-bit kernel. You should likely turn this on, unless you're
diff --git a/init/Kconfig b/init/Kconfig
index ed6334dd5e7..38bab420bd9 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1125,10 +1125,12 @@ menuconfig EXPERT
           environments which can tolerate a "non-standard" kernel.
           Only use this if you really know what you are doing.
 
+config HAVE_UID16
+	bool
+
 config UID16
 	bool "Enable 16-bit UID system calls" if EXPERT
-	depends on ARM || BLACKFIN || CRIS || FRV || H8300 || X86_32 || M68K || (S390 && !64BIT) || SUPERH || SPARC32 || (SPARC64 && COMPAT) || UML || (X86_64 && IA32_EMULATION) \
-		|| AARCH32_EMULATION
+	depends on HAVE_UID16
 	default y
 	help
 	  This enables the legacy 16-bit UID syscall wrappers.
-- 
cgit v1.2.3-70-g09d2


From b69ec42b1b194cc88f04b3fbcda8d3f93182d6c3 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Mon, 8 Oct 2012 16:28:11 -0700
Subject: Kconfig: clean up the long arch list for the DEBUG_KMEMLEAK config
 option

Introduce HAVE_DEBUG_KMEMLEAK config option and select it in corresponding
architecture Kconfig files.  DEBUG_KMEMLEAK now only depends on
HAVE_DEBUG_KMEMLEAK.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Michal Simek <monstr@monstr.eu>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Chris Metcalf <cmetcalf@tilera.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm/Kconfig        | 1 +
 arch/arm64/Kconfig      | 1 +
 arch/microblaze/Kconfig | 1 +
 arch/mips/Kconfig       | 1 +
 arch/powerpc/Kconfig    | 1 +
 arch/s390/Kconfig       | 1 +
 arch/sh/Kconfig         | 1 +
 arch/sparc/Kconfig      | 1 +
 arch/tile/Kconfig       | 1 +
 arch/x86/Kconfig        | 1 +
 lib/Kconfig.debug       | 8 ++++----
 11 files changed, 14 insertions(+), 4 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 5f543967293..2867a774230 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -25,6 +25,7 @@ config ARM
 	select HAVE_FUNCTION_GRAPH_TRACER if (!THUMB2_KERNEL)
 	select ARCH_BINFMT_ELF_RANDOMIZE_PIE
 	select HAVE_GENERIC_DMA_COHERENT
+	select HAVE_DEBUG_KMEMLEAK
 	select HAVE_KERNEL_GZIP
 	select HAVE_KERNEL_LZO
 	select HAVE_KERNEL_LZMA
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index e61acae0d89..5dc9273781d 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -10,6 +10,7 @@ config ARM64
 	select GENERIC_TIME_VSYSCALL
 	select HARDIRQS_SW_RESEND
 	select HAVE_ARCH_TRACEHOOK
+	select HAVE_DEBUG_KMEMLEAK
 	select HAVE_DMA_API_DEBUG
 	select HAVE_DMA_ATTRS
 	select HAVE_GENERIC_DMA_COHERENT
diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig
index 6133bed2b85..53fd94ab60f 100644
--- a/arch/microblaze/Kconfig
+++ b/arch/microblaze/Kconfig
@@ -16,6 +16,7 @@ config MICROBLAZE
 	select OF
 	select OF_EARLY_FLATTREE
 	select ARCH_WANT_IPC_PARSE_VERSION
+	select HAVE_DEBUG_KMEMLEAK
 	select IRQ_DOMAIN
 	select HAVE_GENERIC_HARDIRQS
 	select GENERIC_IRQ_PROBE
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index faf65286574..335115e5bdd 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -17,6 +17,7 @@ config MIPS
 	select HAVE_FUNCTION_GRAPH_TRACER
 	select HAVE_KPROBES
 	select HAVE_KRETPROBES
+	select HAVE_DEBUG_KMEMLEAK
 	select ARCH_BINFMT_ELF_RANDOMIZE_PIE
 	select RTC_LIB if !MACH_LOONGSON
 	select GENERIC_ATOMIC64 if !64BIT
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 4ce0be32d15..6a798a70a6d 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -113,6 +113,7 @@ config PPC
 	select HAVE_DMA_API_DEBUG
 	select USE_GENERIC_SMP_HELPERS if SMP
 	select HAVE_OPROFILE
+	select HAVE_DEBUG_KMEMLEAK
 	select HAVE_SYSCALL_WRAPPERS if PPC64
 	select GENERIC_ATOMIC64 if PPC32
 	select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index baba37cfcf8..8c6d7986f6d 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -80,6 +80,7 @@ config S390
 	select HAVE_IRQ_WORK
 	select HAVE_PERF_EVENTS
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
+	select HAVE_DEBUG_KMEMLEAK
 	select HAVE_KERNEL_GZIP
 	select HAVE_KERNEL_BZIP2
 	select HAVE_KERNEL_LZMA
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index f0c85e42477..cfbf3e3c982 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -16,6 +16,7 @@ config SUPERH
 	select ARCH_HAVE_CUSTOM_GPIO_H
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG if (GUSA_RB || CPU_SH4A)
 	select PERF_USE_VMALLOC
+	select HAVE_DEBUG_KMEMLEAK
 	select HAVE_KERNEL_GZIP
 	select HAVE_KERNEL_BZIP2
 	select HAVE_KERNEL_LZMA
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index e66481015d3..274d6cf0ada 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -60,6 +60,7 @@ config SPARC64
 	select HAVE_DYNAMIC_FTRACE
 	select HAVE_FTRACE_MCOUNT_RECORD
 	select HAVE_SYSCALL_TRACEPOINTS
+	select HAVE_DEBUG_KMEMLEAK
 	select RTC_DRV_CMOS
 	select RTC_DRV_BQ4802
 	select RTC_DRV_SUN4V
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index c9a3c1fe729..9a0d77d3ba1 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -9,6 +9,7 @@ config TILE
 	select GENERIC_FIND_FIRST_BIT
 	select USE_GENERIC_SMP_HELPERS
 	select CC_OPTIMIZE_FOR_SIZE
+	select HAVE_DEBUG_KMEMLEAK
 	select HAVE_GENERIC_HARDIRQS
 	select GENERIC_IRQ_PROBE
 	select GENERIC_PENDING_IRQ if SMP
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index fd5d7c2c2da..3fea1848d95 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -66,6 +66,7 @@ config X86
 	select HAVE_PERF_EVENTS_NMI
 	select HAVE_PERF_REGS
 	select HAVE_PERF_USER_STACK_DUMP
+	select HAVE_DEBUG_KMEMLEAK
 	select ANON_INODES
 	select HAVE_ALIGNED_STRUCT_PAGE if SLUB && !M386
 	select HAVE_CMPXCHG_LOCAL if !M386
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 7fba3a98967..736db399050 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -450,12 +450,12 @@ config SLUB_STATS
 	  out which slabs are relevant to a particular load.
 	  Try running: slabinfo -DA
 
+config HAVE_DEBUG_KMEMLEAK
+	bool
+
 config DEBUG_KMEMLEAK
 	bool "Kernel memory leak detector"
-	depends on DEBUG_KERNEL && EXPERIMENTAL && \
-		(X86 || ARM || PPC || MIPS || S390 || SPARC64 || SUPERH || \
-		 MICROBLAZE || TILE || ARM64)
-
+	depends on DEBUG_KERNEL && EXPERIMENTAL && HAVE_DEBUG_KMEMLEAK
 	select DEBUG_FS
 	select STACKTRACE if STACKTRACE_SUPPORT
 	select KALLSYMS
-- 
cgit v1.2.3-70-g09d2


From 7ac57a89de958fbb5271dc504d0c25e34dbeec32 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Mon, 8 Oct 2012 16:28:16 -0700
Subject: Kconfig: clean up the "#if defined(arch)" list for exception-trace
 sysctl entry

Introduce SYSCTL_EXCEPTION_TRACE config option and selec it in the
architectures requiring support for the "exception-trace" debug_table
entry in kernel/sysctl.c.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Chris Metcalf <cmetcalf@tilera.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm64/Kconfig   | 1 +
 arch/powerpc/Kconfig | 1 +
 arch/s390/Kconfig    | 1 +
 arch/sparc/Kconfig   | 1 +
 arch/tile/Kconfig    | 1 +
 arch/x86/Kconfig     | 1 +
 init/Kconfig         | 5 +++++
 kernel/sysctl.c      | 3 +--
 8 files changed, 12 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index a3085605874..7ff68c94607 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -28,6 +28,7 @@ config ARM64
 	select PERF_USE_VMALLOC
 	select RTC_LIB
 	select SPARSE_IRQ
+	select SYSCTL_EXCEPTION_TRACE
 	help
 	  ARM 64-bit (AArch64) Linux support.
 
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 6a798a70a6d..df7edb887a0 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -99,6 +99,7 @@ config PPC
 	select HAVE_DYNAMIC_FTRACE
 	select HAVE_FUNCTION_TRACER
 	select HAVE_FUNCTION_GRAPH_TRACER
+	select SYSCTL_EXCEPTION_TRACE
 	select ARCH_WANT_OPTIONAL_GPIOLIB
 	select HAVE_IDE
 	select HAVE_IOREMAP_PROT
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 8c6d7986f6d..ceff7aef247 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -68,6 +68,7 @@ config S390
 	select HAVE_FTRACE_MCOUNT_RECORD
 	select HAVE_C_RECORDMCOUNT
 	select HAVE_SYSCALL_TRACEPOINTS
+	select SYSCTL_EXCEPTION_TRACE
 	select HAVE_DYNAMIC_FTRACE
 	select HAVE_FUNCTION_GRAPH_TRACER
 	select HAVE_REGS_AND_STACK_ACCESS_API
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 700a01adec3..e184075877d 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -18,6 +18,7 @@ config SPARC
 	select HAVE_OPROFILE
 	select HAVE_ARCH_KGDB if !SMP || SPARC64
 	select HAVE_ARCH_TRACEHOOK
+	select SYSCTL_EXCEPTION_TRACE
 	select ARCH_WANT_OPTIONAL_GPIOLIB
 	select RTC_CLASS
 	select RTC_DRV_M48T59
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index df69d4296b4..dc46490adca 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -7,6 +7,7 @@ config TILE
 	select HAVE_DMA_API_DEBUG
 	select HAVE_KVM if !TILEGX
 	select GENERIC_FIND_FIRST_BIT
+	select SYSCTL_EXCEPTION_TRACE
 	select USE_GENERIC_SMP_HELPERS
 	select CC_OPTIMIZE_FOR_SIZE
 	select HAVE_DEBUG_KMEMLEAK
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 3fea1848d95..6119d6c7002 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -47,6 +47,7 @@ config X86
 	select HAVE_FUNCTION_GRAPH_FP_TEST
 	select HAVE_FUNCTION_TRACE_MCOUNT_TEST
 	select HAVE_SYSCALL_TRACEPOINTS
+	select SYSCTL_EXCEPTION_TRACE
 	select HAVE_KVM
 	select HAVE_ARCH_KGDB
 	select HAVE_ARCH_TRACEHOOK
diff --git a/init/Kconfig b/init/Kconfig
index 38bab420bd9..4c93533da42 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1152,6 +1152,11 @@ config SYSCTL_SYSCALL
 
 	  If unsure say N here.
 
+config SYSCTL_EXCEPTION_TRACE
+	bool
+	help
+	  Enable support for /proc/sys/debug/exception-trace.
+
 config KALLSYMS
 	 bool "Load all symbols for debugging/ksymoops" if EXPERT
 	 default y
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index c2a2f8084ba..26f65eaa01f 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1549,8 +1549,7 @@ static struct ctl_table fs_table[] = {
 };
 
 static struct ctl_table debug_table[] = {
-#if defined(CONFIG_X86) || defined(CONFIG_PPC) || defined(CONFIG_SPARC) || \
-    defined(CONFIG_S390) || defined(CONFIG_TILE) || defined(CONFIG_ARM64)
+#ifdef CONFIG_SYSCTL_EXCEPTION_TRACE
 	{
 		.procname	= "exception-trace",
 		.data		= &show_unhandled_signals,
-- 
cgit v1.2.3-70-g09d2


From b1a86e15dc0304366f50ba1720834bc419c801b1 Mon Sep 17 00:00:00 2001
From: Suresh Siddha <suresh.b.siddha@intel.com>
Date: Mon, 8 Oct 2012 16:28:23 -0700
Subject: x86, pat: remove the dependency on 'vm_pgoff' in track/untrack pfn
 vma routines

'pfn' argument for track_pfn_vma_new() can be used for reserving the
attribute for the pfn range.  No need to depend on 'vm_pgoff'

Similarly, untrack_pfn_vma() can depend on the 'pfn' argument if it is
non-zero or can use follow_phys() to get the starting value of the pfn
range.

Also the non zero 'size' argument can be used instead of recomputing it
from vma.

This cleanup also prepares the ground for the track/untrack pfn vma
routines to take over the ownership of setting PAT specific vm_flag in the
'vma'.

[khlebnikov@openvz.org: Clear pfn to paddr conversion]
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Konstantin Khlebnikov <khlebnikov@openvz.org>
Cc: Venkatesh Pallipadi <venki@google.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Nick Piggin <npiggin@kernel.dk>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Carsten Otte <cotte@de.ibm.com>
Cc: Chris Metcalf <cmetcalf@tilera.com>
Cc: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Eric Paris <eparis@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Morris <james.l.morris@oracle.com>
Cc: Jason Baron <jbaron@redhat.com>
Cc: Kentaro Takeda <takedakn@nttdata.co.jp>
Cc: Matt Helsley <matthltc@us.ibm.com>
Cc: Nick Piggin <npiggin@kernel.dk>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Robert Richter <robert.richter@amd.com>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/mm/pat.c | 33 +++++++++++++++++++--------------
 1 file changed, 19 insertions(+), 14 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 3d68ef6d226..de36c886cd3 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -704,21 +704,18 @@ int track_pfn_vma_copy(struct vm_area_struct *vma)
 int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot,
 			unsigned long pfn, unsigned long size)
 {
+	resource_size_t paddr = (resource_size_t)pfn << PAGE_SHIFT;
 	unsigned long flags;
-	resource_size_t paddr;
-	unsigned long vma_size = vma->vm_end - vma->vm_start;
 
-	if (is_linear_pfn_mapping(vma)) {
-		/* reserve the whole chunk starting from vm_pgoff */
-		paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT;
-		return reserve_pfn_range(paddr, vma_size, prot, 0);
-	}
+	/* reserve the whole chunk starting from paddr */
+	if (is_linear_pfn_mapping(vma))
+		return reserve_pfn_range(paddr, size, prot, 0);
 
 	if (!pat_enabled)
 		return 0;
 
 	/* for vm_insert_pfn and friends, we set prot based on lookup */
-	flags = lookup_memtype(pfn << PAGE_SHIFT);
+	flags = lookup_memtype(paddr);
 	*prot = __pgprot((pgprot_val(vma->vm_page_prot) & (~_PAGE_CACHE_MASK)) |
 			 flags);
 
@@ -728,20 +725,28 @@ int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot,
 /*
  * untrack_pfn_vma is called while unmapping a pfnmap for a region.
  * untrack can be called for a specific region indicated by pfn and size or
- * can be for the entire vma (in which case size can be zero).
+ * can be for the entire vma (in which case pfn, size are zero).
  */
 void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn,
 			unsigned long size)
 {
 	resource_size_t paddr;
-	unsigned long vma_size = vma->vm_end - vma->vm_start;
+	unsigned long prot;
 
-	if (is_linear_pfn_mapping(vma)) {
-		/* free the whole chunk starting from vm_pgoff */
-		paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT;
-		free_pfn_range(paddr, vma_size);
+	if (!is_linear_pfn_mapping(vma))
 		return;
+
+	/* free the chunk starting from pfn or the whole chunk */
+	paddr = (resource_size_t)pfn << PAGE_SHIFT;
+	if (!paddr && !size) {
+		if (follow_phys(vma, vma->vm_start, 0, &prot, &paddr)) {
+			WARN_ON_ONCE(1);
+			return;
+		}
+
+		size = vma->vm_end - vma->vm_start;
 	}
+	free_pfn_range(paddr, size);
 }
 
 pgprot_t pgprot_writecombine(pgprot_t prot)
-- 
cgit v1.2.3-70-g09d2


From 5180da410db6369d1f95c9014da1c9bc33fb043e Mon Sep 17 00:00:00 2001
From: Suresh Siddha <suresh.b.siddha@intel.com>
Date: Mon, 8 Oct 2012 16:28:29 -0700
Subject: x86, pat: separate the pfn attribute tracking for remap_pfn_range and
 vm_insert_pfn

With PAT enabled, vm_insert_pfn() looks up the existing pfn memory
attribute and uses it.  Expectation is that the driver reserves the
memory attributes for the pfn before calling vm_insert_pfn().

remap_pfn_range() (when called for the whole vma) will setup a new
attribute (based on the prot argument) for the specified pfn range.
This addresses the legacy usage which typically calls remap_pfn_range()
with a desired memory attribute.  For ranges smaller than the vma size
(which is typically not the case), remap_pfn_range() will use the
existing memory attribute for the pfn range.

Expose two different API's for these different behaviors.
track_pfn_insert() for tracking the pfn attribute set by vm_insert_pfn()
and track_pfn_remap() for the remap_pfn_range().

This cleanup also prepares the ground for the track/untrack pfn vma
routines to take over the ownership of setting PAT specific vm_flag in
the 'vma'.

[khlebnikov@openvz.org: Clear checks in track_pfn_remap()]
[akpm@linux-foundation.org: tweak a few comments]
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Konstantin Khlebnikov <khlebnikov@openvz.org>
Cc: Venkatesh Pallipadi <venki@google.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Nick Piggin <npiggin@kernel.dk>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Carsten Otte <cotte@de.ibm.com>
Cc: Chris Metcalf <cmetcalf@tilera.com>
Cc: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Eric Paris <eparis@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: James Morris <james.l.morris@oracle.com>
Cc: Jason Baron <jbaron@redhat.com>
Cc: Kentaro Takeda <takedakn@nttdata.co.jp>
Cc: Konstantin Khlebnikov <khlebnikov@openvz.org>
Cc: Matt Helsley <matthltc@us.ibm.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Robert Richter <robert.richter@amd.com>
Cc: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/mm/pat.c             | 47 +++++++++++++++++++++++++++---------
 include/asm-generic/pgtable.h | 55 +++++++++++++++++++++++++------------------
 mm/memory.c                   | 13 ++++------
 3 files changed, 73 insertions(+), 42 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index de36c886cd3..74a702674e8 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -664,13 +664,13 @@ static void free_pfn_range(u64 paddr, unsigned long size)
 }
 
 /*
- * track_pfn_vma_copy is called when vma that is covering the pfnmap gets
+ * track_pfn_copy is called when vma that is covering the pfnmap gets
  * copied through copy_page_range().
  *
  * If the vma has a linear pfn mapping for the entire range, we get the prot
  * from pte and reserve the entire vma range with single reserve_pfn_range call.
  */
-int track_pfn_vma_copy(struct vm_area_struct *vma)
+int track_pfn_copy(struct vm_area_struct *vma)
 {
 	resource_size_t paddr;
 	unsigned long prot;
@@ -694,15 +694,12 @@ int track_pfn_vma_copy(struct vm_area_struct *vma)
 }
 
 /*
- * track_pfn_vma_new is called when a _new_ pfn mapping is being established
- * for physical range indicated by pfn and size.
- *
  * prot is passed in as a parameter for the new mapping. If the vma has a
  * linear pfn mapping for the entire range reserve the entire vma range with
  * single reserve_pfn_range call.
  */
-int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot,
-			unsigned long pfn, unsigned long size)
+int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
+		    unsigned long pfn, unsigned long size)
 {
 	resource_size_t paddr = (resource_size_t)pfn << PAGE_SHIFT;
 	unsigned long flags;
@@ -714,8 +711,36 @@ int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot,
 	if (!pat_enabled)
 		return 0;
 
-	/* for vm_insert_pfn and friends, we set prot based on lookup */
+	/*
+	 * For anything smaller than the vma size we set prot based on the
+	 * lookup.
+	 */
 	flags = lookup_memtype(paddr);
+
+	/* Check memtype for the remaining pages */
+	while (size > PAGE_SIZE) {
+		size -= PAGE_SIZE;
+		paddr += PAGE_SIZE;
+		if (flags != lookup_memtype(paddr))
+			return -EINVAL;
+	}
+
+	*prot = __pgprot((pgprot_val(vma->vm_page_prot) & (~_PAGE_CACHE_MASK)) |
+			 flags);
+
+	return 0;
+}
+
+int track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
+		     unsigned long pfn)
+{
+	unsigned long flags;
+
+	if (!pat_enabled)
+		return 0;
+
+	/* Set prot based on lookup */
+	flags = lookup_memtype((resource_size_t)pfn << PAGE_SHIFT);
 	*prot = __pgprot((pgprot_val(vma->vm_page_prot) & (~_PAGE_CACHE_MASK)) |
 			 flags);
 
@@ -723,12 +748,12 @@ int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot,
 }
 
 /*
- * untrack_pfn_vma is called while unmapping a pfnmap for a region.
+ * untrack_pfn is called while unmapping a pfnmap for a region.
  * untrack can be called for a specific region indicated by pfn and size or
  * can be for the entire vma (in which case pfn, size are zero).
  */
-void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn,
-			unsigned long size)
+void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
+		 unsigned long size)
 {
 	resource_size_t paddr;
 	unsigned long prot;
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index ff4947b7a97..d4d4592c97f 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -381,48 +381,57 @@ static inline void ptep_modify_prot_commit(struct mm_struct *mm,
 
 #ifndef __HAVE_PFNMAP_TRACKING
 /*
- * Interface that can be used by architecture code to keep track of
- * memory type of pfn mappings (remap_pfn_range, vm_insert_pfn)
- *
- * track_pfn_vma_new is called when a _new_ pfn mapping is being established
- * for physical range indicated by pfn and size.
+ * Interfaces that can be used by architecture code to keep track of
+ * memory type of pfn mappings specified by the remap_pfn_range,
+ * vm_insert_pfn.
+ */
+
+/*
+ * track_pfn_remap is called when a _new_ pfn mapping is being established
+ * by remap_pfn_range() for physical range indicated by pfn and size.
  */
-static inline int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot,
-					unsigned long pfn, unsigned long size)
+static inline int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
+				  unsigned long pfn, unsigned long size)
 {
 	return 0;
 }
 
 /*
- * Interface that can be used by architecture code to keep track of
- * memory type of pfn mappings (remap_pfn_range, vm_insert_pfn)
- *
- * track_pfn_vma_copy is called when vma that is covering the pfnmap gets
+ * track_pfn_insert is called when a _new_ single pfn is established
+ * by vm_insert_pfn().
+ */
+static inline int track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
+				   unsigned long pfn)
+{
+	return 0;
+}
+
+/*
+ * track_pfn_copy is called when vma that is covering the pfnmap gets
  * copied through copy_page_range().
  */
-static inline int track_pfn_vma_copy(struct vm_area_struct *vma)
+static inline int track_pfn_copy(struct vm_area_struct *vma)
 {
 	return 0;
 }
 
 /*
- * Interface that can be used by architecture code to keep track of
- * memory type of pfn mappings (remap_pfn_range, vm_insert_pfn)
- *
  * untrack_pfn_vma is called while unmapping a pfnmap for a region.
  * untrack can be called for a specific region indicated by pfn and size or
- * can be for the entire vma (in which case size can be zero).
+ * can be for the entire vma (in which case pfn, size are zero).
  */
-static inline void untrack_pfn_vma(struct vm_area_struct *vma,
-					unsigned long pfn, unsigned long size)
+static inline void untrack_pfn(struct vm_area_struct *vma,
+			       unsigned long pfn, unsigned long size)
 {
 }
 #else
-extern int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot,
-				unsigned long pfn, unsigned long size);
-extern int track_pfn_vma_copy(struct vm_area_struct *vma);
-extern void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn,
-				unsigned long size);
+extern int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
+			   unsigned long pfn, unsigned long size);
+extern int track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
+			    unsigned long pfn);
+extern int track_pfn_copy(struct vm_area_struct *vma);
+extern void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
+			unsigned long size);
 #endif
 
 #ifdef CONFIG_MMU
diff --git a/mm/memory.c b/mm/memory.c
index 57361708d1a..6bef278ad30 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1060,7 +1060,7 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 		 * We do not free on error cases below as remove_vma
 		 * gets called on error from higher level routine
 		 */
-		ret = track_pfn_vma_copy(vma);
+		ret = track_pfn_copy(vma);
 		if (ret)
 			return ret;
 	}
@@ -1328,7 +1328,7 @@ static void unmap_single_vma(struct mmu_gather *tlb,
 		uprobe_munmap(vma, start, end);
 
 	if (unlikely(is_pfn_mapping(vma)))
-		untrack_pfn_vma(vma, 0, 0);
+		untrack_pfn(vma, 0, 0);
 
 	if (start != end) {
 		if (unlikely(is_vm_hugetlb_page(vma))) {
@@ -2162,14 +2162,11 @@ int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
 
 	if (addr < vma->vm_start || addr >= vma->vm_end)
 		return -EFAULT;
-	if (track_pfn_vma_new(vma, &pgprot, pfn, PAGE_SIZE))
+	if (track_pfn_insert(vma, &pgprot, pfn))
 		return -EINVAL;
 
 	ret = insert_pfn(vma, addr, pfn, pgprot);
 
-	if (ret)
-		untrack_pfn_vma(vma, pfn, PAGE_SIZE);
-
 	return ret;
 }
 EXPORT_SYMBOL(vm_insert_pfn);
@@ -2311,7 +2308,7 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
 
 	vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP;
 
-	err = track_pfn_vma_new(vma, &prot, pfn, PAGE_ALIGN(size));
+	err = track_pfn_remap(vma, &prot, pfn, PAGE_ALIGN(size));
 	if (err) {
 		/*
 		 * To indicate that track_pfn related cleanup is not
@@ -2335,7 +2332,7 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
 	} while (pgd++, addr = next, addr != end);
 
 	if (err)
-		untrack_pfn_vma(vma, pfn, PAGE_ALIGN(size));
+		untrack_pfn(vma, pfn, PAGE_ALIGN(size));
 
 	return err;
 }
-- 
cgit v1.2.3-70-g09d2


From b3b9c2932c32e0692018ed5f12f3fd8c70eea8ce Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <khlebnikov@openvz.org>
Date: Mon, 8 Oct 2012 16:28:34 -0700
Subject: mm, x86, pat: rework linear pfn-mmap tracking

Replace the generic vma-flag VM_PFN_AT_MMAP with x86-only VM_PAT.

We can toss mapping address from remap_pfn_range() into
track_pfn_vma_new(), and collect all PAT-related logic together in
arch/x86/.

This patch also restores orignal frustration-free is_cow_mapping() check
in remap_pfn_range(), as it was before commit v2.6.28-rc8-88-g3c8bb73
("x86: PAT: store vm_pgoff for all linear_over_vma_region mappings - v3")

is_linear_pfn_mapping() checks can be removed from mm/huge_memory.c,
because it already handled by VM_PFNMAP in VM_NO_THP bit-mask.

[suresh.b.siddha@intel.com: Reset the VM_PAT flag as part of untrack_pfn_vma()]
Signed-off-by: Konstantin Khlebnikov <khlebnikov@openvz.org>
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Venkatesh Pallipadi <venki@google.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Nick Piggin <npiggin@kernel.dk>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Carsten Otte <cotte@de.ibm.com>
Cc: Chris Metcalf <cmetcalf@tilera.com>
Cc: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Eric Paris <eparis@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: James Morris <james.l.morris@oracle.com>
Cc: Jason Baron <jbaron@redhat.com>
Cc: Kentaro Takeda <takedakn@nttdata.co.jp>
Cc: Matt Helsley <matthltc@us.ibm.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Robert Richter <robert.richter@amd.com>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Venkatesh Pallipadi <venki@google.com>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/mm/pat.c             | 17 ++++++++++++-----
 include/asm-generic/pgtable.h |  6 ++++--
 include/linux/mm.h            | 20 +-------------------
 mm/huge_memory.c              | 19 +++----------------
 mm/memory.c                   | 26 ++++++++++----------------
 5 files changed, 30 insertions(+), 58 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 74a702674e8..0eb572eda40 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -677,7 +677,7 @@ int track_pfn_copy(struct vm_area_struct *vma)
 	unsigned long vma_size = vma->vm_end - vma->vm_start;
 	pgprot_t pgprot;
 
-	if (is_linear_pfn_mapping(vma)) {
+	if (vma->vm_flags & VM_PAT) {
 		/*
 		 * reserve the whole chunk covered by vma. We need the
 		 * starting address and protection from pte.
@@ -699,14 +699,20 @@ int track_pfn_copy(struct vm_area_struct *vma)
  * single reserve_pfn_range call.
  */
 int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
-		    unsigned long pfn, unsigned long size)
+		    unsigned long pfn, unsigned long addr, unsigned long size)
 {
 	resource_size_t paddr = (resource_size_t)pfn << PAGE_SHIFT;
 	unsigned long flags;
 
 	/* reserve the whole chunk starting from paddr */
-	if (is_linear_pfn_mapping(vma))
-		return reserve_pfn_range(paddr, size, prot, 0);
+	if (addr == vma->vm_start && size == (vma->vm_end - vma->vm_start)) {
+		int ret;
+
+		ret = reserve_pfn_range(paddr, size, prot, 0);
+		if (!ret)
+			vma->vm_flags |= VM_PAT;
+		return ret;
+	}
 
 	if (!pat_enabled)
 		return 0;
@@ -758,7 +764,7 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
 	resource_size_t paddr;
 	unsigned long prot;
 
-	if (!is_linear_pfn_mapping(vma))
+	if (!(vma->vm_flags & VM_PAT))
 		return;
 
 	/* free the chunk starting from pfn or the whole chunk */
@@ -772,6 +778,7 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
 		size = vma->vm_end - vma->vm_start;
 	}
 	free_pfn_range(paddr, size);
+	vma->vm_flags &= ~VM_PAT;
 }
 
 pgprot_t pgprot_writecombine(pgprot_t prot)
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index d4d4592c97f..c9a612069c8 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -391,7 +391,8 @@ static inline void ptep_modify_prot_commit(struct mm_struct *mm,
  * by remap_pfn_range() for physical range indicated by pfn and size.
  */
 static inline int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
-				  unsigned long pfn, unsigned long size)
+				  unsigned long pfn, unsigned long addr,
+				  unsigned long size)
 {
 	return 0;
 }
@@ -426,7 +427,8 @@ static inline void untrack_pfn(struct vm_area_struct *vma,
 }
 #else
 extern int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
-			   unsigned long pfn, unsigned long size);
+			   unsigned long pfn, unsigned long addr,
+			   unsigned long size);
 extern int track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
 			    unsigned long pfn);
 extern int track_pfn_copy(struct vm_area_struct *vma);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 311be906b57..75d1632d347 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -117,7 +117,7 @@ extern unsigned int kobjsize(const void *objp);
 #define VM_CAN_NONLINEAR 0x08000000	/* Has ->fault & does nonlinear pages */
 #define VM_MIXEDMAP	0x10000000	/* Can contain "struct page" and pure PFN pages */
 #define VM_SAO		0x20000000	/* Strong Access Ordering (powerpc) */
-#define VM_PFN_AT_MMAP	0x40000000	/* PFNMAP vma that is fully mapped at mmap time */
+#define VM_PAT		0x40000000	/* PAT reserves whole VMA at once (x86) */
 #define VM_MERGEABLE	0x80000000	/* KSM may merge identical pages */
 
 /* Bits set in the VMA until the stack is in its final location */
@@ -158,24 +158,6 @@ extern pgprot_t protection_map[16];
 #define FAULT_FLAG_RETRY_NOWAIT	0x10	/* Don't drop mmap_sem and wait when retrying */
 #define FAULT_FLAG_KILLABLE	0x20	/* The fault task is in SIGKILL killable region */
 
-/*
- * This interface is used by x86 PAT code to identify a pfn mapping that is
- * linear over entire vma. This is to optimize PAT code that deals with
- * marking the physical region with a particular prot. This is not for generic
- * mm use. Note also that this check will not work if the pfn mapping is
- * linear for a vma starting at physical address 0. In which case PAT code
- * falls back to slow path of reserving physical range page by page.
- */
-static inline int is_linear_pfn_mapping(struct vm_area_struct *vma)
-{
-	return !!(vma->vm_flags & VM_PFN_AT_MMAP);
-}
-
-static inline int is_pfn_mapping(struct vm_area_struct *vma)
-{
-	return !!(vma->vm_flags & VM_PFNMAP);
-}
-
 /*
  * vm_fault is filled by the the pagefault handler and passed to the vma's
  * ->fault function. The vma's ->fault is responsible for returning a bitmask
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 141dbb69509..73cb22ee966 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1655,11 +1655,7 @@ int khugepaged_enter_vma_merge(struct vm_area_struct *vma)
 	if (vma->vm_ops)
 		/* khugepaged not yet working on file or special mappings */
 		return 0;
-	/*
-	 * If is_pfn_mapping() is true is_learn_pfn_mapping() must be
-	 * true too, verify it here.
-	 */
-	VM_BUG_ON(is_linear_pfn_mapping(vma) || vma->vm_flags & VM_NO_THP);
+	VM_BUG_ON(vma->vm_flags & VM_NO_THP);
 	hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK;
 	hend = vma->vm_end & HPAGE_PMD_MASK;
 	if (hstart < hend)
@@ -1912,11 +1908,7 @@ static void collapse_huge_page(struct mm_struct *mm,
 		goto out;
 	if (is_vma_temporary_stack(vma))
 		goto out;
-	/*
-	 * If is_pfn_mapping() is true is_learn_pfn_mapping() must be
-	 * true too, verify it here.
-	 */
-	VM_BUG_ON(is_linear_pfn_mapping(vma) || vma->vm_flags & VM_NO_THP);
+	VM_BUG_ON(vma->vm_flags & VM_NO_THP);
 
 	pgd = pgd_offset(mm, address);
 	if (!pgd_present(*pgd))
@@ -2154,12 +2146,7 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
 			goto skip;
 		if (is_vma_temporary_stack(vma))
 			goto skip;
-		/*
-		 * If is_pfn_mapping() is true is_learn_pfn_mapping()
-		 * must be true too, verify it here.
-		 */
-		VM_BUG_ON(is_linear_pfn_mapping(vma) ||
-			  vma->vm_flags & VM_NO_THP);
+		VM_BUG_ON(vma->vm_flags & VM_NO_THP);
 
 		hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK;
 		hend = vma->vm_end & HPAGE_PMD_MASK;
diff --git a/mm/memory.c b/mm/memory.c
index 6bef278ad30..655e1429388 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1055,7 +1055,7 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	if (is_vm_hugetlb_page(vma))
 		return copy_hugetlb_page_range(dst_mm, src_mm, vma);
 
-	if (unlikely(is_pfn_mapping(vma))) {
+	if (unlikely(vma->vm_flags & VM_PFNMAP)) {
 		/*
 		 * We do not free on error cases below as remove_vma
 		 * gets called on error from higher level routine
@@ -1327,7 +1327,7 @@ static void unmap_single_vma(struct mmu_gather *tlb,
 	if (vma->vm_file)
 		uprobe_munmap(vma, start, end);
 
-	if (unlikely(is_pfn_mapping(vma)))
+	if (unlikely(vma->vm_flags & VM_PFNMAP))
 		untrack_pfn(vma, 0, 0);
 
 	if (start != end) {
@@ -2299,26 +2299,20 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
 	 * There's a horrible special case to handle copy-on-write
 	 * behaviour that some programs depend on. We mark the "original"
 	 * un-COW'ed pages by matching them up with "vma->vm_pgoff".
+	 * See vm_normal_page() for details.
 	 */
-	if (addr == vma->vm_start && end == vma->vm_end) {
+	if (is_cow_mapping(vma->vm_flags)) {
+		if (addr != vma->vm_start || end != vma->vm_end)
+			return -EINVAL;
 		vma->vm_pgoff = pfn;
-		vma->vm_flags |= VM_PFN_AT_MMAP;
-	} else if (is_cow_mapping(vma->vm_flags))
+	}
+
+	err = track_pfn_remap(vma, &prot, pfn, addr, PAGE_ALIGN(size));
+	if (err)
 		return -EINVAL;
 
 	vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP;
 
-	err = track_pfn_remap(vma, &prot, pfn, PAGE_ALIGN(size));
-	if (err) {
-		/*
-		 * To indicate that track_pfn related cleanup is not
-		 * needed from higher level routine calling unmap_vmas
-		 */
-		vma->vm_flags &= ~(VM_IO | VM_RESERVED | VM_PFNMAP);
-		vma->vm_flags &= ~VM_PFN_AT_MMAP;
-		return -EINVAL;
-	}
-
 	BUG_ON(addr >= end);
 	pfn -= addr >> PAGE_SHIFT;
 	pgd = pgd_offset(mm, addr);
-- 
cgit v1.2.3-70-g09d2


From 314e51b9851b4f4e8ab302243ff5a6fc6147f379 Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <khlebnikov@openvz.org>
Date: Mon, 8 Oct 2012 16:29:02 -0700
Subject: mm: kill vma flag VM_RESERVED and mm->reserved_vm counter

A long time ago, in v2.4, VM_RESERVED kept swapout process off VMA,
currently it lost original meaning but still has some effects:

 | effect                 | alternative flags
-+------------------------+---------------------------------------------
1| account as reserved_vm | VM_IO
2| skip in core dump      | VM_IO, VM_DONTDUMP
3| do not merge or expand | VM_IO, VM_DONTEXPAND, VM_HUGETLB, VM_PFNMAP
4| do not mlock           | VM_IO, VM_DONTEXPAND, VM_HUGETLB, VM_PFNMAP

This patch removes reserved_vm counter from mm_struct.  Seems like nobody
cares about it, it does not exported into userspace directly, it only
reduces total_vm showed in proc.

Thus VM_RESERVED can be replaced with VM_IO or pair VM_DONTEXPAND | VM_DONTDUMP.

remap_pfn_range() and io_remap_pfn_range() set VM_IO|VM_DONTEXPAND|VM_DONTDUMP.
remap_vmalloc_range() set VM_DONTEXPAND | VM_DONTDUMP.

[akpm@linux-foundation.org: drivers/vfio/pci/vfio_pci.c fixup]
Signed-off-by: Konstantin Khlebnikov <khlebnikov@openvz.org>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Carsten Otte <cotte@de.ibm.com>
Cc: Chris Metcalf <cmetcalf@tilera.com>
Cc: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Eric Paris <eparis@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Morris <james.l.morris@oracle.com>
Cc: Jason Baron <jbaron@redhat.com>
Cc: Kentaro Takeda <takedakn@nttdata.co.jp>
Cc: Matt Helsley <matthltc@us.ibm.com>
Cc: Nick Piggin <npiggin@kernel.dk>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Robert Richter <robert.richter@amd.com>
Cc: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Venkatesh Pallipadi <venki@google.com>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/vm/unevictable-lru.txt             |  4 ++--
 arch/alpha/kernel/pci-sysfs.c                    |  2 +-
 arch/ia64/kernel/perfmon.c                       |  2 +-
 arch/ia64/mm/init.c                              |  3 ++-
 arch/powerpc/kvm/book3s_hv.c                     |  2 +-
 arch/sparc/kernel/pci.c                          |  2 +-
 arch/unicore32/kernel/process.c                  |  2 +-
 arch/x86/xen/mmu.c                               |  3 +--
 drivers/char/mbcs.c                              |  2 +-
 drivers/char/mem.c                               |  2 +-
 drivers/char/mspec.c                             |  2 +-
 drivers/gpu/drm/drm_gem.c                        |  2 +-
 drivers/gpu/drm/drm_vm.c                         | 10 ++--------
 drivers/gpu/drm/exynos/exynos_drm_gem.c          |  2 +-
 drivers/gpu/drm/gma500/framebuffer.c             |  3 +--
 drivers/gpu/drm/ttm/ttm_bo_vm.c                  |  4 ++--
 drivers/gpu/drm/udl/udl_fb.c                     |  2 +-
 drivers/infiniband/hw/ehca/ehca_uverbs.c         |  4 ++--
 drivers/infiniband/hw/ipath/ipath_file_ops.c     |  2 +-
 drivers/infiniband/hw/qib/qib_file_ops.c         |  2 +-
 drivers/media/pci/meye/meye.c                    |  2 +-
 drivers/media/platform/omap/omap_vout.c          |  2 +-
 drivers/media/platform/vino.c                    |  2 +-
 drivers/media/usb/sn9c102/sn9c102_core.c         |  3 +--
 drivers/media/usb/usbvision/usbvision-video.c    |  3 +--
 drivers/media/v4l2-core/videobuf-dma-sg.c        |  2 +-
 drivers/media/v4l2-core/videobuf-vmalloc.c       |  2 +-
 drivers/media/v4l2-core/videobuf2-memops.c       |  2 +-
 drivers/misc/carma/carma-fpga.c                  |  2 --
 drivers/misc/sgi-gru/grufile.c                   |  5 ++---
 drivers/mtd/mtdchar.c                            |  2 +-
 drivers/scsi/sg.c                                |  2 +-
 drivers/staging/omapdrm/omap_gem_dmabuf.c        |  2 +-
 drivers/staging/tidspbridge/rmgr/drv_interface.c |  2 +-
 drivers/uio/uio.c                                |  4 +---
 drivers/usb/mon/mon_bin.c                        |  2 +-
 drivers/video/68328fb.c                          |  2 +-
 drivers/video/aty/atyfb_base.c                   |  3 +--
 drivers/video/fb-puv3.c                          |  3 +--
 drivers/video/fb_defio.c                         |  2 +-
 drivers/video/fbmem.c                            |  3 +--
 drivers/video/gbefb.c                            |  2 +-
 drivers/video/omap2/omapfb/omapfb-main.c         |  2 +-
 drivers/video/sbuslib.c                          |  5 ++---
 drivers/video/smscufx.c                          |  1 -
 drivers/video/udlfb.c                            |  1 -
 drivers/video/vermilion/vermilion.c              |  1 -
 drivers/video/vfb.c                              |  1 -
 drivers/xen/gntalloc.c                           |  2 +-
 drivers/xen/gntdev.c                             |  2 +-
 drivers/xen/privcmd.c                            |  3 ++-
 fs/binfmt_elf.c                                  |  2 +-
 fs/binfmt_elf_fdpic.c                            |  2 +-
 fs/hugetlbfs/inode.c                             |  2 +-
 fs/proc/task_mmu.c                               |  2 +-
 include/linux/mempolicy.h                        |  2 +-
 include/linux/mm.h                               |  3 +--
 include/linux/mm_types.h                         |  1 -
 kernel/events/core.c                             |  2 +-
 mm/ksm.c                                         |  3 +--
 mm/memory.c                                      | 11 +++++------
 mm/mlock.c                                       |  2 +-
 mm/mmap.c                                        |  2 --
 mm/nommu.c                                       |  2 +-
 mm/vmalloc.c                                     |  3 +--
 security/selinux/selinuxfs.c                     |  2 +-
 sound/core/pcm_native.c                          |  6 +++---
 sound/usb/usx2y/us122l.c                         |  2 +-
 sound/usb/usx2y/usX2Yhwdep.c                     |  2 +-
 sound/usb/usx2y/usx2yhwdeppcm.c                  |  2 +-
 70 files changed, 77 insertions(+), 105 deletions(-)

(limited to 'arch/x86')

diff --git a/Documentation/vm/unevictable-lru.txt b/Documentation/vm/unevictable-lru.txt
index fa206cccf89..323ff5dba1c 100644
--- a/Documentation/vm/unevictable-lru.txt
+++ b/Documentation/vm/unevictable-lru.txt
@@ -371,8 +371,8 @@ mlock_fixup() filters several classes of "special" VMAs:
    mlock_fixup() will call make_pages_present() in the hugetlbfs VMA range to
    allocate the huge pages and populate the ptes.
 
-3) VMAs with VM_DONTEXPAND or VM_RESERVED are generally userspace mappings of
-   kernel pages, such as the VDSO page, relay channel pages, etc.  These pages
+3) VMAs with VM_DONTEXPAND are generally userspace mappings of kernel pages,
+   such as the VDSO page, relay channel pages, etc. These pages
    are inherently unevictable and are not managed on the LRU lists.
    mlock_fixup() treats these VMAs the same as hugetlbfs VMAs.  It calls
    make_pages_present() to populate the ptes.
diff --git a/arch/alpha/kernel/pci-sysfs.c b/arch/alpha/kernel/pci-sysfs.c
index 53649c7d006..b51f7b4818c 100644
--- a/arch/alpha/kernel/pci-sysfs.c
+++ b/arch/alpha/kernel/pci-sysfs.c
@@ -26,7 +26,7 @@ static int hose_mmap_page_range(struct pci_controller *hose,
 		base = sparse ? hose->sparse_io_base : hose->dense_io_base;
 
 	vma->vm_pgoff += base >> PAGE_SHIFT;
-	vma->vm_flags |= (VM_IO | VM_RESERVED);
+	vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP;
 
 	return io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
 				  vma->vm_end - vma->vm_start,
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index f388b4e18a3..ea39eba61ef 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -2307,7 +2307,7 @@ pfm_smpl_buffer_alloc(struct task_struct *task, struct file *filp, pfm_context_t
 	 */
 	vma->vm_mm	     = mm;
 	vma->vm_file	     = get_file(filp);
-	vma->vm_flags	     = VM_READ| VM_MAYREAD |VM_RESERVED;
+	vma->vm_flags	     = VM_READ|VM_MAYREAD|VM_DONTEXPAND|VM_DONTDUMP;
 	vma->vm_page_prot    = PAGE_READONLY; /* XXX may need to change */
 
 	/*
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 0eab454867a..082e383c1b6 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -138,7 +138,8 @@ ia64_init_addr_space (void)
 			vma->vm_mm = current->mm;
 			vma->vm_end = PAGE_SIZE;
 			vma->vm_page_prot = __pgprot(pgprot_val(PAGE_READONLY) | _PAGE_MA_NAT);
-			vma->vm_flags = VM_READ | VM_MAYREAD | VM_IO | VM_RESERVED;
+			vma->vm_flags = VM_READ | VM_MAYREAD | VM_IO |
+					VM_DONTEXPAND | VM_DONTDUMP;
 			down_write(&current->mm->mmap_sem);
 			if (insert_vm_struct(current->mm, vma)) {
 				up_write(&current->mm->mmap_sem);
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 83e929e66f9..721d4603a23 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1183,7 +1183,7 @@ static const struct vm_operations_struct kvm_rma_vm_ops = {
 
 static int kvm_rma_mmap(struct file *file, struct vm_area_struct *vma)
 {
-	vma->vm_flags |= VM_RESERVED;
+	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
 	vma->vm_ops = &kvm_rma_vm_ops;
 	return 0;
 }
diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c
index acc8c838ff7..75b31bcdead 100644
--- a/arch/sparc/kernel/pci.c
+++ b/arch/sparc/kernel/pci.c
@@ -779,7 +779,7 @@ static int __pci_mmap_make_offset(struct pci_dev *pdev,
 static void __pci_mmap_set_flags(struct pci_dev *dev, struct vm_area_struct *vma,
 					    enum pci_mmap_state mmap_state)
 {
-	vma->vm_flags |= (VM_IO | VM_RESERVED);
+	vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP;
 }
 
 /* Set vm_page_prot of VMA, as appropriate for this architecture, for a pci
diff --git a/arch/unicore32/kernel/process.c b/arch/unicore32/kernel/process.c
index b6f0458c314..b008586dad7 100644
--- a/arch/unicore32/kernel/process.c
+++ b/arch/unicore32/kernel/process.c
@@ -380,7 +380,7 @@ int vectors_user_mapping(void)
 	return install_special_mapping(mm, 0xffff0000, PAGE_SIZE,
 				       VM_READ | VM_EXEC |
 				       VM_MAYREAD | VM_MAYEXEC |
-				       VM_RESERVED,
+				       VM_DONTEXPAND | VM_DONTDUMP,
 				       NULL);
 }
 
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 5a16824cc2b..fd28d86fe3d 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -2451,8 +2451,7 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
 
 	prot = __pgprot(pgprot_val(prot) | _PAGE_IOMAP);
 
-	BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_RESERVED | VM_IO)) ==
-				(VM_PFNMAP | VM_RESERVED | VM_IO)));
+	BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO)));
 
 	rmd.mfn = mfn;
 	rmd.prot = prot;
diff --git a/drivers/char/mbcs.c b/drivers/char/mbcs.c
index 0c7d340b9ab..f74e892711d 100644
--- a/drivers/char/mbcs.c
+++ b/drivers/char/mbcs.c
@@ -507,7 +507,7 @@ static int mbcs_gscr_mmap(struct file *fp, struct vm_area_struct *vma)
 
 	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
 
-	/* Remap-pfn-range will mark the range VM_IO and VM_RESERVED */
+	/* Remap-pfn-range will mark the range VM_IO */
 	if (remap_pfn_range(vma,
 			    vma->vm_start,
 			    __pa(soft->gscr_addr) >> PAGE_SHIFT,
diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index e5eedfa24c9..0537903c985 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -322,7 +322,7 @@ static int mmap_mem(struct file *file, struct vm_area_struct *vma)
 
 	vma->vm_ops = &mmap_mem_ops;
 
-	/* Remap-pfn-range will mark the range VM_IO and VM_RESERVED */
+	/* Remap-pfn-range will mark the range VM_IO */
 	if (remap_pfn_range(vma,
 			    vma->vm_start,
 			    vma->vm_pgoff,
diff --git a/drivers/char/mspec.c b/drivers/char/mspec.c
index 845f97fd183..e1f60f968fd 100644
--- a/drivers/char/mspec.c
+++ b/drivers/char/mspec.c
@@ -286,7 +286,7 @@ mspec_mmap(struct file *file, struct vm_area_struct *vma,
 	atomic_set(&vdata->refcnt, 1);
 	vma->vm_private_data = vdata;
 
-	vma->vm_flags |= (VM_IO | VM_RESERVED | VM_PFNMAP | VM_DONTEXPAND);
+	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP;
 	if (vdata->type == MSPEC_FETCHOP || vdata->type == MSPEC_UNCACHED)
 		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
 	vma->vm_ops = &mspec_vm_ops;
diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index 92177d5aede..24efae464e2 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -706,7 +706,7 @@ int drm_gem_mmap(struct file *filp, struct vm_area_struct *vma)
 		goto out_unlock;
 	}
 
-	vma->vm_flags |= VM_RESERVED | VM_IO | VM_PFNMAP | VM_DONTEXPAND;
+	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP;
 	vma->vm_ops = obj->dev->driver->gem_vm_ops;
 	vma->vm_private_data = map->handle;
 	vma->vm_page_prot =  pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
diff --git a/drivers/gpu/drm/drm_vm.c b/drivers/gpu/drm/drm_vm.c
index 23a824e6a22..db7bd292410 100644
--- a/drivers/gpu/drm/drm_vm.c
+++ b/drivers/gpu/drm/drm_vm.c
@@ -514,8 +514,7 @@ static int drm_mmap_dma(struct file *filp, struct vm_area_struct *vma)
 
 	vma->vm_ops = &drm_vm_dma_ops;
 
-	vma->vm_flags |= VM_RESERVED;	/* Don't swap */
-	vma->vm_flags |= VM_DONTEXPAND;
+	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
 
 	drm_vm_open_locked(dev, vma);
 	return 0;
@@ -643,21 +642,16 @@ int drm_mmap_locked(struct file *filp, struct vm_area_struct *vma)
 	case _DRM_SHM:
 		vma->vm_ops = &drm_vm_shm_ops;
 		vma->vm_private_data = (void *)map;
-		/* Don't let this area swap.  Change when
-		   DRM_KERNEL advisory is supported. */
-		vma->vm_flags |= VM_RESERVED;
 		break;
 	case _DRM_SCATTER_GATHER:
 		vma->vm_ops = &drm_vm_sg_ops;
 		vma->vm_private_data = (void *)map;
-		vma->vm_flags |= VM_RESERVED;
 		vma->vm_page_prot = drm_dma_prot(map->type, vma);
 		break;
 	default:
 		return -EINVAL;	/* This should never happen. */
 	}
-	vma->vm_flags |= VM_RESERVED;	/* Don't swap */
-	vma->vm_flags |= VM_DONTEXPAND;
+	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
 
 	drm_vm_open_locked(dev, vma);
 	return 0;
diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.c b/drivers/gpu/drm/exynos/exynos_drm_gem.c
index fcdbe46914f..d2545560664 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_gem.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_gem.c
@@ -500,7 +500,7 @@ static int exynos_drm_gem_mmap_buffer(struct file *filp,
 
 	DRM_DEBUG_KMS("%s\n", __FILE__);
 
-	vma->vm_flags |= (VM_IO | VM_RESERVED);
+	vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP;
 
 	update_vm_cache_attr(exynos_gem_obj, vma);
 
diff --git a/drivers/gpu/drm/gma500/framebuffer.c b/drivers/gpu/drm/gma500/framebuffer.c
index 884ba73ac6c..afded54dbb1 100644
--- a/drivers/gpu/drm/gma500/framebuffer.c
+++ b/drivers/gpu/drm/gma500/framebuffer.c
@@ -178,8 +178,7 @@ static int psbfb_mmap(struct fb_info *info, struct vm_area_struct *vma)
 	 */
 	vma->vm_ops = &psbfb_vm_ops;
 	vma->vm_private_data = (void *)psbfb;
-	vma->vm_flags |= VM_RESERVED | VM_IO |
-					VM_MIXEDMAP | VM_DONTEXPAND;
+	vma->vm_flags |= VM_IO | VM_MIXEDMAP | VM_DONTEXPAND | VM_DONTDUMP;
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index a877813571a..3ba72dbdc4b 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -285,7 +285,7 @@ int ttm_bo_mmap(struct file *filp, struct vm_area_struct *vma,
 	 */
 
 	vma->vm_private_data = bo;
-	vma->vm_flags |= VM_RESERVED | VM_IO | VM_MIXEDMAP | VM_DONTEXPAND;
+	vma->vm_flags |= VM_IO | VM_MIXEDMAP | VM_DONTEXPAND | VM_DONTDUMP;
 	return 0;
 out_unref:
 	ttm_bo_unref(&bo);
@@ -300,7 +300,7 @@ int ttm_fbdev_mmap(struct vm_area_struct *vma, struct ttm_buffer_object *bo)
 
 	vma->vm_ops = &ttm_bo_vm_ops;
 	vma->vm_private_data = ttm_bo_reference(bo);
-	vma->vm_flags |= VM_RESERVED | VM_IO | VM_MIXEDMAP | VM_DONTEXPAND;
+	vma->vm_flags |= VM_IO | VM_MIXEDMAP | VM_DONTEXPAND;
 	return 0;
 }
 EXPORT_SYMBOL(ttm_fbdev_mmap);
diff --git a/drivers/gpu/drm/udl/udl_fb.c b/drivers/gpu/drm/udl/udl_fb.c
index 67df842fbb3..69a2b16f42a 100644
--- a/drivers/gpu/drm/udl/udl_fb.c
+++ b/drivers/gpu/drm/udl/udl_fb.c
@@ -243,7 +243,7 @@ static int udl_fb_mmap(struct fb_info *info, struct vm_area_struct *vma)
 			size = 0;
 	}
 
-	vma->vm_flags |= VM_RESERVED;	/* avoid to swap out this VMA */
+	/* VM_IO | VM_DONTEXPAND | VM_DONTDUMP are set by remap_pfn_range() */
 	return 0;
 }
 
diff --git a/drivers/infiniband/hw/ehca/ehca_uverbs.c b/drivers/infiniband/hw/ehca/ehca_uverbs.c
index 45ee89b65c2..1a1d5d99fcf 100644
--- a/drivers/infiniband/hw/ehca/ehca_uverbs.c
+++ b/drivers/infiniband/hw/ehca/ehca_uverbs.c
@@ -117,7 +117,7 @@ static int ehca_mmap_fw(struct vm_area_struct *vma, struct h_galpas *galpas,
 	physical = galpas->user.fw_handle;
 	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
 	ehca_gen_dbg("vsize=%llx physical=%llx", vsize, physical);
-	/* VM_IO | VM_RESERVED are set by remap_pfn_range() */
+	/* VM_IO | VM_DONTEXPAND | VM_DONTDUMP are set by remap_pfn_range() */
 	ret = remap_4k_pfn(vma, vma->vm_start, physical >> EHCA_PAGESHIFT,
 			   vma->vm_page_prot);
 	if (unlikely(ret)) {
@@ -139,7 +139,7 @@ static int ehca_mmap_queue(struct vm_area_struct *vma, struct ipz_queue *queue,
 	u64 start, ofs;
 	struct page *page;
 
-	vma->vm_flags |= VM_RESERVED;
+	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
 	start = vma->vm_start;
 	for (ofs = 0; ofs < queue->queue_length; ofs += PAGE_SIZE) {
 		u64 virt_addr = (u64)ipz_qeit_calc(queue, ofs);
diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c
index 736d9edbdbe..3eb7e454849 100644
--- a/drivers/infiniband/hw/ipath/ipath_file_ops.c
+++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c
@@ -1225,7 +1225,7 @@ static int mmap_kvaddr(struct vm_area_struct *vma, u64 pgaddr,
 
 	vma->vm_pgoff = (unsigned long) addr >> PAGE_SHIFT;
 	vma->vm_ops = &ipath_file_vm_ops;
-	vma->vm_flags |= VM_RESERVED | VM_DONTEXPAND;
+	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
 	ret = 1;
 
 bail:
diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c
index faa44cb0807..959a5c4ff81 100644
--- a/drivers/infiniband/hw/qib/qib_file_ops.c
+++ b/drivers/infiniband/hw/qib/qib_file_ops.c
@@ -971,7 +971,7 @@ static int mmap_kvaddr(struct vm_area_struct *vma, u64 pgaddr,
 
 	vma->vm_pgoff = (unsigned long) addr >> PAGE_SHIFT;
 	vma->vm_ops = &qib_file_vm_ops;
-	vma->vm_flags |= VM_RESERVED | VM_DONTEXPAND;
+	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
 	ret = 1;
 
 bail:
diff --git a/drivers/media/pci/meye/meye.c b/drivers/media/pci/meye/meye.c
index 7bc775219f9..e5a76da8608 100644
--- a/drivers/media/pci/meye/meye.c
+++ b/drivers/media/pci/meye/meye.c
@@ -1647,7 +1647,7 @@ static int meye_mmap(struct file *file, struct vm_area_struct *vma)
 
 	vma->vm_ops = &meye_vm_ops;
 	vma->vm_flags &= ~VM_IO;	/* not I/O memory */
-	vma->vm_flags |= VM_RESERVED;	/* avoid to swap out this VMA */
+	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
 	vma->vm_private_data = (void *) (offset / gbufsize);
 	meye_vm_open(vma);
 
diff --git a/drivers/media/platform/omap/omap_vout.c b/drivers/media/platform/omap/omap_vout.c
index 66ac21d466a..134016f0e66 100644
--- a/drivers/media/platform/omap/omap_vout.c
+++ b/drivers/media/platform/omap/omap_vout.c
@@ -911,7 +911,7 @@ static int omap_vout_mmap(struct file *file, struct vm_area_struct *vma)
 
 	q->bufs[i]->baddr = vma->vm_start;
 
-	vma->vm_flags |= VM_RESERVED;
+	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
 	vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
 	vma->vm_ops = &omap_vout_vm_ops;
 	vma->vm_private_data = (void *) vout;
diff --git a/drivers/media/platform/vino.c b/drivers/media/platform/vino.c
index 790d96cffee..70b0bf4b290 100644
--- a/drivers/media/platform/vino.c
+++ b/drivers/media/platform/vino.c
@@ -3950,7 +3950,7 @@ found:
 
 	fb->map_count = 1;
 
-	vma->vm_flags |= VM_DONTEXPAND | VM_RESERVED;
+	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
 	vma->vm_flags &= ~VM_IO;
 	vma->vm_private_data = fb;
 	vma->vm_file = file;
diff --git a/drivers/media/usb/sn9c102/sn9c102_core.c b/drivers/media/usb/sn9c102/sn9c102_core.c
index 19ea780b16f..5bfc8e2f018 100644
--- a/drivers/media/usb/sn9c102/sn9c102_core.c
+++ b/drivers/media/usb/sn9c102/sn9c102_core.c
@@ -2126,8 +2126,7 @@ static int sn9c102_mmap(struct file* filp, struct vm_area_struct *vma)
 		return -EINVAL;
 	}
 
-	vma->vm_flags |= VM_IO;
-	vma->vm_flags |= VM_RESERVED;
+	vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP;
 
 	pos = cam->frame[i].bufmem;
 	while (size > 0) { /* size is page-aligned */
diff --git a/drivers/media/usb/usbvision/usbvision-video.c b/drivers/media/usb/usbvision/usbvision-video.c
index f67018ed379..5c36a57e659 100644
--- a/drivers/media/usb/usbvision/usbvision-video.c
+++ b/drivers/media/usb/usbvision/usbvision-video.c
@@ -1108,8 +1108,7 @@ static int usbvision_mmap(struct file *file, struct vm_area_struct *vma)
 	}
 
 	/* VM_IO is eventually going to replace PageReserved altogether */
-	vma->vm_flags |= VM_IO;
-	vma->vm_flags |= VM_RESERVED;	/* avoid to swap out this VMA */
+	vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP;
 
 	pos = usbvision->frame[i].data;
 	while (size > 0) {
diff --git a/drivers/media/v4l2-core/videobuf-dma-sg.c b/drivers/media/v4l2-core/videobuf-dma-sg.c
index f300deafd26..828e7c10bd7 100644
--- a/drivers/media/v4l2-core/videobuf-dma-sg.c
+++ b/drivers/media/v4l2-core/videobuf-dma-sg.c
@@ -582,7 +582,7 @@ static int __videobuf_mmap_mapper(struct videobuf_queue *q,
 	map->count    = 1;
 	map->q        = q;
 	vma->vm_ops   = &videobuf_vm_ops;
-	vma->vm_flags |= VM_DONTEXPAND | VM_RESERVED;
+	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
 	vma->vm_flags &= ~VM_IO; /* using shared anonymous pages */
 	vma->vm_private_data = map;
 	dprintk(1, "mmap %p: q=%p %08lx-%08lx pgoff %08lx bufs %d-%d\n",
diff --git a/drivers/media/v4l2-core/videobuf-vmalloc.c b/drivers/media/v4l2-core/videobuf-vmalloc.c
index df142580e44..2ff7fcc77b1 100644
--- a/drivers/media/v4l2-core/videobuf-vmalloc.c
+++ b/drivers/media/v4l2-core/videobuf-vmalloc.c
@@ -270,7 +270,7 @@ static int __videobuf_mmap_mapper(struct videobuf_queue *q,
 	}
 
 	vma->vm_ops          = &videobuf_vm_ops;
-	vma->vm_flags       |= VM_DONTEXPAND | VM_RESERVED;
+	vma->vm_flags       |= VM_DONTEXPAND | VM_DONTDUMP;
 	vma->vm_private_data = map;
 
 	dprintk(1, "mmap %p: q=%p %08lx-%08lx (%lx) pgoff %08lx buf %d\n",
diff --git a/drivers/media/v4l2-core/videobuf2-memops.c b/drivers/media/v4l2-core/videobuf2-memops.c
index 504cd4cbe29..051ea3571b2 100644
--- a/drivers/media/v4l2-core/videobuf2-memops.c
+++ b/drivers/media/v4l2-core/videobuf2-memops.c
@@ -163,7 +163,7 @@ int vb2_mmap_pfn_range(struct vm_area_struct *vma, unsigned long paddr,
 		return ret;
 	}
 
-	vma->vm_flags		|= VM_DONTEXPAND | VM_RESERVED;
+	vma->vm_flags		|= VM_DONTEXPAND | VM_DONTDUMP;
 	vma->vm_private_data	= priv;
 	vma->vm_ops		= vm_ops;
 
diff --git a/drivers/misc/carma/carma-fpga.c b/drivers/misc/carma/carma-fpga.c
index 0c43297ed9a..8835eabb3b8 100644
--- a/drivers/misc/carma/carma-fpga.c
+++ b/drivers/misc/carma/carma-fpga.c
@@ -1243,8 +1243,6 @@ static int data_mmap(struct file *filp, struct vm_area_struct *vma)
 		return -EINVAL;
 	}
 
-	/* IO memory (stop cacheing) */
-	vma->vm_flags |= VM_IO | VM_RESERVED;
 	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
 
 	return io_remap_pfn_range(vma, vma->vm_start, addr, vsize,
diff --git a/drivers/misc/sgi-gru/grufile.c b/drivers/misc/sgi-gru/grufile.c
index ecafa4ba238..492c8cac69a 100644
--- a/drivers/misc/sgi-gru/grufile.c
+++ b/drivers/misc/sgi-gru/grufile.c
@@ -108,9 +108,8 @@ static int gru_file_mmap(struct file *file, struct vm_area_struct *vma)
 				vma->vm_end & (GRU_GSEG_PAGESIZE - 1))
 		return -EINVAL;
 
-	vma->vm_flags |=
-	    (VM_IO | VM_DONTCOPY | VM_LOCKED | VM_DONTEXPAND | VM_PFNMAP |
-			VM_RESERVED);
+	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_LOCKED |
+			 VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP;
 	vma->vm_page_prot = PAGE_SHARED;
 	vma->vm_ops = &gru_vm_ops;
 
diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index a6e74514e66..73ae81a629f 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -1182,7 +1182,7 @@ static int mtdchar_mmap(struct file *file, struct vm_area_struct *vma)
 			return -EINVAL;
 		if (set_vm_offset(vma, off) < 0)
 			return -EINVAL;
-		vma->vm_flags |= VM_IO | VM_RESERVED;
+		vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP;
 
 #ifdef pgprot_noncached
 		if (file->f_flags & O_DSYNC || off >= __pa(high_memory))
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index 9c5c5f2b396..be2c9a6561f 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -1257,7 +1257,7 @@ sg_mmap(struct file *filp, struct vm_area_struct *vma)
 	}
 
 	sfp->mmap_called = 1;
-	vma->vm_flags |= VM_RESERVED;
+	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
 	vma->vm_private_data = sfp;
 	vma->vm_ops = &sg_mmap_vm_ops;
 	return 0;
diff --git a/drivers/staging/omapdrm/omap_gem_dmabuf.c b/drivers/staging/omapdrm/omap_gem_dmabuf.c
index 42728e0cc19..c6f3ef6f57b 100644
--- a/drivers/staging/omapdrm/omap_gem_dmabuf.c
+++ b/drivers/staging/omapdrm/omap_gem_dmabuf.c
@@ -160,7 +160,7 @@ static int omap_gem_dmabuf_mmap(struct dma_buf *buffer,
 		goto out_unlock;
 	}
 
-	vma->vm_flags |= VM_RESERVED | VM_IO | VM_PFNMAP | VM_DONTEXPAND;
+	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP;
 	vma->vm_ops = obj->dev->driver->gem_vm_ops;
 	vma->vm_private_data = obj;
 	vma->vm_page_prot =  pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
diff --git a/drivers/staging/tidspbridge/rmgr/drv_interface.c b/drivers/staging/tidspbridge/rmgr/drv_interface.c
index bddea1d3b2c..701a11ac676 100644
--- a/drivers/staging/tidspbridge/rmgr/drv_interface.c
+++ b/drivers/staging/tidspbridge/rmgr/drv_interface.c
@@ -261,7 +261,7 @@ static int bridge_mmap(struct file *filp, struct vm_area_struct *vma)
 {
 	u32 status;
 
-	vma->vm_flags |= VM_RESERVED | VM_IO;
+	/* VM_IO | VM_DONTEXPAND | VM_DONTDUMP are set by remap_pfn_range() */
 	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
 
 	dev_dbg(bridge, "%s: vm filp %p start %lx end %lx page_prot %ulx "
diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c
index a783d533a1a..5110f367f1f 100644
--- a/drivers/uio/uio.c
+++ b/drivers/uio/uio.c
@@ -653,8 +653,6 @@ static int uio_mmap_physical(struct vm_area_struct *vma)
 	if (mi < 0)
 		return -EINVAL;
 
-	vma->vm_flags |= VM_IO | VM_RESERVED;
-
 	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
 
 	return remap_pfn_range(vma,
@@ -666,7 +664,7 @@ static int uio_mmap_physical(struct vm_area_struct *vma)
 
 static int uio_mmap_logical(struct vm_area_struct *vma)
 {
-	vma->vm_flags |= VM_RESERVED;
+	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
 	vma->vm_ops = &uio_vm_ops;
 	uio_vma_open(vma);
 	return 0;
diff --git a/drivers/usb/mon/mon_bin.c b/drivers/usb/mon/mon_bin.c
index 91cd85076a4..9a62e89d6dc 100644
--- a/drivers/usb/mon/mon_bin.c
+++ b/drivers/usb/mon/mon_bin.c
@@ -1247,7 +1247,7 @@ static int mon_bin_mmap(struct file *filp, struct vm_area_struct *vma)
 {
 	/* don't do anything here: "fault" will set up page table entries */
 	vma->vm_ops = &mon_bin_vm_ops;
-	vma->vm_flags |= VM_RESERVED;
+	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
 	vma->vm_private_data = filp->private_data;
 	mon_bin_vma_open(vma);
 	return 0;
diff --git a/drivers/video/68328fb.c b/drivers/video/68328fb.c
index a425d65d5ba..fa44fbed397 100644
--- a/drivers/video/68328fb.c
+++ b/drivers/video/68328fb.c
@@ -400,7 +400,7 @@ static int mc68x328fb_mmap(struct fb_info *info, struct vm_area_struct *vma)
 #ifndef MMU
 	/* this is uClinux (no MMU) specific code */
 
-	vma->vm_flags |= VM_RESERVED;
+	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
 	vma->vm_start = videomemory;
 
 	return 0;
diff --git a/drivers/video/aty/atyfb_base.c b/drivers/video/aty/atyfb_base.c
index 3f2e8c13f1c..868932f904e 100644
--- a/drivers/video/aty/atyfb_base.c
+++ b/drivers/video/aty/atyfb_base.c
@@ -1942,8 +1942,7 @@ static int atyfb_mmap(struct fb_info *info, struct vm_area_struct *vma)
 	off = vma->vm_pgoff << PAGE_SHIFT;
 	size = vma->vm_end - vma->vm_start;
 
-	/* To stop the swapper from even considering these pages. */
-	vma->vm_flags |= (VM_IO | VM_RESERVED);
+	/* VM_IO | VM_DONTEXPAND | VM_DONTDUMP are set by remap_pfn_range() */
 
 	if (((vma->vm_pgoff == 0) && (size == info->fix.smem_len)) ||
 	    ((off == info->fix.smem_len) && (size == PAGE_SIZE)))
diff --git a/drivers/video/fb-puv3.c b/drivers/video/fb-puv3.c
index 60a787fa32c..7d106f1f490 100644
--- a/drivers/video/fb-puv3.c
+++ b/drivers/video/fb-puv3.c
@@ -653,9 +653,8 @@ int unifb_mmap(struct fb_info *info,
 				vma->vm_page_prot))
 		return -EAGAIN;
 
-	vma->vm_flags |= VM_RESERVED;	/* avoid to swap out this VMA */
+	/* VM_IO | VM_DONTEXPAND | VM_DONTDUMP are set by remap_pfn_range() */
 	return 0;
-
 }
 
 static struct fb_ops unifb_ops = {
diff --git a/drivers/video/fb_defio.c b/drivers/video/fb_defio.c
index 64cda560c48..88cad6b8b47 100644
--- a/drivers/video/fb_defio.c
+++ b/drivers/video/fb_defio.c
@@ -166,7 +166,7 @@ static const struct address_space_operations fb_deferred_io_aops = {
 static int fb_deferred_io_mmap(struct fb_info *info, struct vm_area_struct *vma)
 {
 	vma->vm_ops = &fb_deferred_io_vm_ops;
-	vma->vm_flags |= ( VM_RESERVED | VM_DONTEXPAND );
+	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
 	if (!(info->flags & FBINFO_VIRTFB))
 		vma->vm_flags |= VM_IO;
 	vma->vm_private_data = info;
diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c
index 0dff12a1dae..3ff0105a496 100644
--- a/drivers/video/fbmem.c
+++ b/drivers/video/fbmem.c
@@ -1410,8 +1410,7 @@ fb_mmap(struct file *file, struct vm_area_struct * vma)
 		return -EINVAL;
 	off += start;
 	vma->vm_pgoff = off >> PAGE_SHIFT;
-	/* This is an IO map - tell maydump to skip this VMA */
-	vma->vm_flags |= VM_IO | VM_RESERVED;
+	/* VM_IO | VM_DONTEXPAND | VM_DONTDUMP are set by io_remap_pfn_range()*/
 	vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
 	fb_pgprotect(file, vma, off);
 	if (io_remap_pfn_range(vma, vma->vm_start, off >> PAGE_SHIFT,
diff --git a/drivers/video/gbefb.c b/drivers/video/gbefb.c
index 7e7b7a9ba27..05e2a8a99d8 100644
--- a/drivers/video/gbefb.c
+++ b/drivers/video/gbefb.c
@@ -1024,7 +1024,7 @@ static int gbefb_mmap(struct fb_info *info,
 	pgprot_val(vma->vm_page_prot) =
 		pgprot_fb(pgprot_val(vma->vm_page_prot));
 
-	vma->vm_flags |= VM_IO | VM_RESERVED;
+	/* VM_IO | VM_DONTEXPAND | VM_DONTDUMP are set by remap_pfn_range() */
 
 	/* look for the starting tile */
 	tile = &gbe_tiles.cpu[offset >> TILE_SHIFT];
diff --git a/drivers/video/omap2/omapfb/omapfb-main.c b/drivers/video/omap2/omapfb/omapfb-main.c
index 3c39aa8de92..15373f4aee1 100644
--- a/drivers/video/omap2/omapfb/omapfb-main.c
+++ b/drivers/video/omap2/omapfb/omapfb-main.c
@@ -1128,7 +1128,7 @@ static int omapfb_mmap(struct fb_info *fbi, struct vm_area_struct *vma)
 	DBG("user mmap region start %lx, len %d, off %lx\n", start, len, off);
 
 	vma->vm_pgoff = off >> PAGE_SHIFT;
-	vma->vm_flags |= VM_IO | VM_RESERVED;
+	/* VM_IO | VM_DONTEXPAND | VM_DONTDUMP are set by remap_pfn_range() */
 	vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
 	vma->vm_ops = &mmap_user_ops;
 	vma->vm_private_data = rg;
diff --git a/drivers/video/sbuslib.c b/drivers/video/sbuslib.c
index 3c1de981a18..296afae442f 100644
--- a/drivers/video/sbuslib.c
+++ b/drivers/video/sbuslib.c
@@ -57,9 +57,8 @@ int sbusfb_mmap_helper(struct sbus_mmap_map *map,
 
 	off = vma->vm_pgoff << PAGE_SHIFT;
 
-	/* To stop the swapper from even considering these pages */
-	vma->vm_flags |= (VM_IO | VM_RESERVED);
-	
+	/* VM_IO | VM_DONTEXPAND | VM_DONTDUMP are set by remap_pfn_range() */
+
 	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
 
 	/* Each page, see which map applies */
diff --git a/drivers/video/smscufx.c b/drivers/video/smscufx.c
index 5533a32c6ca..97bd6620c36 100644
--- a/drivers/video/smscufx.c
+++ b/drivers/video/smscufx.c
@@ -803,7 +803,6 @@ static int ufx_ops_mmap(struct fb_info *info, struct vm_area_struct *vma)
 			size = 0;
 	}
 
-	vma->vm_flags |= VM_RESERVED;	/* avoid to swap out this VMA */
 	return 0;
 }
 
diff --git a/drivers/video/udlfb.c b/drivers/video/udlfb.c
index 8af64148294..f45eba3d615 100644
--- a/drivers/video/udlfb.c
+++ b/drivers/video/udlfb.c
@@ -345,7 +345,6 @@ static int dlfb_ops_mmap(struct fb_info *info, struct vm_area_struct *vma)
 			size = 0;
 	}
 
-	vma->vm_flags |= VM_RESERVED;	/* avoid to swap out this VMA */
 	return 0;
 }
 
diff --git a/drivers/video/vermilion/vermilion.c b/drivers/video/vermilion/vermilion.c
index 970e43d13f5..89aef343e29 100644
--- a/drivers/video/vermilion/vermilion.c
+++ b/drivers/video/vermilion/vermilion.c
@@ -1018,7 +1018,6 @@ static int vmlfb_mmap(struct fb_info *info, struct vm_area_struct *vma)
 	offset += vinfo->vram_start;
 	pgprot_val(vma->vm_page_prot) |= _PAGE_PCD;
 	pgprot_val(vma->vm_page_prot) &= ~_PAGE_PWT;
-	vma->vm_flags |= VM_RESERVED | VM_IO;
 	if (remap_pfn_range(vma, vma->vm_start, offset >> PAGE_SHIFT,
 						size, vma->vm_page_prot))
 		return -EAGAIN;
diff --git a/drivers/video/vfb.c b/drivers/video/vfb.c
index 501a922aa9d..c7f692525b8 100644
--- a/drivers/video/vfb.c
+++ b/drivers/video/vfb.c
@@ -439,7 +439,6 @@ static int vfb_mmap(struct fb_info *info,
 			size = 0;
 	}
 
-	vma->vm_flags |= VM_RESERVED;	/* avoid to swap out this VMA */
 	return 0;
 
 }
diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c
index 934985d14c2..4097987b330 100644
--- a/drivers/xen/gntalloc.c
+++ b/drivers/xen/gntalloc.c
@@ -535,7 +535,7 @@ static int gntalloc_mmap(struct file *filp, struct vm_area_struct *vma)
 
 	vma->vm_private_data = vm_priv;
 
-	vma->vm_flags |= VM_RESERVED | VM_DONTEXPAND;
+	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
 
 	vma->vm_ops = &gntalloc_vmops;
 
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index 5df9fd847b2..610bfc6be17 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -720,7 +720,7 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
 
 	vma->vm_ops = &gntdev_vmops;
 
-	vma->vm_flags |= VM_RESERVED|VM_DONTEXPAND;
+	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
 
 	if (use_ptemod)
 		vma->vm_flags |= VM_DONTCOPY;
diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
index ef6389580b8..8adb9cc267f 100644
--- a/drivers/xen/privcmd.c
+++ b/drivers/xen/privcmd.c
@@ -455,7 +455,8 @@ static int privcmd_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	/* DONTCOPY is essential for Xen because copy_page_range doesn't know
 	 * how to recreate these mappings */
-	vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY | VM_PFNMAP;
+	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTCOPY |
+			 VM_DONTEXPAND | VM_DONTDUMP;
 	vma->vm_ops = &privcmd_vm_ops;
 	vma->vm_private_data = NULL;
 
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 2b72d26e2e4..e800dec958c 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1135,7 +1135,7 @@ static unsigned long vma_dump_size(struct vm_area_struct *vma,
 	}
 
 	/* Do not dump I/O mapped devices or special mappings */
-	if (vma->vm_flags & (VM_IO | VM_RESERVED))
+	if (vma->vm_flags & VM_IO)
 		return 0;
 
 	/* By default, dump shared memory if mapped from an anonymous file. */
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 08d812b3228..262db114ff0 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1205,7 +1205,7 @@ static int maydump(struct vm_area_struct *vma, unsigned long mm_flags)
 	int dump_ok;
 
 	/* Do not dump I/O mapped devices or special mappings */
-	if (vma->vm_flags & (VM_IO | VM_RESERVED)) {
+	if (vma->vm_flags & VM_IO) {
 		kdcore("%08lx: %08lx: no (IO)", vma->vm_start, vma->vm_flags);
 		return 0;
 	}
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 9460120a517..0a0ab8e21b1 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -110,7 +110,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
 	 * way when do_mmap_pgoff unwinds (may be important on powerpc
 	 * and ia64).
 	 */
-	vma->vm_flags |= VM_HUGETLB | VM_RESERVED;
+	vma->vm_flags |= VM_HUGETLB | VM_DONTEXPAND | VM_DONTDUMP;
 	vma->vm_ops = &hugetlb_vm_ops;
 
 	if (vma->vm_pgoff & (~huge_page_mask(h) >> PAGE_SHIFT))
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 4540b8f76f1..79827ce03e3 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -54,7 +54,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
 		"VmPTE:\t%8lu kB\n"
 		"VmSwap:\t%8lu kB\n",
 		hiwater_vm << (PAGE_SHIFT-10),
-		(total_vm - mm->reserved_vm) << (PAGE_SHIFT-10),
+		total_vm << (PAGE_SHIFT-10),
 		mm->locked_vm << (PAGE_SHIFT-10),
 		mm->pinned_vm << (PAGE_SHIFT-10),
 		hiwater_rss << (PAGE_SHIFT-10),
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 95b738c7abf..ba7a0ff19d3 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -239,7 +239,7 @@ extern int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol,
 /* Check if a vma is migratable */
 static inline int vma_migratable(struct vm_area_struct *vma)
 {
-	if (vma->vm_flags & (VM_IO|VM_HUGETLB|VM_PFNMAP|VM_RESERVED))
+	if (vma->vm_flags & (VM_IO | VM_HUGETLB | VM_PFNMAP))
 		return 0;
 	/*
 	 * Migration allocates pages in the highest zone. If we cannot
diff --git a/include/linux/mm.h b/include/linux/mm.h
index dc08d558e05..0514fe9d3c8 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -96,7 +96,6 @@ extern unsigned int kobjsize(const void *objp);
 
 #define VM_DONTCOPY	0x00020000      /* Do not copy this vma on fork */
 #define VM_DONTEXPAND	0x00040000	/* Cannot expand with mremap() */
-#define VM_RESERVED	0x00080000	/* Count as reserved_vm like IO */
 #define VM_ACCOUNT	0x00100000	/* Is a VM accounted object */
 #define VM_NORESERVE	0x00200000	/* should the VM suppress accounting */
 #define VM_HUGETLB	0x00400000	/* Huge TLB Page VM */
@@ -148,7 +147,7 @@ extern unsigned int kobjsize(const void *objp);
  * Special vmas that are non-mergable, non-mlock()able.
  * Note: mm/huge_memory.c VM_NO_THP depends on this definition.
  */
-#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_RESERVED | VM_PFNMAP)
+#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_PFNMAP)
 
 /*
  * mapping from the currently active vm_flags protection bits (the
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 58d3173eb36..a57a43f5ca7 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -349,7 +349,6 @@ struct mm_struct {
 	unsigned long shared_vm;	/* Shared pages (files) */
 	unsigned long exec_vm;		/* VM_EXEC & ~VM_WRITE */
 	unsigned long stack_vm;		/* VM_GROWSUP/DOWN */
-	unsigned long reserved_vm;	/* VM_RESERVED|VM_IO pages */
 	unsigned long def_flags;
 	unsigned long nr_ptes;		/* Page table pages */
 	unsigned long start_code, end_code, start_data, end_data;
diff --git a/kernel/events/core.c b/kernel/events/core.c
index f16f3c58f11..cda3ebd49e8 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3671,7 +3671,7 @@ unlock:
 		atomic_inc(&event->mmap_count);
 	mutex_unlock(&event->mmap_mutex);
 
-	vma->vm_flags |= VM_RESERVED;
+	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
 	vma->vm_ops = &perf_mmap_vmops;
 
 	return ret;
diff --git a/mm/ksm.c b/mm/ksm.c
index f9ccb16559e..9638620a753 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -1469,8 +1469,7 @@ int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
 		 */
 		if (*vm_flags & (VM_MERGEABLE | VM_SHARED  | VM_MAYSHARE   |
 				 VM_PFNMAP    | VM_IO      | VM_DONTEXPAND |
-				 VM_RESERVED  | VM_HUGETLB |
-				 VM_NONLINEAR | VM_MIXEDMAP))
+				 VM_HUGETLB | VM_NONLINEAR | VM_MIXEDMAP))
 			return 0;		/* just ignore the advice */
 
 #ifdef VM_SAO
diff --git a/mm/memory.c b/mm/memory.c
index 7b1e4feaec0..e09c0481318 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2297,14 +2297,13 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
 	 * rest of the world about it:
 	 *   VM_IO tells people not to look at these pages
 	 *	(accesses can have side effects).
-	 *   VM_RESERVED is specified all over the place, because
-	 *	in 2.4 it kept swapout's vma scan off this vma; but
-	 *	in 2.6 the LRU scan won't even find its pages, so this
-	 *	flag means no more than count its pages in reserved_vm,
-	 * 	and omit it from core dump, even when VM_IO turned off.
 	 *   VM_PFNMAP tells the core MM that the base pages are just
 	 *	raw PFN mappings, and do not have a "struct page" associated
 	 *	with them.
+	 *   VM_DONTEXPAND
+	 *      Disable vma merging and expanding with mremap().
+	 *   VM_DONTDUMP
+	 *      Omit vma from core dump, even when VM_IO turned off.
 	 *
 	 * There's a horrible special case to handle copy-on-write
 	 * behaviour that some programs depend on. We mark the "original"
@@ -2321,7 +2320,7 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
 	if (err)
 		return -EINVAL;
 
-	vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP;
+	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP;
 
 	BUG_ON(addr >= end);
 	pfn -= addr >> PAGE_SHIFT;
diff --git a/mm/mlock.c b/mm/mlock.c
index ef726e8aa8e..a948be4b7ba 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -227,7 +227,7 @@ long mlock_vma_pages_range(struct vm_area_struct *vma,
 	if (vma->vm_flags & (VM_IO | VM_PFNMAP))
 		goto no_mlock;
 
-	if (!((vma->vm_flags & (VM_DONTEXPAND | VM_RESERVED)) ||
+	if (!((vma->vm_flags & VM_DONTEXPAND) ||
 			is_vm_hugetlb_page(vma) ||
 			vma == get_gate_vma(current->mm))) {
 
diff --git a/mm/mmap.c b/mm/mmap.c
index c1ad2e78ea5..a76042dc806 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -945,8 +945,6 @@ void vm_stat_account(struct mm_struct *mm, unsigned long flags,
 			mm->exec_vm += pages;
 	} else if (flags & stack_flags)
 		mm->stack_vm += pages;
-	if (flags & (VM_RESERVED|VM_IO))
-		mm->reserved_vm += pages;
 }
 #endif /* CONFIG_PROC_FS */
 
diff --git a/mm/nommu.c b/mm/nommu.c
index 9c4a7b63a4d..12e84e69dd0 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1811,7 +1811,7 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
 	if (addr != (pfn << PAGE_SHIFT))
 		return -EINVAL;
 
-	vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP;
+	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP;
 	return 0;
 }
 EXPORT_SYMBOL(remap_pfn_range);
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 2bb90b1d241..8de704679bf 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -2163,8 +2163,7 @@ int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
 		usize -= PAGE_SIZE;
 	} while (usize > 0);
 
-	/* Prevent "things" like memory migration? VM_flags need a cleanup... */
-	vma->vm_flags |= VM_RESERVED;
+	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
 
 	return 0;
 }
diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c
index 55af8c5b57e..3a6e8731646 100644
--- a/security/selinux/selinuxfs.c
+++ b/security/selinux/selinuxfs.c
@@ -485,7 +485,7 @@ static int sel_mmap_policy(struct file *filp, struct vm_area_struct *vma)
 			return -EACCES;
 	}
 
-	vma->vm_flags |= VM_RESERVED;
+	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
 	vma->vm_ops = &sel_mmap_policy_ops;
 
 	return 0;
diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
index 20554eff5a2..5e12e5bacbb 100644
--- a/sound/core/pcm_native.c
+++ b/sound/core/pcm_native.c
@@ -3039,7 +3039,7 @@ static int snd_pcm_mmap_status(struct snd_pcm_substream *substream, struct file
 		return -EINVAL;
 	area->vm_ops = &snd_pcm_vm_ops_status;
 	area->vm_private_data = substream;
-	area->vm_flags |= VM_RESERVED;
+	area->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
 	return 0;
 }
 
@@ -3076,7 +3076,7 @@ static int snd_pcm_mmap_control(struct snd_pcm_substream *substream, struct file
 		return -EINVAL;
 	area->vm_ops = &snd_pcm_vm_ops_control;
 	area->vm_private_data = substream;
-	area->vm_flags |= VM_RESERVED;
+	area->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
 	return 0;
 }
 #else /* ! coherent mmap */
@@ -3170,7 +3170,7 @@ static const struct vm_operations_struct snd_pcm_vm_ops_data_fault = {
 int snd_pcm_lib_default_mmap(struct snd_pcm_substream *substream,
 			     struct vm_area_struct *area)
 {
-	area->vm_flags |= VM_RESERVED;
+	area->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
 #ifdef ARCH_HAS_DMA_MMAP_COHERENT
 	if (!substream->ops->page &&
 	    substream->dma_buffer.dev.type == SNDRV_DMA_TYPE_DEV)
diff --git a/sound/usb/usx2y/us122l.c b/sound/usb/usx2y/us122l.c
index c4fd3b1d959..d0323a693ba 100644
--- a/sound/usb/usx2y/us122l.c
+++ b/sound/usb/usx2y/us122l.c
@@ -262,7 +262,7 @@ static int usb_stream_hwdep_mmap(struct snd_hwdep *hw,
 	}
 
 	area->vm_ops = &usb_stream_hwdep_vm_ops;
-	area->vm_flags |= VM_RESERVED;
+	area->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
 	area->vm_private_data = us122l;
 	atomic_inc(&us122l->mmap_count);
 out:
diff --git a/sound/usb/usx2y/usX2Yhwdep.c b/sound/usb/usx2y/usX2Yhwdep.c
index 04aafb43a13..0b34dbc8f30 100644
--- a/sound/usb/usx2y/usX2Yhwdep.c
+++ b/sound/usb/usx2y/usX2Yhwdep.c
@@ -82,7 +82,7 @@ static int snd_us428ctls_mmap(struct snd_hwdep * hw, struct file *filp, struct v
 		us428->us428ctls_sharedmem->CtlSnapShotLast = -2;
 	}
 	area->vm_ops = &us428ctls_vm_ops;
-	area->vm_flags |= VM_RESERVED | VM_DONTEXPAND;
+	area->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
 	area->vm_private_data = hw->private_data;
 	return 0;
 }
diff --git a/sound/usb/usx2y/usx2yhwdeppcm.c b/sound/usb/usx2y/usx2yhwdeppcm.c
index 8e40b6e67e9..cc56007791e 100644
--- a/sound/usb/usx2y/usx2yhwdeppcm.c
+++ b/sound/usb/usx2y/usx2yhwdeppcm.c
@@ -723,7 +723,7 @@ static int snd_usX2Y_hwdep_pcm_mmap(struct snd_hwdep * hw, struct file *filp, st
 		return -ENODEV;
 	}
 	area->vm_ops = &snd_usX2Y_hwdep_pcm_vm_ops;
-	area->vm_flags |= VM_RESERVED | VM_DONTEXPAND;
+	area->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
 	area->vm_private_data = hw->private_data;
 	return 0;
 }
-- 
cgit v1.2.3-70-g09d2


From 5d3a551c28c6669dc43be40d8fafafbc2ec8f42b Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Mon, 8 Oct 2012 16:29:32 -0700
Subject: mm: hugetlb: add arch hook for clearing page flags before entering
 pool

The core page allocator ensures that page flags are zeroed when freeing
pages via free_pages_check.  A number of architectures (ARM, PPC, MIPS)
rely on this property to treat new pages as dirty with respect to the data
cache and perform the appropriate flushing before mapping the pages into
userspace.

This can lead to cache synchronisation problems when using hugepages,
since the allocator keeps its own pool of pages above the usual page
allocator and does not reset the page flags when freeing a page into the
pool.

This patch adds a new architecture hook, arch_clear_hugepage_flags, so
that architectures which rely on the page flags being in a particular
state for fresh allocations can adjust the flags accordingly when a page
is freed into the pool.

Signed-off-by: Will Deacon <will.deacon@arm.com>
Cc: Michal Hocko <mhocko@suse.cz>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/ia64/include/asm/hugetlb.h    | 4 ++++
 arch/mips/include/asm/hugetlb.h    | 4 ++++
 arch/powerpc/include/asm/hugetlb.h | 4 ++++
 arch/s390/include/asm/hugetlb.h    | 1 +
 arch/sh/include/asm/hugetlb.h      | 6 ++++++
 arch/sparc/include/asm/hugetlb.h   | 4 ++++
 arch/tile/include/asm/hugetlb.h    | 4 ++++
 arch/x86/include/asm/hugetlb.h     | 4 ++++
 mm/hugetlb.c                       | 1 +
 9 files changed, 32 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/ia64/include/asm/hugetlb.h b/arch/ia64/include/asm/hugetlb.h
index da55c63728e..94eaa5bd5d0 100644
--- a/arch/ia64/include/asm/hugetlb.h
+++ b/arch/ia64/include/asm/hugetlb.h
@@ -77,4 +77,8 @@ static inline void arch_release_hugepage(struct page *page)
 {
 }
 
+static inline void arch_clear_hugepage_flags(struct page *page)
+{
+}
+
 #endif /* _ASM_IA64_HUGETLB_H */
diff --git a/arch/mips/include/asm/hugetlb.h b/arch/mips/include/asm/hugetlb.h
index 58d36889f09..bd94946a18f 100644
--- a/arch/mips/include/asm/hugetlb.h
+++ b/arch/mips/include/asm/hugetlb.h
@@ -112,4 +112,8 @@ static inline void arch_release_hugepage(struct page *page)
 {
 }
 
+static inline void arch_clear_hugepage_flags(struct page *page)
+{
+}
+
 #endif /* __ASM_HUGETLB_H */
diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h
index dfdb95bc59a..62e11a32c4c 100644
--- a/arch/powerpc/include/asm/hugetlb.h
+++ b/arch/powerpc/include/asm/hugetlb.h
@@ -151,6 +151,10 @@ static inline void arch_release_hugepage(struct page *page)
 {
 }
 
+static inline void arch_clear_hugepage_flags(struct page *page)
+{
+}
+
 #else /* ! CONFIG_HUGETLB_PAGE */
 static inline void flush_hugetlb_page(struct vm_area_struct *vma,
 				      unsigned long vmaddr)
diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h
index 2d6e6e38056..fc322421b1c 100644
--- a/arch/s390/include/asm/hugetlb.h
+++ b/arch/s390/include/asm/hugetlb.h
@@ -33,6 +33,7 @@ static inline int prepare_hugepage_range(struct file *file,
 }
 
 #define hugetlb_prefault_arch_hook(mm)		do { } while (0)
+#define arch_clear_hugepage_flags(page)		do { } while (0)
 
 int arch_prepare_hugepage(struct page *page);
 void arch_release_hugepage(struct page *page);
diff --git a/arch/sh/include/asm/hugetlb.h b/arch/sh/include/asm/hugetlb.h
index 967068fb79a..b3808c7d67b 100644
--- a/arch/sh/include/asm/hugetlb.h
+++ b/arch/sh/include/asm/hugetlb.h
@@ -1,6 +1,7 @@
 #ifndef _ASM_SH_HUGETLB_H
 #define _ASM_SH_HUGETLB_H
 
+#include <asm/cacheflush.h>
 #include <asm/page.h>
 
 
@@ -89,4 +90,9 @@ static inline void arch_release_hugepage(struct page *page)
 {
 }
 
+static inline void arch_clear_hugepage_flags(struct page *page)
+{
+	clear_bit(PG_dcache_clean, &page->flags);
+}
+
 #endif /* _ASM_SH_HUGETLB_H */
diff --git a/arch/sparc/include/asm/hugetlb.h b/arch/sparc/include/asm/hugetlb.h
index 177061064ee..e7927c9758a 100644
--- a/arch/sparc/include/asm/hugetlb.h
+++ b/arch/sparc/include/asm/hugetlb.h
@@ -82,4 +82,8 @@ static inline void arch_release_hugepage(struct page *page)
 {
 }
 
+static inline void arch_clear_hugepage_flags(struct page *page)
+{
+}
+
 #endif /* _ASM_SPARC64_HUGETLB_H */
diff --git a/arch/tile/include/asm/hugetlb.h b/arch/tile/include/asm/hugetlb.h
index b2042380a5a..0f885af2b62 100644
--- a/arch/tile/include/asm/hugetlb.h
+++ b/arch/tile/include/asm/hugetlb.h
@@ -106,6 +106,10 @@ static inline void arch_release_hugepage(struct page *page)
 {
 }
 
+static inline void arch_clear_hugepage_flags(struct page *page)
+{
+}
+
 #ifdef CONFIG_HUGETLB_SUPER_PAGES
 static inline pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
 				       struct page *page, int writable)
diff --git a/arch/x86/include/asm/hugetlb.h b/arch/x86/include/asm/hugetlb.h
index 439a9acc132..bdd35dbd060 100644
--- a/arch/x86/include/asm/hugetlb.h
+++ b/arch/x86/include/asm/hugetlb.h
@@ -90,4 +90,8 @@ static inline void arch_release_hugepage(struct page *page)
 {
 }
 
+static inline void arch_clear_hugepage_flags(struct page *page)
+{
+}
+
 #endif /* _ASM_X86_HUGETLB_H */
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index bc727122dd4..f1bb534254f 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -637,6 +637,7 @@ static void free_huge_page(struct page *page)
 		h->surplus_huge_pages--;
 		h->surplus_huge_pages_node[nid]--;
 	} else {
+		arch_clear_hugepage_flags(page);
 		enqueue_huge_page(h, page);
 	}
 	spin_unlock(&hugetlb_lock);
-- 
cgit v1.2.3-70-g09d2


From 15626062f4a98279c59a2a5208c496cf65cbf8c0 Mon Sep 17 00:00:00 2001
From: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Date: Mon, 8 Oct 2012 16:30:04 -0700
Subject: thp, x86: introduce HAVE_ARCH_TRANSPARENT_HUGEPAGE

Cleanup patch in preparation for transparent hugepage support on s390.
Adding new architectures to the TRANSPARENT_HUGEPAGE config option can
make the "depends" line rather ugly, like "depends on (X86 || (S390 &&
64BIT)) && MMU".

This patch adds a HAVE_ARCH_TRANSPARENT_HUGEPAGE instead.  x86 already has
MMU "def_bool y", so the MMU check is superfluous there and
HAVE_ARCH_TRANSPARENT_HUGEPAGE can be selected in arch/x86/Kconfig.

Signed-off-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Hillf Danton <dhillf@gmail.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/Kconfig     | 3 +++
 arch/x86/Kconfig | 1 +
 mm/Kconfig       | 2 +-
 3 files changed, 5 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/Kconfig b/arch/Kconfig
index a62965d057f..550cce4dd64 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -313,4 +313,7 @@ config HAVE_IRQ_TIME_ACCOUNTING
 	  Archs need to ensure they use a high enough resolution clock to
 	  support irq time accounting and then call enable_sched_clock_irqtime().
 
+config HAVE_ARCH_TRANSPARENT_HUGEPAGE
+	bool
+
 source "kernel/gcov/Kconfig"
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 6119d6c7002..1ae94bcae5d 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -88,6 +88,7 @@ config X86
 	select IRQ_FORCED_THREADING
 	select USE_GENERIC_SMP_HELPERS if SMP
 	select HAVE_BPF_JIT if X86_64
+	select HAVE_ARCH_TRANSPARENT_HUGEPAGE
 	select CLKEVT_I8253
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	select GENERIC_IOMAP
diff --git a/mm/Kconfig b/mm/Kconfig
index d5c8019c662..3322342a1ff 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -318,7 +318,7 @@ config NOMMU_INITIAL_TRIM_EXCESS
 
 config TRANSPARENT_HUGEPAGE
 	bool "Transparent Hugepage Support"
-	depends on X86 && MMU
+	depends on HAVE_ARCH_TRANSPARENT_HUGEPAGE
 	select COMPACTION
 	help
 	  Transparent Hugepages allows the kernel to use huge pages and
-- 
cgit v1.2.3-70-g09d2


From 9d9e6f9703bbd642f3f2f807e6aaa642a4cbcec9 Mon Sep 17 00:00:00 2001
From: Michel Lespinasse <walken@google.com>
Date: Mon, 8 Oct 2012 16:31:20 -0700
Subject: rbtree: remove prior augmented rbtree implementation

convert arch/x86/mm/pat_rbtree.c to the proposed augmented rbtree api
and remove the old augmented rbtree implementation.

Signed-off-by: Michel Lespinasse <walken@google.com>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/mm/pat_rbtree.c | 65 +++++++++++++++++++++++++++++++-------------
 include/linux/rbtree.h   |  8 ------
 lib/rbtree.c             | 71 ------------------------------------------------
 3 files changed, 46 insertions(+), 98 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/mm/pat_rbtree.c b/arch/x86/mm/pat_rbtree.c
index 8acaddd0fb2..7e1515bd477 100644
--- a/arch/x86/mm/pat_rbtree.c
+++ b/arch/x86/mm/pat_rbtree.c
@@ -54,29 +54,57 @@ static u64 get_subtree_max_end(struct rb_node *node)
 	return ret;
 }
 
-/* Update 'subtree_max_end' for a node, based on node and its children */
-static void memtype_rb_augment_cb(struct rb_node *node, void *__unused)
+static u64 compute_subtree_max_end(struct memtype *data)
 {
-	struct memtype *data;
-	u64 max_end, child_max_end;
-
-	if (!node)
-		return;
-
-	data = container_of(node, struct memtype, rb);
-	max_end = data->end;
+	u64 max_end = data->end, child_max_end;
 
-	child_max_end = get_subtree_max_end(node->rb_right);
+	child_max_end = get_subtree_max_end(data->rb.rb_right);
 	if (child_max_end > max_end)
 		max_end = child_max_end;
 
-	child_max_end = get_subtree_max_end(node->rb_left);
+	child_max_end = get_subtree_max_end(data->rb.rb_left);
 	if (child_max_end > max_end)
 		max_end = child_max_end;
 
-	data->subtree_max_end = max_end;
+	return max_end;
+}
+
+/* Update 'subtree_max_end' for node and its parents */
+static void memtype_rb_propagate_cb(struct rb_node *node, struct rb_node *stop)
+{
+	while (node != stop) {
+		struct memtype *data = container_of(node, struct memtype, rb);
+		u64 subtree_max_end = compute_subtree_max_end(data);
+		if (data->subtree_max_end == subtree_max_end)
+			break;
+		data->subtree_max_end = subtree_max_end;
+		node = rb_parent(&data->rb);
+	}
+}
+
+static void memtype_rb_copy_cb(struct rb_node *old, struct rb_node *new)
+{
+	struct memtype *old_data = container_of(old, struct memtype, rb);
+	struct memtype *new_data = container_of(new, struct memtype, rb);
+
+	new_data->subtree_max_end = old_data->subtree_max_end;
 }
 
+/* Update 'subtree_max_end' after tree rotation. old and new are the
+ * former and current subtree roots */
+static void memtype_rb_rotate_cb(struct rb_node *old, struct rb_node *new)
+{
+	struct memtype *old_data = container_of(old, struct memtype, rb);
+	struct memtype *new_data = container_of(new, struct memtype, rb);
+
+	new_data->subtree_max_end = old_data->subtree_max_end;
+	old_data->subtree_max_end = compute_subtree_max_end(old_data);
+}
+
+static const struct rb_augment_callbacks memtype_rb_augment_cb = {
+	memtype_rb_propagate_cb, memtype_rb_copy_cb, memtype_rb_rotate_cb
+};
+
 /* Find the first (lowest start addr) overlapping range from rb tree */
 static struct memtype *memtype_rb_lowest_match(struct rb_root *root,
 				u64 start, u64 end)
@@ -179,15 +207,17 @@ static void memtype_rb_insert(struct rb_root *root, struct memtype *newdata)
 		struct memtype *data = container_of(*node, struct memtype, rb);
 
 		parent = *node;
+		if (data->subtree_max_end < newdata->end)
+			data->subtree_max_end = newdata->end;
 		if (newdata->start <= data->start)
 			node = &((*node)->rb_left);
 		else if (newdata->start > data->start)
 			node = &((*node)->rb_right);
 	}
 
+	newdata->subtree_max_end = newdata->end;
 	rb_link_node(&newdata->rb, parent, node);
-	rb_insert_color(&newdata->rb, root);
-	rb_augment_insert(&newdata->rb, memtype_rb_augment_cb, NULL);
+	rb_insert_augmented(&newdata->rb, root, &memtype_rb_augment_cb);
 }
 
 int rbt_memtype_check_insert(struct memtype *new, unsigned long *ret_type)
@@ -209,16 +239,13 @@ int rbt_memtype_check_insert(struct memtype *new, unsigned long *ret_type)
 
 struct memtype *rbt_memtype_erase(u64 start, u64 end)
 {
-	struct rb_node *deepest;
 	struct memtype *data;
 
 	data = memtype_rb_exact_match(&memtype_rbroot, start, end);
 	if (!data)
 		goto out;
 
-	deepest = rb_augment_erase_begin(&data->rb);
-	rb_erase(&data->rb, &memtype_rbroot);
-	rb_augment_erase_end(deepest, memtype_rb_augment_cb, NULL);
+	rb_erase_augmented(&data->rb, &memtype_rbroot, &memtype_rb_augment_cb);
 out:
 	return data;
 }
diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h
index c902eb9d650..4ace31b3338 100644
--- a/include/linux/rbtree.h
+++ b/include/linux/rbtree.h
@@ -80,14 +80,6 @@ rb_insert_augmented(struct rb_node *node, struct rb_root *root,
 }
 
 
-typedef void (*rb_augment_f)(struct rb_node *node, void *data);
-
-extern void rb_augment_insert(struct rb_node *node,
-			      rb_augment_f func, void *data);
-extern struct rb_node *rb_augment_erase_begin(struct rb_node *node);
-extern void rb_augment_erase_end(struct rb_node *node,
-				 rb_augment_f func, void *data);
-
 /* Find logical next and previous nodes in a tree */
 extern struct rb_node *rb_next(const struct rb_node *);
 extern struct rb_node *rb_prev(const struct rb_node *);
diff --git a/lib/rbtree.c b/lib/rbtree.c
index a37ee7954b8..c0088ca345f 100644
--- a/lib/rbtree.c
+++ b/lib/rbtree.c
@@ -538,77 +538,6 @@ void rb_erase_augmented(struct rb_node *node, struct rb_root *root,
 }
 EXPORT_SYMBOL(rb_erase_augmented);
 
-static void rb_augment_path(struct rb_node *node, rb_augment_f func, void *data)
-{
-	struct rb_node *parent;
-
-up:
-	func(node, data);
-	parent = rb_parent(node);
-	if (!parent)
-		return;
-
-	if (node == parent->rb_left && parent->rb_right)
-		func(parent->rb_right, data);
-	else if (parent->rb_left)
-		func(parent->rb_left, data);
-
-	node = parent;
-	goto up;
-}
-
-/*
- * after inserting @node into the tree, update the tree to account for
- * both the new entry and any damage done by rebalance
- */
-void rb_augment_insert(struct rb_node *node, rb_augment_f func, void *data)
-{
-	if (node->rb_left)
-		node = node->rb_left;
-	else if (node->rb_right)
-		node = node->rb_right;
-
-	rb_augment_path(node, func, data);
-}
-EXPORT_SYMBOL(rb_augment_insert);
-
-/*
- * before removing the node, find the deepest node on the rebalance path
- * that will still be there after @node gets removed
- */
-struct rb_node *rb_augment_erase_begin(struct rb_node *node)
-{
-	struct rb_node *deepest;
-
-	if (!node->rb_right && !node->rb_left)
-		deepest = rb_parent(node);
-	else if (!node->rb_right)
-		deepest = node->rb_left;
-	else if (!node->rb_left)
-		deepest = node->rb_right;
-	else {
-		deepest = rb_next(node);
-		if (deepest->rb_right)
-			deepest = deepest->rb_right;
-		else if (rb_parent(deepest) != node)
-			deepest = rb_parent(deepest);
-	}
-
-	return deepest;
-}
-EXPORT_SYMBOL(rb_augment_erase_begin);
-
-/*
- * after removal, update the tree to account for the removed entry
- * and any rebalance damage.
- */
-void rb_augment_erase_end(struct rb_node *node, rb_augment_f func, void *data)
-{
-	if (node)
-		rb_augment_path(node, func, data);
-}
-EXPORT_SYMBOL(rb_augment_erase_end);
-
 /*
  * This function returns the first node (in sort order) of the tree.
  */
-- 
cgit v1.2.3-70-g09d2


From 3908836aa77e3621aaf2101f2920e01d7c8460d6 Mon Sep 17 00:00:00 2001
From: Michel Lespinasse <walken@google.com>
Date: Mon, 8 Oct 2012 16:31:21 -0700
Subject: rbtree: add RB_DECLARE_CALLBACKS() macro

As proposed by Peter Zijlstra, this makes it easier to define the augmented
rbtree callbacks.

Signed-off-by: Michel Lespinasse <walken@google.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/mm/pat_rbtree.c | 37 ++-----------------------------------
 include/linux/rbtree.h   | 30 ++++++++++++++++++++++++++++++
 lib/rbtree_test.c        | 34 ++--------------------------------
 3 files changed, 34 insertions(+), 67 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/mm/pat_rbtree.c b/arch/x86/mm/pat_rbtree.c
index 7e1515bd477..4d116959075 100644
--- a/arch/x86/mm/pat_rbtree.c
+++ b/arch/x86/mm/pat_rbtree.c
@@ -69,41 +69,8 @@ static u64 compute_subtree_max_end(struct memtype *data)
 	return max_end;
 }
 
-/* Update 'subtree_max_end' for node and its parents */
-static void memtype_rb_propagate_cb(struct rb_node *node, struct rb_node *stop)
-{
-	while (node != stop) {
-		struct memtype *data = container_of(node, struct memtype, rb);
-		u64 subtree_max_end = compute_subtree_max_end(data);
-		if (data->subtree_max_end == subtree_max_end)
-			break;
-		data->subtree_max_end = subtree_max_end;
-		node = rb_parent(&data->rb);
-	}
-}
-
-static void memtype_rb_copy_cb(struct rb_node *old, struct rb_node *new)
-{
-	struct memtype *old_data = container_of(old, struct memtype, rb);
-	struct memtype *new_data = container_of(new, struct memtype, rb);
-
-	new_data->subtree_max_end = old_data->subtree_max_end;
-}
-
-/* Update 'subtree_max_end' after tree rotation. old and new are the
- * former and current subtree roots */
-static void memtype_rb_rotate_cb(struct rb_node *old, struct rb_node *new)
-{
-	struct memtype *old_data = container_of(old, struct memtype, rb);
-	struct memtype *new_data = container_of(new, struct memtype, rb);
-
-	new_data->subtree_max_end = old_data->subtree_max_end;
-	old_data->subtree_max_end = compute_subtree_max_end(old_data);
-}
-
-static const struct rb_augment_callbacks memtype_rb_augment_cb = {
-	memtype_rb_propagate_cb, memtype_rb_copy_cb, memtype_rb_rotate_cb
-};
+RB_DECLARE_CALLBACKS(static, memtype_rb_augment_cb, struct memtype, rb,
+		     u64, subtree_max_end, compute_subtree_max_end)
 
 /* Find the first (lowest start addr) overlapping range from rb tree */
 static struct memtype *memtype_rb_lowest_match(struct rb_root *root,
diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h
index 4ace31b3338..8d1e83b1c87 100644
--- a/include/linux/rbtree.h
+++ b/include/linux/rbtree.h
@@ -79,6 +79,36 @@ rb_insert_augmented(struct rb_node *node, struct rb_root *root,
 	__rb_insert_augmented(node, root, augment->rotate);
 }
 
+#define RB_DECLARE_CALLBACKS(rbstatic, rbname, rbstruct, rbfield,	      \
+			     rbtype, rbaugmented, rbcompute)		      \
+static void rbname ## _propagate(struct rb_node *rb, struct rb_node *stop)    \
+{									      \
+	while (rb != stop) {						      \
+		rbstruct *node = rb_entry(rb, rbstruct, rbfield);	      \
+		rbtype augmented = rbcompute(node);			      \
+		if (node->rbaugmented == augmented)			      \
+			break;						      \
+		node->rbaugmented = augmented;				      \
+		rb = rb_parent(&node->rbfield);				      \
+	}								      \
+}									      \
+static void rbname ## _copy(struct rb_node *rb_old, struct rb_node *rb_new)   \
+{									      \
+	rbstruct *old = rb_entry(rb_old, rbstruct, rbfield);		      \
+	rbstruct *new = rb_entry(rb_new, rbstruct, rbfield);		      \
+	new->rbaugmented = old->rbaugmented;				      \
+}									      \
+static void rbname ## _rotate(struct rb_node *rb_old, struct rb_node *rb_new) \
+{									      \
+	rbstruct *old = rb_entry(rb_old, rbstruct, rbfield);		      \
+	rbstruct *new = rb_entry(rb_new, rbstruct, rbfield);		      \
+	new->rbaugmented = old->rbaugmented;				      \
+	old->rbaugmented = rbcompute(old);				      \
+}									      \
+rbstatic const struct rb_augment_callbacks rbname = {			      \
+	rbname ## _propagate, rbname ## _copy, rbname ## _rotate	      \
+};
+
 
 /* Find logical next and previous nodes in a tree */
 extern struct rb_node *rb_next(const struct rb_node *);
diff --git a/lib/rbtree_test.c b/lib/rbtree_test.c
index e28345df09b..b20e99969b0 100644
--- a/lib/rbtree_test.c
+++ b/lib/rbtree_test.c
@@ -61,38 +61,8 @@ static inline u32 augment_recompute(struct test_node *node)
 	return max;
 }
 
-static void augment_propagate(struct rb_node *rb, struct rb_node *stop)
-{
-	while (rb != stop) {
-		struct test_node *node = rb_entry(rb, struct test_node, rb);
-		u32 augmented = augment_recompute(node);
-		if (node->augmented == augmented)
-			break;
-		node->augmented = augmented;
-		rb = rb_parent(&node->rb);
-	}
-}
-
-static void augment_copy(struct rb_node *rb_old, struct rb_node *rb_new)
-{
-	struct test_node *old = rb_entry(rb_old, struct test_node, rb);
-	struct test_node *new = rb_entry(rb_new, struct test_node, rb);
-	new->augmented = old->augmented;
-}
-
-static void augment_rotate(struct rb_node *rb_old, struct rb_node *rb_new)
-{
-	struct test_node *old = rb_entry(rb_old, struct test_node, rb);
-	struct test_node *new = rb_entry(rb_new, struct test_node, rb);
-
-	/* Rotation doesn't change subtree's augmented value */
-	new->augmented = old->augmented;
-	old->augmented = augment_recompute(old);
-}
-
-static const struct rb_augment_callbacks augment_callbacks = {
-	augment_propagate, augment_copy, augment_rotate
-};
+RB_DECLARE_CALLBACKS(static, augment_callbacks, struct test_node, rb,
+		     u32, augmented, augment_recompute)
 
 static void insert_augmented(struct test_node *node, struct rb_root *root)
 {
-- 
cgit v1.2.3-70-g09d2


From 6b2dbba8b6ac4df26f72eda1e5ea7bab9f950e08 Mon Sep 17 00:00:00 2001
From: Michel Lespinasse <walken@google.com>
Date: Mon, 8 Oct 2012 16:31:25 -0700
Subject: mm: replace vma prio_tree with an interval tree

Implement an interval tree as a replacement for the VMA prio_tree.  The
algorithms are similar to lib/interval_tree.c; however that code can't be
directly reused as the interval endpoints are not explicitly stored in the
VMA.  So instead, the common algorithm is moved into a template and the
details (node type, how to get interval endpoints from the node, etc) are
filled in using the C preprocessor.

Once the interval tree functions are available, using them as a
replacement to the VMA prio tree is a relatively simple, mechanical job.

Signed-off-by: Michel Lespinasse <walken@google.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Hillf Danton <dhillf@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm/mm/fault-armv.c           |   3 +-
 arch/arm/mm/flush.c                |   3 +-
 arch/parisc/kernel/cache.c         |   3 +-
 arch/x86/mm/hugetlbpage.c          |   3 +-
 fs/hugetlbfs/inode.c               |   9 +-
 fs/inode.c                         |   2 +-
 include/linux/fs.h                 |   6 +-
 include/linux/interval_tree_tmpl.h | 215 +++++++++++++++++++++++++++++++++++++
 include/linux/mm.h                 |  30 +++---
 include/linux/mm_types.h           |  14 +--
 kernel/events/uprobes.c            |   3 +-
 kernel/fork.c                      |   2 +-
 lib/interval_tree.c                | 166 ++--------------------------
 lib/prio_tree.c                    |  19 +---
 mm/Makefile                        |   4 +-
 mm/filemap_xip.c                   |   3 +-
 mm/fremap.c                        |   2 +-
 mm/hugetlb.c                       |   3 +-
 mm/interval_tree.c                 |  61 +++++++++++
 mm/memory-failure.c                |   3 +-
 mm/memory.c                        |   9 +-
 mm/mmap.c                          |  22 ++--
 mm/nommu.c                         |  12 +--
 mm/prio_tree.c                     | 208 -----------------------------------
 mm/rmap.c                          |  18 ++--
 25 files changed, 357 insertions(+), 466 deletions(-)
 create mode 100644 include/linux/interval_tree_tmpl.h
 create mode 100644 mm/interval_tree.c
 delete mode 100644 mm/prio_tree.c

(limited to 'arch/x86')

diff --git a/arch/arm/mm/fault-armv.c b/arch/arm/mm/fault-armv.c
index 7599e2625c7..2a5907b5c8d 100644
--- a/arch/arm/mm/fault-armv.c
+++ b/arch/arm/mm/fault-armv.c
@@ -134,7 +134,6 @@ make_coherent(struct address_space *mapping, struct vm_area_struct *vma,
 {
 	struct mm_struct *mm = vma->vm_mm;
 	struct vm_area_struct *mpnt;
-	struct prio_tree_iter iter;
 	unsigned long offset;
 	pgoff_t pgoff;
 	int aliases = 0;
@@ -147,7 +146,7 @@ make_coherent(struct address_space *mapping, struct vm_area_struct *vma,
 	 * cache coherency.
 	 */
 	flush_dcache_mmap_lock(mapping);
-	vma_prio_tree_foreach(mpnt, &iter, &mapping->i_mmap, pgoff, pgoff) {
+	vma_interval_tree_foreach(mpnt, &mapping->i_mmap, pgoff, pgoff) {
 		/*
 		 * If this VMA is not in our MM, we can ignore it.
 		 * Note that we intentionally mask out the VMA
diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c
index 40ca11ed6e5..1c8f7f56417 100644
--- a/arch/arm/mm/flush.c
+++ b/arch/arm/mm/flush.c
@@ -196,7 +196,6 @@ static void __flush_dcache_aliases(struct address_space *mapping, struct page *p
 {
 	struct mm_struct *mm = current->active_mm;
 	struct vm_area_struct *mpnt;
-	struct prio_tree_iter iter;
 	pgoff_t pgoff;
 
 	/*
@@ -208,7 +207,7 @@ static void __flush_dcache_aliases(struct address_space *mapping, struct page *p
 	pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
 
 	flush_dcache_mmap_lock(mapping);
-	vma_prio_tree_foreach(mpnt, &iter, &mapping->i_mmap, pgoff, pgoff) {
+	vma_interval_tree_foreach(mpnt, &mapping->i_mmap, pgoff, pgoff) {
 		unsigned long offset;
 
 		/*
diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c
index 9d181890a7e..48e16dc2010 100644
--- a/arch/parisc/kernel/cache.c
+++ b/arch/parisc/kernel/cache.c
@@ -276,7 +276,6 @@ void flush_dcache_page(struct page *page)
 {
 	struct address_space *mapping = page_mapping(page);
 	struct vm_area_struct *mpnt;
-	struct prio_tree_iter iter;
 	unsigned long offset;
 	unsigned long addr, old_addr = 0;
 	pgoff_t pgoff;
@@ -299,7 +298,7 @@ void flush_dcache_page(struct page *page)
 	 * to flush one address here for them all to become coherent */
 
 	flush_dcache_mmap_lock(mapping);
-	vma_prio_tree_foreach(mpnt, &iter, &mapping->i_mmap, pgoff, pgoff) {
+	vma_interval_tree_foreach(mpnt, &mapping->i_mmap, pgoff, pgoff) {
 		offset = (pgoff - mpnt->vm_pgoff) << PAGE_SHIFT;
 		addr = mpnt->vm_start + offset;
 
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index b91e4851242..937bff5cdaa 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -71,7 +71,6 @@ huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
 	struct address_space *mapping = vma->vm_file->f_mapping;
 	pgoff_t idx = ((addr - vma->vm_start) >> PAGE_SHIFT) +
 			vma->vm_pgoff;
-	struct prio_tree_iter iter;
 	struct vm_area_struct *svma;
 	unsigned long saddr;
 	pte_t *spte = NULL;
@@ -81,7 +80,7 @@ huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
 		return (pte_t *)pmd_alloc(mm, pud, addr);
 
 	mutex_lock(&mapping->i_mmap_mutex);
-	vma_prio_tree_foreach(svma, &iter, &mapping->i_mmap, idx, idx) {
+	vma_interval_tree_foreach(svma, &mapping->i_mmap, idx, idx) {
 		if (svma == vma)
 			continue;
 
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 0a0ab8e21b1..c5bc355d824 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -397,17 +397,16 @@ static void hugetlbfs_evict_inode(struct inode *inode)
 }
 
 static inline void
-hugetlb_vmtruncate_list(struct prio_tree_root *root, pgoff_t pgoff)
+hugetlb_vmtruncate_list(struct rb_root *root, pgoff_t pgoff)
 {
 	struct vm_area_struct *vma;
-	struct prio_tree_iter iter;
 
-	vma_prio_tree_foreach(vma, &iter, root, pgoff, ULONG_MAX) {
+	vma_interval_tree_foreach(vma, root, pgoff, ULONG_MAX) {
 		unsigned long v_offset;
 
 		/*
 		 * Can the expression below overflow on 32-bit arches?
-		 * No, because the prio_tree returns us only those vmas
+		 * No, because the interval tree returns us only those vmas
 		 * which overlap the truncated area starting at pgoff,
 		 * and no vma on a 32-bit arch can span beyond the 4GB.
 		 */
@@ -432,7 +431,7 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
 
 	i_size_write(inode, offset);
 	mutex_lock(&mapping->i_mmap_mutex);
-	if (!prio_tree_empty(&mapping->i_mmap))
+	if (!RB_EMPTY_ROOT(&mapping->i_mmap))
 		hugetlb_vmtruncate_list(&mapping->i_mmap, pgoff);
 	mutex_unlock(&mapping->i_mmap_mutex);
 	truncate_hugepages(inode, offset);
diff --git a/fs/inode.c b/fs/inode.c
index ac8d904b3f1..b03c7195724 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -348,7 +348,7 @@ void address_space_init_once(struct address_space *mapping)
 	mutex_init(&mapping->i_mmap_mutex);
 	INIT_LIST_HEAD(&mapping->private_list);
 	spin_lock_init(&mapping->private_lock);
-	INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap);
+	mapping->i_mmap = RB_ROOT;
 	INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
 }
 EXPORT_SYMBOL(address_space_init_once);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 5a8a273d5b2..c617ed024df 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -401,7 +401,7 @@ struct inodes_stat_t {
 #include <linux/cache.h>
 #include <linux/list.h>
 #include <linux/radix-tree.h>
-#include <linux/prio_tree.h>
+#include <linux/rbtree.h>
 #include <linux/init.h>
 #include <linux/pid.h>
 #include <linux/bug.h>
@@ -669,7 +669,7 @@ struct address_space {
 	struct radix_tree_root	page_tree;	/* radix tree of all pages */
 	spinlock_t		tree_lock;	/* and lock protecting it */
 	unsigned int		i_mmap_writable;/* count VM_SHARED mappings */
-	struct prio_tree_root	i_mmap;		/* tree of private and shared mappings */
+	struct rb_root		i_mmap;		/* tree of private and shared mappings */
 	struct list_head	i_mmap_nonlinear;/*list VM_NONLINEAR mappings */
 	struct mutex		i_mmap_mutex;	/* protect tree, count, list */
 	/* Protected by tree_lock together with the radix tree */
@@ -741,7 +741,7 @@ int mapping_tagged(struct address_space *mapping, int tag);
  */
 static inline int mapping_mapped(struct address_space *mapping)
 {
-	return	!prio_tree_empty(&mapping->i_mmap) ||
+	return	!RB_EMPTY_ROOT(&mapping->i_mmap) ||
 		!list_empty(&mapping->i_mmap_nonlinear);
 }
 
diff --git a/include/linux/interval_tree_tmpl.h b/include/linux/interval_tree_tmpl.h
new file mode 100644
index 00000000000..c65deda3141
--- /dev/null
+++ b/include/linux/interval_tree_tmpl.h
@@ -0,0 +1,215 @@
+/*
+  Interval Trees
+  (C) 2012  Michel Lespinasse <walken@google.com>
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; either version 2 of the License, or
+  (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+  include/linux/interval_tree_tmpl.h
+*/
+
+/*
+ * Template for implementing interval trees
+ *
+ * ITSTRUCT:   struct type of the interval tree nodes
+ * ITRB:       name of struct rb_node field within ITSTRUCT
+ * ITTYPE:     type of the interval endpoints
+ * ITSUBTREE:  name of ITTYPE field within ITSTRUCT holding last-in-subtree
+ * ITSTART(n): start endpoint of ITSTRUCT node n
+ * ITLAST(n):  last endpoing of ITSTRUCT node n
+ * ITSTATIC:   'static' or empty
+ * ITPREFIX:   prefix to use for the inline tree definitions
+ */
+
+/* IT(name) -> ITPREFIX_name */
+#define _ITNAME(prefix, name) prefix ## _ ## name
+#define ITNAME(prefix, name) _ITNAME(prefix, name)
+#define IT(name) ITNAME(ITPREFIX, name)
+
+/* Callbacks for augmented rbtree insert and remove */
+
+static inline ITTYPE IT(compute_subtree_last)(ITSTRUCT *node)
+{
+	ITTYPE max = ITLAST(node), subtree_last;
+	if (node->ITRB.rb_left) {
+		subtree_last = rb_entry(node->ITRB.rb_left,
+					ITSTRUCT, ITRB)->ITSUBTREE;
+		if (max < subtree_last)
+			max = subtree_last;
+	}
+	if (node->ITRB.rb_right) {
+		subtree_last = rb_entry(node->ITRB.rb_right,
+					ITSTRUCT, ITRB)->ITSUBTREE;
+		if (max < subtree_last)
+			max = subtree_last;
+	}
+	return max;
+}
+
+static void IT(augment_propagate)(struct rb_node *rb, struct rb_node *stop)
+{
+	while (rb != stop) {
+		ITSTRUCT *node = rb_entry(rb, ITSTRUCT, ITRB);
+		ITTYPE subtree_last = IT(compute_subtree_last)(node);
+		if (node->ITSUBTREE == subtree_last)
+			break;
+		node->ITSUBTREE = subtree_last;
+		rb = rb_parent(&node->ITRB);
+	}
+}
+
+static void IT(augment_copy)(struct rb_node *rb_old, struct rb_node *rb_new)
+{
+	ITSTRUCT *old = rb_entry(rb_old, ITSTRUCT, ITRB);
+	ITSTRUCT *new = rb_entry(rb_new, ITSTRUCT, ITRB);
+
+	new->ITSUBTREE = old->ITSUBTREE;
+}
+
+static void IT(augment_rotate)(struct rb_node *rb_old, struct rb_node *rb_new)
+{
+	ITSTRUCT *old = rb_entry(rb_old, ITSTRUCT, ITRB);
+	ITSTRUCT *new = rb_entry(rb_new, ITSTRUCT, ITRB);
+
+	new->ITSUBTREE = old->ITSUBTREE;
+	old->ITSUBTREE = IT(compute_subtree_last)(old);
+}
+
+static const struct rb_augment_callbacks IT(augment_callbacks) = {
+	IT(augment_propagate), IT(augment_copy), IT(augment_rotate)
+};
+
+/* Insert / remove interval nodes from the tree */
+
+ITSTATIC void IT(insert)(ITSTRUCT *node, struct rb_root *root)
+{
+	struct rb_node **link = &root->rb_node, *rb_parent = NULL;
+	ITTYPE start = ITSTART(node), last = ITLAST(node);
+	ITSTRUCT *parent;
+
+	while (*link) {
+		rb_parent = *link;
+		parent = rb_entry(rb_parent, ITSTRUCT, ITRB);
+		if (parent->ITSUBTREE < last)
+			parent->ITSUBTREE = last;
+		if (start < ITSTART(parent))
+			link = &parent->ITRB.rb_left;
+		else
+			link = &parent->ITRB.rb_right;
+	}
+
+	node->ITSUBTREE = last;
+	rb_link_node(&node->ITRB, rb_parent, link);
+	rb_insert_augmented(&node->ITRB, root, &IT(augment_callbacks));
+}
+
+ITSTATIC void IT(remove)(ITSTRUCT *node, struct rb_root *root)
+{
+	rb_erase_augmented(&node->ITRB, root, &IT(augment_callbacks));
+}
+
+/*
+ * Iterate over intervals intersecting [start;last]
+ *
+ * Note that a node's interval intersects [start;last] iff:
+ *   Cond1: ITSTART(node) <= last
+ * and
+ *   Cond2: start <= ITLAST(node)
+ */
+
+static ITSTRUCT *IT(subtree_search)(ITSTRUCT *node, ITTYPE start, ITTYPE last)
+{
+	while (true) {
+		/*
+		 * Loop invariant: start <= node->ITSUBTREE
+		 * (Cond2 is satisfied by one of the subtree nodes)
+		 */
+		if (node->ITRB.rb_left) {
+			ITSTRUCT *left = rb_entry(node->ITRB.rb_left,
+						  ITSTRUCT, ITRB);
+			if (start <= left->ITSUBTREE) {
+				/*
+				 * Some nodes in left subtree satisfy Cond2.
+				 * Iterate to find the leftmost such node N.
+				 * If it also satisfies Cond1, that's the match
+				 * we are looking for. Otherwise, there is no
+				 * matching interval as nodes to the right of N
+				 * can't satisfy Cond1 either.
+				 */
+				node = left;
+				continue;
+			}
+		}
+		if (ITSTART(node) <= last) {		/* Cond1 */
+			if (start <= ITLAST(node))	/* Cond2 */
+				return node;	/* node is leftmost match */
+			if (node->ITRB.rb_right) {
+				node = rb_entry(node->ITRB.rb_right,
+						ITSTRUCT, ITRB);
+				if (start <= node->ITSUBTREE)
+					continue;
+			}
+		}
+		return NULL;	/* No match */
+	}
+}
+
+ITSTATIC ITSTRUCT *IT(iter_first)(struct rb_root *root,
+				  ITTYPE start, ITTYPE last)
+{
+	ITSTRUCT *node;
+
+	if (!root->rb_node)
+		return NULL;
+	node = rb_entry(root->rb_node, ITSTRUCT, ITRB);
+	if (node->ITSUBTREE < start)
+		return NULL;
+	return IT(subtree_search)(node, start, last);
+}
+
+ITSTATIC ITSTRUCT *IT(iter_next)(ITSTRUCT *node, ITTYPE start, ITTYPE last)
+{
+	struct rb_node *rb = node->ITRB.rb_right, *prev;
+
+	while (true) {
+		/*
+		 * Loop invariants:
+		 *   Cond1: ITSTART(node) <= last
+		 *   rb == node->ITRB.rb_right
+		 *
+		 * First, search right subtree if suitable
+		 */
+		if (rb) {
+			ITSTRUCT *right = rb_entry(rb, ITSTRUCT, ITRB);
+			if (start <= right->ITSUBTREE)
+				return IT(subtree_search)(right, start, last);
+		}
+
+		/* Move up the tree until we come from a node's left child */
+		do {
+			rb = rb_parent(&node->ITRB);
+			if (!rb)
+				return NULL;
+			prev = &node->ITRB;
+			node = rb_entry(rb, ITSTRUCT, ITRB);
+			rb = node->ITRB.rb_right;
+		} while (prev == rb);
+
+		/* Check if the node intersects [start;last] */
+		if (last < ITSTART(node))		/* !Cond1 */
+			return NULL;
+		else if (start <= ITLAST(node))		/* Cond2 */
+			return node;
+	}
+}
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 5ddb11b2b4b..0f671ef09eb 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -10,7 +10,6 @@
 #include <linux/list.h>
 #include <linux/mmzone.h>
 #include <linux/rbtree.h>
-#include <linux/prio_tree.h>
 #include <linux/atomic.h>
 #include <linux/debug_locks.h>
 #include <linux/mm_types.h>
@@ -1355,22 +1354,27 @@ extern void zone_pcp_reset(struct zone *zone);
 extern atomic_long_t mmap_pages_allocated;
 extern int nommu_shrink_inode_mappings(struct inode *, size_t, size_t);
 
-/* prio_tree.c */
-void vma_prio_tree_add(struct vm_area_struct *, struct vm_area_struct *old);
-void vma_prio_tree_insert(struct vm_area_struct *, struct prio_tree_root *);
-void vma_prio_tree_remove(struct vm_area_struct *, struct prio_tree_root *);
-struct vm_area_struct *vma_prio_tree_next(struct vm_area_struct *vma,
-	struct prio_tree_iter *iter);
-
-#define vma_prio_tree_foreach(vma, iter, root, begin, end)	\
-	for (prio_tree_iter_init(iter, root, begin, end), vma = NULL;	\
-		(vma = vma_prio_tree_next(vma, iter)); )
+/* interval_tree.c */
+void vma_interval_tree_add(struct vm_area_struct *vma,
+			   struct vm_area_struct *old,
+			   struct address_space *mapping);
+void vma_interval_tree_insert(struct vm_area_struct *node,
+			      struct rb_root *root);
+void vma_interval_tree_remove(struct vm_area_struct *node,
+			      struct rb_root *root);
+struct vm_area_struct *vma_interval_tree_iter_first(struct rb_root *root,
+				unsigned long start, unsigned long last);
+struct vm_area_struct *vma_interval_tree_iter_next(struct vm_area_struct *node,
+				unsigned long start, unsigned long last);
+
+#define vma_interval_tree_foreach(vma, root, start, last)		\
+	for (vma = vma_interval_tree_iter_first(root, start, last);	\
+	     vma; vma = vma_interval_tree_iter_next(vma, start, last))
 
 static inline void vma_nonlinear_insert(struct vm_area_struct *vma,
 					struct list_head *list)
 {
-	vma->shared.vm_set.parent = NULL;
-	list_add_tail(&vma->shared.vm_set.list, list);
+	list_add_tail(&vma->shared.nonlinear, list);
 }
 
 /* mmap.c */
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index a57a43f5ca7..31f8a3af7d9 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -6,7 +6,6 @@
 #include <linux/threads.h>
 #include <linux/list.h>
 #include <linux/spinlock.h>
-#include <linux/prio_tree.h>
 #include <linux/rbtree.h>
 #include <linux/rwsem.h>
 #include <linux/completion.h>
@@ -240,18 +239,15 @@ struct vm_area_struct {
 
 	/*
 	 * For areas with an address space and backing store,
-	 * linkage into the address_space->i_mmap prio tree, or
-	 * linkage to the list of like vmas hanging off its node, or
+	 * linkage into the address_space->i_mmap interval tree, or
 	 * linkage of vma in the address_space->i_mmap_nonlinear list.
 	 */
 	union {
 		struct {
-			struct list_head list;
-			void *parent;	/* aligns with prio_tree_node parent */
-			struct vm_area_struct *head;
-		} vm_set;
-
-		struct raw_prio_tree_node prio_tree_node;
+			struct rb_node rb;
+			unsigned long rb_subtree_last;
+		} linear;
+		struct list_head nonlinear;
 	} shared;
 
 	/*
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 912ef48d28a..1d9c0a98596 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -735,7 +735,6 @@ static struct map_info *
 build_map_info(struct address_space *mapping, loff_t offset, bool is_register)
 {
 	unsigned long pgoff = offset >> PAGE_SHIFT;
-	struct prio_tree_iter iter;
 	struct vm_area_struct *vma;
 	struct map_info *curr = NULL;
 	struct map_info *prev = NULL;
@@ -744,7 +743,7 @@ build_map_info(struct address_space *mapping, loff_t offset, bool is_register)
 
  again:
 	mutex_lock(&mapping->i_mmap_mutex);
-	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
+	vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
 		if (!valid_vma(vma, is_register))
 			continue;
 
diff --git a/kernel/fork.c b/kernel/fork.c
index 972762e0102..90dace52715 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -423,7 +423,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
 				mapping->i_mmap_writable++;
 			flush_dcache_mmap_lock(mapping);
 			/* insert tmp into the share list, just after mpnt */
-			vma_prio_tree_add(tmp, mpnt);
+			vma_interval_tree_add(tmp, mpnt, mapping);
 			flush_dcache_mmap_unlock(mapping);
 			mutex_unlock(&mapping->i_mmap_mutex);
 		}
diff --git a/lib/interval_tree.c b/lib/interval_tree.c
index 6fd540b1e49..77a793e0644 100644
--- a/lib/interval_tree.c
+++ b/lib/interval_tree.c
@@ -1,159 +1,13 @@
 #include <linux/init.h>
 #include <linux/interval_tree.h>
 
-/* Callbacks for augmented rbtree insert and remove */
-
-static inline unsigned long
-compute_subtree_last(struct interval_tree_node *node)
-{
-	unsigned long max = node->last, subtree_last;
-	if (node->rb.rb_left) {
-		subtree_last = rb_entry(node->rb.rb_left,
-			struct interval_tree_node, rb)->__subtree_last;
-		if (max < subtree_last)
-			max = subtree_last;
-	}
-	if (node->rb.rb_right) {
-		subtree_last = rb_entry(node->rb.rb_right,
-			struct interval_tree_node, rb)->__subtree_last;
-		if (max < subtree_last)
-			max = subtree_last;
-	}
-	return max;
-}
-
-RB_DECLARE_CALLBACKS(static, augment_callbacks, struct interval_tree_node, rb,
-		     unsigned long, __subtree_last, compute_subtree_last)
-
-/* Insert / remove interval nodes from the tree */
-
-void interval_tree_insert(struct interval_tree_node *node,
-			  struct rb_root *root)
-{
-	struct rb_node **link = &root->rb_node, *rb_parent = NULL;
-	unsigned long start = node->start, last = node->last;
-	struct interval_tree_node *parent;
-
-	while (*link) {
-		rb_parent = *link;
-		parent = rb_entry(rb_parent, struct interval_tree_node, rb);
-		if (parent->__subtree_last < last)
-			parent->__subtree_last = last;
-		if (start < parent->start)
-			link = &parent->rb.rb_left;
-		else
-			link = &parent->rb.rb_right;
-	}
-
-	node->__subtree_last = last;
-	rb_link_node(&node->rb, rb_parent, link);
-	rb_insert_augmented(&node->rb, root, &augment_callbacks);
-}
-
-void interval_tree_remove(struct interval_tree_node *node,
-			  struct rb_root *root)
-{
-	rb_erase_augmented(&node->rb, root, &augment_callbacks);
-}
-
-/*
- * Iterate over intervals intersecting [start;last]
- *
- * Note that a node's interval intersects [start;last] iff:
- *   Cond1: node->start <= last
- * and
- *   Cond2: start <= node->last
- */
-
-static struct interval_tree_node *
-subtree_search(struct interval_tree_node *node,
-	       unsigned long start, unsigned long last)
-{
-	while (true) {
-		/*
-		 * Loop invariant: start <= node->__subtree_last
-		 * (Cond2 is satisfied by one of the subtree nodes)
-		 */
-		if (node->rb.rb_left) {
-			struct interval_tree_node *left =
-				rb_entry(node->rb.rb_left,
-					 struct interval_tree_node, rb);
-			if (start <= left->__subtree_last) {
-				/*
-				 * Some nodes in left subtree satisfy Cond2.
-				 * Iterate to find the leftmost such node N.
-				 * If it also satisfies Cond1, that's the match
-				 * we are looking for. Otherwise, there is no
-				 * matching interval as nodes to the right of N
-				 * can't satisfy Cond1 either.
-				 */
-				node = left;
-				continue;
-			}
-		}
-		if (node->start <= last) {		/* Cond1 */
-			if (start <= node->last)	/* Cond2 */
-				return node;	/* node is leftmost match */
-			if (node->rb.rb_right) {
-				node = rb_entry(node->rb.rb_right,
-					struct interval_tree_node, rb);
-				if (start <= node->__subtree_last)
-					continue;
-			}
-		}
-		return NULL;	/* No match */
-	}
-}
-
-struct interval_tree_node *
-interval_tree_iter_first(struct rb_root *root,
-			 unsigned long start, unsigned long last)
-{
-	struct interval_tree_node *node;
-
-	if (!root->rb_node)
-		return NULL;
-	node = rb_entry(root->rb_node, struct interval_tree_node, rb);
-	if (node->__subtree_last < start)
-		return NULL;
-	return subtree_search(node, start, last);
-}
-
-struct interval_tree_node *
-interval_tree_iter_next(struct interval_tree_node *node,
-			unsigned long start, unsigned long last)
-{
-	struct rb_node *rb = node->rb.rb_right, *prev;
-
-	while (true) {
-		/*
-		 * Loop invariants:
-		 *   Cond1: node->start <= last
-		 *   rb == node->rb.rb_right
-		 *
-		 * First, search right subtree if suitable
-		 */
-		if (rb) {
-			struct interval_tree_node *right =
-				rb_entry(rb, struct interval_tree_node, rb);
-			if (start <= right->__subtree_last)
-				return subtree_search(right, start, last);
-		}
-
-		/* Move up the tree until we come from a node's left child */
-		do {
-			rb = rb_parent(&node->rb);
-			if (!rb)
-				return NULL;
-			prev = &node->rb;
-			node = rb_entry(rb, struct interval_tree_node, rb);
-			rb = node->rb.rb_right;
-		} while (prev == rb);
-
-		/* Check if the node intersects [start;last] */
-		if (last < node->start)		/* !Cond1 */
-			return NULL;
-		else if (start <= node->last)	/* Cond2 */
-			return node;
-	}
-}
+#define ITSTRUCT   struct interval_tree_node
+#define ITRB       rb
+#define ITTYPE     unsigned long
+#define ITSUBTREE  __subtree_last
+#define ITSTART(n) ((n)->start)
+#define ITLAST(n)  ((n)->last)
+#define ITSTATIC
+#define ITPREFIX   interval_tree
+
+#include <linux/interval_tree_tmpl.h>
diff --git a/lib/prio_tree.c b/lib/prio_tree.c
index 4e0d2edff2b..bba37148c15 100644
--- a/lib/prio_tree.c
+++ b/lib/prio_tree.c
@@ -44,27 +44,12 @@
  * The following macros are used for implementing prio_tree for i_mmap
  */
 
-#define RADIX_INDEX(vma)  ((vma)->vm_pgoff)
-#define VMA_SIZE(vma)	  (((vma)->vm_end - (vma)->vm_start) >> PAGE_SHIFT)
-/* avoid overflow */
-#define HEAP_INDEX(vma)	  ((vma)->vm_pgoff + (VMA_SIZE(vma) - 1))
-
-
 static void get_index(const struct prio_tree_root *root,
     const struct prio_tree_node *node,
     unsigned long *radix, unsigned long *heap)
 {
-	if (root->raw) {
-		struct vm_area_struct *vma = prio_tree_entry(
-		    node, struct vm_area_struct, shared.prio_tree_node);
-
-		*radix = RADIX_INDEX(vma);
-		*heap = HEAP_INDEX(vma);
-	}
-	else {
-		*radix = node->start;
-		*heap = node->last;
-	}
+	*radix = node->start;
+	*heap = node->last;
 }
 
 static unsigned long index_bits_to_maxindex[BITS_PER_LONG];
diff --git a/mm/Makefile b/mm/Makefile
index 92753e2d82d..6b025f80af3 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -14,9 +14,9 @@ endif
 obj-y			:= filemap.o mempool.o oom_kill.o fadvise.o \
 			   maccess.o page_alloc.o page-writeback.o \
 			   readahead.o swap.o truncate.o vmscan.o shmem.o \
-			   prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \
+			   util.o mmzone.o vmstat.o backing-dev.o \
 			   mm_init.o mmu_context.o percpu.o slab_common.o \
-			   compaction.o $(mmu-y)
+			   compaction.o interval_tree.o $(mmu-y)
 
 obj-y += init-mm.o
 
diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c
index 91750227a19..a52daee11d3 100644
--- a/mm/filemap_xip.c
+++ b/mm/filemap_xip.c
@@ -167,7 +167,6 @@ __xip_unmap (struct address_space * mapping,
 {
 	struct vm_area_struct *vma;
 	struct mm_struct *mm;
-	struct prio_tree_iter iter;
 	unsigned long address;
 	pte_t *pte;
 	pte_t pteval;
@@ -184,7 +183,7 @@ __xip_unmap (struct address_space * mapping,
 
 retry:
 	mutex_lock(&mapping->i_mmap_mutex);
-	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
+	vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
 		mm = vma->vm_mm;
 		address = vma->vm_start +
 			((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
diff --git a/mm/fremap.c b/mm/fremap.c
index 3d731a49878..3899a86851c 100644
--- a/mm/fremap.c
+++ b/mm/fremap.c
@@ -214,7 +214,7 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
 		mutex_lock(&mapping->i_mmap_mutex);
 		flush_dcache_mmap_lock(mapping);
 		vma->vm_flags |= VM_NONLINEAR;
-		vma_prio_tree_remove(vma, &mapping->i_mmap);
+		vma_interval_tree_remove(vma, &mapping->i_mmap);
 		vma_nonlinear_insert(vma, &mapping->i_mmap_nonlinear);
 		flush_dcache_mmap_unlock(mapping);
 		mutex_unlock(&mapping->i_mmap_mutex);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index f1bb534254f..c9b40e3a993 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2474,7 +2474,6 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
 	struct hstate *h = hstate_vma(vma);
 	struct vm_area_struct *iter_vma;
 	struct address_space *mapping;
-	struct prio_tree_iter iter;
 	pgoff_t pgoff;
 
 	/*
@@ -2491,7 +2490,7 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
 	 * __unmap_hugepage_range() is called as the lock is already held
 	 */
 	mutex_lock(&mapping->i_mmap_mutex);
-	vma_prio_tree_foreach(iter_vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
+	vma_interval_tree_foreach(iter_vma, &mapping->i_mmap, pgoff, pgoff) {
 		/* Do not unmap the current VMA */
 		if (iter_vma == vma)
 			continue;
diff --git a/mm/interval_tree.c b/mm/interval_tree.c
new file mode 100644
index 00000000000..7dc565660e5
--- /dev/null
+++ b/mm/interval_tree.c
@@ -0,0 +1,61 @@
+/*
+ * mm/interval_tree.c - interval tree for mapping->i_mmap
+ *
+ * Copyright (C) 2012, Michel Lespinasse <walken@google.com>
+ *
+ * This file is released under the GPL v2.
+ */
+
+#include <linux/mm.h>
+#include <linux/fs.h>
+
+#define ITSTRUCT   struct vm_area_struct
+#define ITRB       shared.linear.rb
+#define ITTYPE     unsigned long
+#define ITSUBTREE  shared.linear.rb_subtree_last
+#define ITSTART(n) ((n)->vm_pgoff)
+#define ITLAST(n)  ((n)->vm_pgoff + \
+		    (((n)->vm_end - (n)->vm_start) >> PAGE_SHIFT) - 1)
+#define ITSTATIC
+#define ITPREFIX   vma_interval_tree
+
+#include <linux/interval_tree_tmpl.h>
+
+/* Insert old immediately after vma in the interval tree */
+void vma_interval_tree_add(struct vm_area_struct *vma,
+			   struct vm_area_struct *old,
+			   struct address_space *mapping)
+{
+	struct rb_node **link;
+	struct vm_area_struct *parent;
+	unsigned long last;
+
+	if (unlikely(vma->vm_flags & VM_NONLINEAR)) {
+		list_add(&vma->shared.nonlinear, &old->shared.nonlinear);
+		return;
+	}
+
+	last = ITLAST(vma);
+
+	if (!old->shared.linear.rb.rb_right) {
+		parent = old;
+		link = &old->shared.linear.rb.rb_right;
+	} else {
+		parent = rb_entry(old->shared.linear.rb.rb_right,
+				  struct vm_area_struct, shared.linear.rb);
+		if (parent->shared.linear.rb_subtree_last < last)
+			parent->shared.linear.rb_subtree_last = last;
+		while (parent->shared.linear.rb.rb_left) {
+			parent = rb_entry(parent->shared.linear.rb.rb_left,
+				struct vm_area_struct, shared.linear.rb);
+			if (parent->shared.linear.rb_subtree_last < last)
+				parent->shared.linear.rb_subtree_last = last;
+		}
+		link = &parent->shared.linear.rb.rb_left;
+	}
+
+	vma->shared.linear.rb_subtree_last = last;
+	rb_link_node(&vma->shared.linear.rb, &parent->shared.linear.rb, link);
+	rb_insert_augmented(&vma->shared.linear.rb, &mapping->i_mmap,
+			    &vma_interval_tree_augment_callbacks);
+}
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index a6e2141a661..c38a6257d08 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -431,7 +431,6 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill,
 {
 	struct vm_area_struct *vma;
 	struct task_struct *tsk;
-	struct prio_tree_iter iter;
 	struct address_space *mapping = page->mapping;
 
 	mutex_lock(&mapping->i_mmap_mutex);
@@ -442,7 +441,7 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill,
 		if (!task_early_kill(tsk))
 			continue;
 
-		vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff,
+		vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff,
 				      pgoff) {
 			/*
 			 * Send early kill signal to tasks where a vma covers
diff --git a/mm/memory.c b/mm/memory.c
index e09c0481318..d205e4381a3 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2801,14 +2801,13 @@ static void unmap_mapping_range_vma(struct vm_area_struct *vma,
 	zap_page_range_single(vma, start_addr, end_addr - start_addr, details);
 }
 
-static inline void unmap_mapping_range_tree(struct prio_tree_root *root,
+static inline void unmap_mapping_range_tree(struct rb_root *root,
 					    struct zap_details *details)
 {
 	struct vm_area_struct *vma;
-	struct prio_tree_iter iter;
 	pgoff_t vba, vea, zba, zea;
 
-	vma_prio_tree_foreach(vma, &iter, root,
+	vma_interval_tree_foreach(vma, root,
 			details->first_index, details->last_index) {
 
 		vba = vma->vm_pgoff;
@@ -2839,7 +2838,7 @@ static inline void unmap_mapping_range_list(struct list_head *head,
 	 * across *all* the pages in each nonlinear VMA, not just the pages
 	 * whose virtual address lies outside the file truncation point.
 	 */
-	list_for_each_entry(vma, head, shared.vm_set.list) {
+	list_for_each_entry(vma, head, shared.nonlinear) {
 		details->nonlinear_vma = vma;
 		unmap_mapping_range_vma(vma, vma->vm_start, vma->vm_end, details);
 	}
@@ -2883,7 +2882,7 @@ void unmap_mapping_range(struct address_space *mapping,
 
 
 	mutex_lock(&mapping->i_mmap_mutex);
-	if (unlikely(!prio_tree_empty(&mapping->i_mmap)))
+	if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap)))
 		unmap_mapping_range_tree(&mapping->i_mmap, &details);
 	if (unlikely(!list_empty(&mapping->i_mmap_nonlinear)))
 		unmap_mapping_range_list(&mapping->i_mmap_nonlinear, &details);
diff --git a/mm/mmap.c b/mm/mmap.c
index e3c365ff1b6..5ac533f88e9 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -199,14 +199,14 @@ static void __remove_shared_vm_struct(struct vm_area_struct *vma,
 
 	flush_dcache_mmap_lock(mapping);
 	if (unlikely(vma->vm_flags & VM_NONLINEAR))
-		list_del_init(&vma->shared.vm_set.list);
+		list_del_init(&vma->shared.nonlinear);
 	else
-		vma_prio_tree_remove(vma, &mapping->i_mmap);
+		vma_interval_tree_remove(vma, &mapping->i_mmap);
 	flush_dcache_mmap_unlock(mapping);
 }
 
 /*
- * Unlink a file-based vm structure from its prio_tree, to hide
+ * Unlink a file-based vm structure from its interval tree, to hide
  * vma from rmap and vmtruncate before freeing its page tables.
  */
 void unlink_file_vma(struct vm_area_struct *vma)
@@ -411,7 +411,7 @@ static void __vma_link_file(struct vm_area_struct *vma)
 		if (unlikely(vma->vm_flags & VM_NONLINEAR))
 			vma_nonlinear_insert(vma, &mapping->i_mmap_nonlinear);
 		else
-			vma_prio_tree_insert(vma, &mapping->i_mmap);
+			vma_interval_tree_insert(vma, &mapping->i_mmap);
 		flush_dcache_mmap_unlock(mapping);
 	}
 }
@@ -449,7 +449,7 @@ static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
 
 /*
  * Helper for vma_adjust() in the split_vma insert case: insert a vma into the
- * mm's list and rbtree.  It has already been inserted into the prio_tree.
+ * mm's list and rbtree.  It has already been inserted into the interval tree.
  */
 static void __insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
 {
@@ -491,7 +491,7 @@ int vma_adjust(struct vm_area_struct *vma, unsigned long start,
 	struct vm_area_struct *next = vma->vm_next;
 	struct vm_area_struct *importer = NULL;
 	struct address_space *mapping = NULL;
-	struct prio_tree_root *root = NULL;
+	struct rb_root *root = NULL;
 	struct anon_vma *anon_vma = NULL;
 	struct file *file = vma->vm_file;
 	long adjust_next = 0;
@@ -554,7 +554,7 @@ again:			remove_next = 1 + (end > next->vm_end);
 		mutex_lock(&mapping->i_mmap_mutex);
 		if (insert) {
 			/*
-			 * Put into prio_tree now, so instantiated pages
+			 * Put into interval tree now, so instantiated pages
 			 * are visible to arm/parisc __flush_dcache_page
 			 * throughout; but we cannot insert into address
 			 * space until vma start or end is updated.
@@ -582,9 +582,9 @@ again:			remove_next = 1 + (end > next->vm_end);
 
 	if (root) {
 		flush_dcache_mmap_lock(mapping);
-		vma_prio_tree_remove(vma, root);
+		vma_interval_tree_remove(vma, root);
 		if (adjust_next)
-			vma_prio_tree_remove(next, root);
+			vma_interval_tree_remove(next, root);
 	}
 
 	vma->vm_start = start;
@@ -597,8 +597,8 @@ again:			remove_next = 1 + (end > next->vm_end);
 
 	if (root) {
 		if (adjust_next)
-			vma_prio_tree_insert(next, root);
-		vma_prio_tree_insert(vma, root);
+			vma_interval_tree_insert(next, root);
+		vma_interval_tree_insert(vma, root);
 		flush_dcache_mmap_unlock(mapping);
 	}
 
diff --git a/mm/nommu.c b/mm/nommu.c
index 12e84e69dd0..45131b41bcd 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -698,7 +698,7 @@ static void add_vma_to_mm(struct mm_struct *mm, struct vm_area_struct *vma)
 
 		mutex_lock(&mapping->i_mmap_mutex);
 		flush_dcache_mmap_lock(mapping);
-		vma_prio_tree_insert(vma, &mapping->i_mmap);
+		vma_interval_tree_insert(vma, &mapping->i_mmap);
 		flush_dcache_mmap_unlock(mapping);
 		mutex_unlock(&mapping->i_mmap_mutex);
 	}
@@ -764,7 +764,7 @@ static void delete_vma_from_mm(struct vm_area_struct *vma)
 
 		mutex_lock(&mapping->i_mmap_mutex);
 		flush_dcache_mmap_lock(mapping);
-		vma_prio_tree_remove(vma, &mapping->i_mmap);
+		vma_interval_tree_remove(vma, &mapping->i_mmap);
 		flush_dcache_mmap_unlock(mapping);
 		mutex_unlock(&mapping->i_mmap_mutex);
 	}
@@ -2044,7 +2044,6 @@ int nommu_shrink_inode_mappings(struct inode *inode, size_t size,
 				size_t newsize)
 {
 	struct vm_area_struct *vma;
-	struct prio_tree_iter iter;
 	struct vm_region *region;
 	pgoff_t low, high;
 	size_t r_size, r_top;
@@ -2056,8 +2055,7 @@ int nommu_shrink_inode_mappings(struct inode *inode, size_t size,
 	mutex_lock(&inode->i_mapping->i_mmap_mutex);
 
 	/* search for VMAs that fall within the dead zone */
-	vma_prio_tree_foreach(vma, &iter, &inode->i_mapping->i_mmap,
-			      low, high) {
+	vma_interval_tree_foreach(vma, &inode->i_mapping->i_mmap, low, high) {
 		/* found one - only interested if it's shared out of the page
 		 * cache */
 		if (vma->vm_flags & VM_SHARED) {
@@ -2073,8 +2071,8 @@ int nommu_shrink_inode_mappings(struct inode *inode, size_t size,
 	 * we don't check for any regions that start beyond the EOF as there
 	 * shouldn't be any
 	 */
-	vma_prio_tree_foreach(vma, &iter, &inode->i_mapping->i_mmap,
-			      0, ULONG_MAX) {
+	vma_interval_tree_foreach(vma, &inode->i_mapping->i_mmap,
+				  0, ULONG_MAX) {
 		if (!(vma->vm_flags & VM_SHARED))
 			continue;
 
diff --git a/mm/prio_tree.c b/mm/prio_tree.c
deleted file mode 100644
index 799dcfd7cd8..00000000000
--- a/mm/prio_tree.c
+++ /dev/null
@@ -1,208 +0,0 @@
-/*
- * mm/prio_tree.c - priority search tree for mapping->i_mmap
- *
- * Copyright (C) 2004, Rajesh Venkatasubramanian <vrajesh@umich.edu>
- *
- * This file is released under the GPL v2.
- *
- * Based on the radix priority search tree proposed by Edward M. McCreight
- * SIAM Journal of Computing, vol. 14, no.2, pages 257-276, May 1985
- *
- * 02Feb2004	Initial version
- */
-
-#include <linux/mm.h>
-#include <linux/prio_tree.h>
-#include <linux/prefetch.h>
-
-/*
- * See lib/prio_tree.c for details on the general radix priority search tree
- * code.
- */
-
-/*
- * The following #defines are mirrored from lib/prio_tree.c. They're only used
- * for debugging, and should be removed (along with the debugging code using
- * them) when switching also VMAs to the regular prio_tree code.
- */
-
-#define RADIX_INDEX(vma)  ((vma)->vm_pgoff)
-#define VMA_SIZE(vma)	  (((vma)->vm_end - (vma)->vm_start) >> PAGE_SHIFT)
-/* avoid overflow */
-#define HEAP_INDEX(vma)   ((vma)->vm_pgoff + (VMA_SIZE(vma) - 1))
-
-/*
- * Radix priority search tree for address_space->i_mmap
- *
- * For each vma that map a unique set of file pages i.e., unique [radix_index,
- * heap_index] value, we have a corresponding priority search tree node. If
- * multiple vmas have identical [radix_index, heap_index] value, then one of
- * them is used as a tree node and others are stored in a vm_set list. The tree
- * node points to the first vma (head) of the list using vm_set.head.
- *
- * prio_tree_root
- *      |
- *      A       vm_set.head
- *     / \      /
- *    L   R -> H-I-J-K-M-N-O-P-Q-S
- *    ^   ^    <-- vm_set.list -->
- *  tree nodes
- *
- * We need some way to identify whether a vma is a tree node, head of a vm_set
- * list, or just a member of a vm_set list. We cannot use vm_flags to store
- * such information. The reason is, in the above figure, it is possible that
- * vm_flags' of R and H are covered by the different mmap_sems. When R is
- * removed under R->mmap_sem, H replaces R as a tree node. Since we do not hold
- * H->mmap_sem, we cannot use H->vm_flags for marking that H is a tree node now.
- * That's why some trick involving shared.vm_set.parent is used for identifying
- * tree nodes and list head nodes.
- *
- * vma radix priority search tree node rules:
- *
- * vma->shared.vm_set.parent != NULL    ==> a tree node
- *      vma->shared.vm_set.head != NULL ==> list of others mapping same range
- *      vma->shared.vm_set.head == NULL ==> no others map the same range
- *
- * vma->shared.vm_set.parent == NULL
- * 	vma->shared.vm_set.head != NULL ==> list head of vmas mapping same range
- * 	vma->shared.vm_set.head == NULL ==> a list node
- */
-
-/*
- * Add a new vma known to map the same set of pages as the old vma:
- * useful for fork's dup_mmap as well as vma_prio_tree_insert below.
- * Note that it just happens to work correctly on i_mmap_nonlinear too.
- */
-void vma_prio_tree_add(struct vm_area_struct *vma, struct vm_area_struct *old)
-{
-	/* Leave these BUG_ONs till prio_tree patch stabilizes */
-	BUG_ON(RADIX_INDEX(vma) != RADIX_INDEX(old));
-	BUG_ON(HEAP_INDEX(vma) != HEAP_INDEX(old));
-
-	vma->shared.vm_set.head = NULL;
-	vma->shared.vm_set.parent = NULL;
-
-	if (!old->shared.vm_set.parent)
-		list_add(&vma->shared.vm_set.list,
-				&old->shared.vm_set.list);
-	else if (old->shared.vm_set.head)
-		list_add_tail(&vma->shared.vm_set.list,
-				&old->shared.vm_set.head->shared.vm_set.list);
-	else {
-		INIT_LIST_HEAD(&vma->shared.vm_set.list);
-		vma->shared.vm_set.head = old;
-		old->shared.vm_set.head = vma;
-	}
-}
-
-void vma_prio_tree_insert(struct vm_area_struct *vma,
-			  struct prio_tree_root *root)
-{
-	struct prio_tree_node *ptr;
-	struct vm_area_struct *old;
-
-	vma->shared.vm_set.head = NULL;
-
-	ptr = raw_prio_tree_insert(root, &vma->shared.prio_tree_node);
-	if (ptr != (struct prio_tree_node *) &vma->shared.prio_tree_node) {
-		old = prio_tree_entry(ptr, struct vm_area_struct,
-					shared.prio_tree_node);
-		vma_prio_tree_add(vma, old);
-	}
-}
-
-void vma_prio_tree_remove(struct vm_area_struct *vma,
-			  struct prio_tree_root *root)
-{
-	struct vm_area_struct *node, *head, *new_head;
-
-	if (!vma->shared.vm_set.head) {
-		if (!vma->shared.vm_set.parent)
-			list_del_init(&vma->shared.vm_set.list);
-		else
-			raw_prio_tree_remove(root, &vma->shared.prio_tree_node);
-	} else {
-		/* Leave this BUG_ON till prio_tree patch stabilizes */
-		BUG_ON(vma->shared.vm_set.head->shared.vm_set.head != vma);
-		if (vma->shared.vm_set.parent) {
-			head = vma->shared.vm_set.head;
-			if (!list_empty(&head->shared.vm_set.list)) {
-				new_head = list_entry(
-					head->shared.vm_set.list.next,
-					struct vm_area_struct,
-					shared.vm_set.list);
-				list_del_init(&head->shared.vm_set.list);
-			} else
-				new_head = NULL;
-
-			raw_prio_tree_replace(root, &vma->shared.prio_tree_node,
-					&head->shared.prio_tree_node);
-			head->shared.vm_set.head = new_head;
-			if (new_head)
-				new_head->shared.vm_set.head = head;
-
-		} else {
-			node = vma->shared.vm_set.head;
-			if (!list_empty(&vma->shared.vm_set.list)) {
-				new_head = list_entry(
-					vma->shared.vm_set.list.next,
-					struct vm_area_struct,
-					shared.vm_set.list);
-				list_del_init(&vma->shared.vm_set.list);
-				node->shared.vm_set.head = new_head;
-				new_head->shared.vm_set.head = node;
-			} else
-				node->shared.vm_set.head = NULL;
-		}
-	}
-}
-
-/*
- * Helper function to enumerate vmas that map a given file page or a set of
- * contiguous file pages. The function returns vmas that at least map a single
- * page in the given range of contiguous file pages.
- */
-struct vm_area_struct *vma_prio_tree_next(struct vm_area_struct *vma,
-					struct prio_tree_iter *iter)
-{
-	struct prio_tree_node *ptr;
-	struct vm_area_struct *next;
-
-	if (!vma) {
-		/*
-		 * First call is with NULL vma
-		 */
-		ptr = prio_tree_next(iter);
-		if (ptr) {
-			next = prio_tree_entry(ptr, struct vm_area_struct,
-						shared.prio_tree_node);
-			prefetch(next->shared.vm_set.head);
-			return next;
-		} else
-			return NULL;
-	}
-
-	if (vma->shared.vm_set.parent) {
-		if (vma->shared.vm_set.head) {
-			next = vma->shared.vm_set.head;
-			prefetch(next->shared.vm_set.list.next);
-			return next;
-		}
-	} else {
-		next = list_entry(vma->shared.vm_set.list.next,
-				struct vm_area_struct, shared.vm_set.list);
-		if (!next->shared.vm_set.head) {
-			prefetch(next->shared.vm_set.list.next);
-			return next;
-		}
-	}
-
-	ptr = prio_tree_next(iter);
-	if (ptr) {
-		next = prio_tree_entry(ptr, struct vm_area_struct,
-					shared.prio_tree_node);
-		prefetch(next->shared.vm_set.head);
-		return next;
-	} else
-		return NULL;
-}
diff --git a/mm/rmap.c b/mm/rmap.c
index 0f3b7cda2a2..7b5b51d25fc 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -820,7 +820,6 @@ static int page_referenced_file(struct page *page,
 	struct address_space *mapping = page->mapping;
 	pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
 	struct vm_area_struct *vma;
-	struct prio_tree_iter iter;
 	int referenced = 0;
 
 	/*
@@ -846,7 +845,7 @@ static int page_referenced_file(struct page *page,
 	 */
 	mapcount = page_mapcount(page);
 
-	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
+	vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
 		unsigned long address = vma_address(page, vma);
 		if (address == -EFAULT)
 			continue;
@@ -945,13 +944,12 @@ static int page_mkclean_file(struct address_space *mapping, struct page *page)
 {
 	pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
 	struct vm_area_struct *vma;
-	struct prio_tree_iter iter;
 	int ret = 0;
 
 	BUG_ON(PageAnon(page));
 
 	mutex_lock(&mapping->i_mmap_mutex);
-	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
+	vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
 		if (vma->vm_flags & VM_SHARED) {
 			unsigned long address = vma_address(page, vma);
 			if (address == -EFAULT)
@@ -1547,7 +1545,6 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
 	struct address_space *mapping = page->mapping;
 	pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
 	struct vm_area_struct *vma;
-	struct prio_tree_iter iter;
 	int ret = SWAP_AGAIN;
 	unsigned long cursor;
 	unsigned long max_nl_cursor = 0;
@@ -1555,7 +1552,7 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
 	unsigned int mapcount;
 
 	mutex_lock(&mapping->i_mmap_mutex);
-	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
+	vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
 		unsigned long address = vma_address(page, vma);
 		if (address == -EFAULT)
 			continue;
@@ -1576,7 +1573,7 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
 		goto out;
 
 	list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
-						shared.vm_set.list) {
+							shared.nonlinear) {
 		cursor = (unsigned long) vma->vm_private_data;
 		if (cursor > max_nl_cursor)
 			max_nl_cursor = cursor;
@@ -1608,7 +1605,7 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
 
 	do {
 		list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
-						shared.vm_set.list) {
+							shared.nonlinear) {
 			cursor = (unsigned long) vma->vm_private_data;
 			while ( cursor < max_nl_cursor &&
 				cursor < vma->vm_end - vma->vm_start) {
@@ -1631,7 +1628,7 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
 	 * in locked vmas).  Reset cursor on all unreserved nonlinear
 	 * vmas, now forgetting on which ones it had fallen behind.
 	 */
-	list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.vm_set.list)
+	list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.nonlinear)
 		vma->vm_private_data = NULL;
 out:
 	mutex_unlock(&mapping->i_mmap_mutex);
@@ -1748,13 +1745,12 @@ static int rmap_walk_file(struct page *page, int (*rmap_one)(struct page *,
 	struct address_space *mapping = page->mapping;
 	pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
 	struct vm_area_struct *vma;
-	struct prio_tree_iter iter;
 	int ret = SWAP_AGAIN;
 
 	if (!mapping)
 		return ret;
 	mutex_lock(&mapping->i_mmap_mutex);
-	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
+	vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
 		unsigned long address = vma_address(page, vma);
 		if (address == -EFAULT)
 			continue;
-- 
cgit v1.2.3-70-g09d2


From 9c079add0d0f45220f4bb37febf0621137ec2d38 Mon Sep 17 00:00:00 2001
From: Michel Lespinasse <walken@google.com>
Date: Mon, 8 Oct 2012 16:31:33 -0700
Subject: rbtree: move augmented rbtree functionality to rbtree_augmented.h

Provide rb_insert_augmented() and rb_erase_augmented() through a new
rbtree_augmented.h include file.  rb_erase_augmented() is defined there as
an __always_inline function, in order to allow inlining of augmented
rbtree callbacks into it.  Since this generates a relatively large
function, each augmented rbtree user should make sure to have a single
call site.

Signed-off-by: Michel Lespinasse <walken@google.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Hillf Danton <dhillf@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/rbtree.txt           |  13 +++
 arch/x86/mm/pat_rbtree.c           |   2 +-
 include/linux/interval_tree_tmpl.h |   8 +-
 include/linux/rbtree.h             |  48 --------
 include/linux/rbtree_augmented.h   | 223 +++++++++++++++++++++++++++++++++++++
 lib/rbtree.c                       | 162 ++-------------------------
 lib/rbtree_test.c                  |   2 +-
 7 files changed, 255 insertions(+), 203 deletions(-)
 create mode 100644 include/linux/rbtree_augmented.h

(limited to 'arch/x86')

diff --git a/Documentation/rbtree.txt b/Documentation/rbtree.txt
index 0a0b6dce3e0..61b6c48871a 100644
--- a/Documentation/rbtree.txt
+++ b/Documentation/rbtree.txt
@@ -202,6 +202,14 @@ An rbtree user who wants this feature will have to call the augmentation
 functions with the user provided augmentation callback when inserting
 and erasing nodes.
 
+C files implementing augmented rbtree manipulation must include
+<linux/rbtree_augmented.h> instead of <linus/rbtree.h>. Note that
+linux/rbtree_augmented.h exposes some rbtree implementations details
+you are not expected to rely on; please stick to the documented APIs
+there and do not include <linux/rbtree_augmented.h> from header files
+either so as to minimize chances of your users accidentally relying on
+such implementation details.
+
 On insertion, the user must update the augmented information on the path
 leading to the inserted node, then call rb_link_node() as usual and
 rb_augment_inserted() instead of the usual rb_insert_color() call.
@@ -227,6 +235,11 @@ In both cases, the callbacks are provided through struct rb_augment_callbacks.
   subtree to a newly assigned subtree root AND recomputes the augmented
   information for the former subtree root.
 
+The compiled code for rb_erase_augmented() may inline the propagation and
+copy callbacks, which results in a large function, so each augmented rbtree
+user should have a single rb_erase_augmented() call site in order to limit
+compiled code size.
+
 
 Sample usage:
 
diff --git a/arch/x86/mm/pat_rbtree.c b/arch/x86/mm/pat_rbtree.c
index 4d116959075..415f6c4ced3 100644
--- a/arch/x86/mm/pat_rbtree.c
+++ b/arch/x86/mm/pat_rbtree.c
@@ -12,7 +12,7 @@
 #include <linux/debugfs.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/rbtree.h>
+#include <linux/rbtree_augmented.h>
 #include <linux/sched.h>
 #include <linux/gfp.h>
 
diff --git a/include/linux/interval_tree_tmpl.h b/include/linux/interval_tree_tmpl.h
index c65deda3141..c1aeb922d65 100644
--- a/include/linux/interval_tree_tmpl.h
+++ b/include/linux/interval_tree_tmpl.h
@@ -19,6 +19,8 @@
   include/linux/interval_tree_tmpl.h
 */
 
+#include <linux/rbtree_augmented.h>
+
 /*
  * Template for implementing interval trees
  *
@@ -57,7 +59,8 @@ static inline ITTYPE IT(compute_subtree_last)(ITSTRUCT *node)
 	return max;
 }
 
-static void IT(augment_propagate)(struct rb_node *rb, struct rb_node *stop)
+static inline void
+IT(augment_propagate)(struct rb_node *rb, struct rb_node *stop)
 {
 	while (rb != stop) {
 		ITSTRUCT *node = rb_entry(rb, ITSTRUCT, ITRB);
@@ -69,7 +72,8 @@ static void IT(augment_propagate)(struct rb_node *rb, struct rb_node *stop)
 	}
 }
 
-static void IT(augment_copy)(struct rb_node *rb_old, struct rb_node *rb_new)
+static inline void
+IT(augment_copy)(struct rb_node *rb_old, struct rb_node *rb_new)
 {
 	ITSTRUCT *old = rb_entry(rb_old, ITSTRUCT, ITRB);
 	ITSTRUCT *new = rb_entry(rb_new, ITSTRUCT, ITRB);
diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h
index 8d1e83b1c87..0022c1bb1e2 100644
--- a/include/linux/rbtree.h
+++ b/include/linux/rbtree.h
@@ -62,54 +62,6 @@ extern void rb_insert_color(struct rb_node *, struct rb_root *);
 extern void rb_erase(struct rb_node *, struct rb_root *);
 
 
-struct rb_augment_callbacks {
-	void (*propagate)(struct rb_node *node, struct rb_node *stop);
-	void (*copy)(struct rb_node *old, struct rb_node *new);
-	void (*rotate)(struct rb_node *old, struct rb_node *new);
-};
-
-extern void __rb_insert_augmented(struct rb_node *node, struct rb_root *root,
-	void (*augment_rotate)(struct rb_node *old, struct rb_node *new));
-extern void rb_erase_augmented(struct rb_node *node, struct rb_root *root,
-			       const struct rb_augment_callbacks *augment);
-static inline void
-rb_insert_augmented(struct rb_node *node, struct rb_root *root,
-		    const struct rb_augment_callbacks *augment)
-{
-	__rb_insert_augmented(node, root, augment->rotate);
-}
-
-#define RB_DECLARE_CALLBACKS(rbstatic, rbname, rbstruct, rbfield,	      \
-			     rbtype, rbaugmented, rbcompute)		      \
-static void rbname ## _propagate(struct rb_node *rb, struct rb_node *stop)    \
-{									      \
-	while (rb != stop) {						      \
-		rbstruct *node = rb_entry(rb, rbstruct, rbfield);	      \
-		rbtype augmented = rbcompute(node);			      \
-		if (node->rbaugmented == augmented)			      \
-			break;						      \
-		node->rbaugmented = augmented;				      \
-		rb = rb_parent(&node->rbfield);				      \
-	}								      \
-}									      \
-static void rbname ## _copy(struct rb_node *rb_old, struct rb_node *rb_new)   \
-{									      \
-	rbstruct *old = rb_entry(rb_old, rbstruct, rbfield);		      \
-	rbstruct *new = rb_entry(rb_new, rbstruct, rbfield);		      \
-	new->rbaugmented = old->rbaugmented;				      \
-}									      \
-static void rbname ## _rotate(struct rb_node *rb_old, struct rb_node *rb_new) \
-{									      \
-	rbstruct *old = rb_entry(rb_old, rbstruct, rbfield);		      \
-	rbstruct *new = rb_entry(rb_new, rbstruct, rbfield);		      \
-	new->rbaugmented = old->rbaugmented;				      \
-	old->rbaugmented = rbcompute(old);				      \
-}									      \
-rbstatic const struct rb_augment_callbacks rbname = {			      \
-	rbname ## _propagate, rbname ## _copy, rbname ## _rotate	      \
-};
-
-
 /* Find logical next and previous nodes in a tree */
 extern struct rb_node *rb_next(const struct rb_node *);
 extern struct rb_node *rb_prev(const struct rb_node *);
diff --git a/include/linux/rbtree_augmented.h b/include/linux/rbtree_augmented.h
new file mode 100644
index 00000000000..214caa33433
--- /dev/null
+++ b/include/linux/rbtree_augmented.h
@@ -0,0 +1,223 @@
+/*
+  Red Black Trees
+  (C) 1999  Andrea Arcangeli <andrea@suse.de>
+  (C) 2002  David Woodhouse <dwmw2@infradead.org>
+  (C) 2012  Michel Lespinasse <walken@google.com>
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; either version 2 of the License, or
+  (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+  linux/include/linux/rbtree_augmented.h
+*/
+
+#ifndef _LINUX_RBTREE_AUGMENTED_H
+#define _LINUX_RBTREE_AUGMENTED_H
+
+#include <linux/rbtree.h>
+
+/*
+ * Please note - only struct rb_augment_callbacks and the prototypes for
+ * rb_insert_augmented() and rb_erase_augmented() are intended to be public.
+ * The rest are implementation details you are not expected to depend on.
+ *
+ * See Documentation/rbtree.txt for documentation and samples.
+ */
+
+struct rb_augment_callbacks {
+	void (*propagate)(struct rb_node *node, struct rb_node *stop);
+	void (*copy)(struct rb_node *old, struct rb_node *new);
+	void (*rotate)(struct rb_node *old, struct rb_node *new);
+};
+
+extern void __rb_insert_augmented(struct rb_node *node, struct rb_root *root,
+	void (*augment_rotate)(struct rb_node *old, struct rb_node *new));
+static inline void
+rb_insert_augmented(struct rb_node *node, struct rb_root *root,
+		    const struct rb_augment_callbacks *augment)
+{
+	__rb_insert_augmented(node, root, augment->rotate);
+}
+
+#define RB_DECLARE_CALLBACKS(rbstatic, rbname, rbstruct, rbfield,	\
+			     rbtype, rbaugmented, rbcompute)		\
+static inline void							\
+rbname ## _propagate(struct rb_node *rb, struct rb_node *stop)		\
+{									\
+	while (rb != stop) {						\
+		rbstruct *node = rb_entry(rb, rbstruct, rbfield);	\
+		rbtype augmented = rbcompute(node);			\
+		if (node->rbaugmented == augmented)			\
+			break;						\
+		node->rbaugmented = augmented;				\
+		rb = rb_parent(&node->rbfield);				\
+	}								\
+}									\
+static inline void							\
+rbname ## _copy(struct rb_node *rb_old, struct rb_node *rb_new)		\
+{									\
+	rbstruct *old = rb_entry(rb_old, rbstruct, rbfield);		\
+	rbstruct *new = rb_entry(rb_new, rbstruct, rbfield);		\
+	new->rbaugmented = old->rbaugmented;				\
+}									\
+static void								\
+rbname ## _rotate(struct rb_node *rb_old, struct rb_node *rb_new)	\
+{									\
+	rbstruct *old = rb_entry(rb_old, rbstruct, rbfield);		\
+	rbstruct *new = rb_entry(rb_new, rbstruct, rbfield);		\
+	new->rbaugmented = old->rbaugmented;				\
+	old->rbaugmented = rbcompute(old);				\
+}									\
+rbstatic const struct rb_augment_callbacks rbname = {			\
+	rbname ## _propagate, rbname ## _copy, rbname ## _rotate	\
+};
+
+
+#define	RB_RED		0
+#define	RB_BLACK	1
+
+#define __rb_parent(pc)    ((struct rb_node *)(pc & ~3))
+
+#define __rb_color(pc)     ((pc) & 1)
+#define __rb_is_black(pc)  __rb_color(pc)
+#define __rb_is_red(pc)    (!__rb_color(pc))
+#define rb_color(rb)       __rb_color((rb)->__rb_parent_color)
+#define rb_is_red(rb)      __rb_is_red((rb)->__rb_parent_color)
+#define rb_is_black(rb)    __rb_is_black((rb)->__rb_parent_color)
+
+static inline void rb_set_parent(struct rb_node *rb, struct rb_node *p)
+{
+	rb->__rb_parent_color = rb_color(rb) | (unsigned long)p;
+}
+
+static inline void rb_set_parent_color(struct rb_node *rb,
+				       struct rb_node *p, int color)
+{
+	rb->__rb_parent_color = (unsigned long)p | color;
+}
+
+static inline void
+__rb_change_child(struct rb_node *old, struct rb_node *new,
+		  struct rb_node *parent, struct rb_root *root)
+{
+	if (parent) {
+		if (parent->rb_left == old)
+			parent->rb_left = new;
+		else
+			parent->rb_right = new;
+	} else
+		root->rb_node = new;
+}
+
+extern void __rb_erase_color(struct rb_node *parent, struct rb_root *root,
+	void (*augment_rotate)(struct rb_node *old, struct rb_node *new));
+
+static __always_inline void
+rb_erase_augmented(struct rb_node *node, struct rb_root *root,
+		   const struct rb_augment_callbacks *augment)
+{
+	struct rb_node *child = node->rb_right, *tmp = node->rb_left;
+	struct rb_node *parent, *rebalance;
+	unsigned long pc;
+
+	if (!tmp) {
+		/*
+		 * Case 1: node to erase has no more than 1 child (easy!)
+		 *
+		 * Note that if there is one child it must be red due to 5)
+		 * and node must be black due to 4). We adjust colors locally
+		 * so as to bypass __rb_erase_color() later on.
+		 */
+		pc = node->__rb_parent_color;
+		parent = __rb_parent(pc);
+		__rb_change_child(node, child, parent, root);
+		if (child) {
+			child->__rb_parent_color = pc;
+			rebalance = NULL;
+		} else
+			rebalance = __rb_is_black(pc) ? parent : NULL;
+		tmp = parent;
+	} else if (!child) {
+		/* Still case 1, but this time the child is node->rb_left */
+		tmp->__rb_parent_color = pc = node->__rb_parent_color;
+		parent = __rb_parent(pc);
+		__rb_change_child(node, tmp, parent, root);
+		rebalance = NULL;
+		tmp = parent;
+	} else {
+		struct rb_node *successor = child, *child2;
+		tmp = child->rb_left;
+		if (!tmp) {
+			/*
+			 * Case 2: node's successor is its right child
+			 *
+			 *    (n)          (s)
+			 *    / \          / \
+			 *  (x) (s)  ->  (x) (c)
+			 *        \
+			 *        (c)
+			 */
+			parent = successor;
+			child2 = successor->rb_right;
+			augment->copy(node, successor);
+		} else {
+			/*
+			 * Case 3: node's successor is leftmost under
+			 * node's right child subtree
+			 *
+			 *    (n)          (s)
+			 *    / \          / \
+			 *  (x) (y)  ->  (x) (y)
+			 *      /            /
+			 *    (p)          (p)
+			 *    /            /
+			 *  (s)          (c)
+			 *    \
+			 *    (c)
+			 */
+			do {
+				parent = successor;
+				successor = tmp;
+				tmp = tmp->rb_left;
+			} while (tmp);
+			parent->rb_left = child2 = successor->rb_right;
+			successor->rb_right = child;
+			rb_set_parent(child, successor);
+			augment->copy(node, successor);
+			augment->propagate(parent, successor);
+		}
+
+		successor->rb_left = tmp = node->rb_left;
+		rb_set_parent(tmp, successor);
+
+		pc = node->__rb_parent_color;
+		tmp = __rb_parent(pc);
+		__rb_change_child(node, successor, tmp, root);
+		if (child2) {
+			successor->__rb_parent_color = pc;
+			rb_set_parent_color(child2, parent, RB_BLACK);
+			rebalance = NULL;
+		} else {
+			unsigned long pc2 = successor->__rb_parent_color;
+			successor->__rb_parent_color = pc;
+			rebalance = __rb_is_black(pc2) ? parent : NULL;
+		}
+		tmp = successor;
+	}
+
+	augment->propagate(tmp, NULL);
+	if (rebalance)
+		__rb_erase_color(rebalance, root, augment->rotate);
+}
+
+#endif	/* _LINUX_RBTREE_AUGMENTED_H */
diff --git a/lib/rbtree.c b/lib/rbtree.c
index c0088ca345f..4f56a11d67f 100644
--- a/lib/rbtree.c
+++ b/lib/rbtree.c
@@ -21,7 +21,7 @@
   linux/lib/rbtree.c
 */
 
-#include <linux/rbtree.h>
+#include <linux/rbtree_augmented.h>
 #include <linux/export.h>
 
 /*
@@ -44,52 +44,16 @@
  *  parentheses and have some accompanying text comment.
  */
 
-#define	RB_RED		0
-#define	RB_BLACK	1
-
-#define __rb_parent(pc)    ((struct rb_node *)(pc & ~3))
-
-#define __rb_color(pc)     ((pc) & 1)
-#define __rb_is_black(pc)  __rb_color(pc)
-#define __rb_is_red(pc)    (!__rb_color(pc))
-#define rb_color(rb)       __rb_color((rb)->__rb_parent_color)
-#define rb_is_red(rb)      __rb_is_red((rb)->__rb_parent_color)
-#define rb_is_black(rb)    __rb_is_black((rb)->__rb_parent_color)
-
 static inline void rb_set_black(struct rb_node *rb)
 {
 	rb->__rb_parent_color |= RB_BLACK;
 }
 
-static inline void rb_set_parent(struct rb_node *rb, struct rb_node *p)
-{
-	rb->__rb_parent_color = rb_color(rb) | (unsigned long)p;
-}
-
-static inline void rb_set_parent_color(struct rb_node *rb,
-				       struct rb_node *p, int color)
-{
-	rb->__rb_parent_color = (unsigned long)p | color;
-}
-
 static inline struct rb_node *rb_red_parent(struct rb_node *red)
 {
 	return (struct rb_node *)red->__rb_parent_color;
 }
 
-static inline void
-__rb_change_child(struct rb_node *old, struct rb_node *new,
-		  struct rb_node *parent, struct rb_root *root)
-{
-	if (parent) {
-		if (parent->rb_left == old)
-			parent->rb_left = new;
-		else
-			parent->rb_right = new;
-	} else
-		root->rb_node = new;
-}
-
 /*
  * Helper function for rotations:
  * - old's parent and color get assigned to new
@@ -230,9 +194,9 @@ __rb_insert(struct rb_node *node, struct rb_root *root,
 	}
 }
 
-static __always_inline void
+__always_inline void
 __rb_erase_color(struct rb_node *parent, struct rb_root *root,
-		 const struct rb_augment_callbacks *augment)
+	void (*augment_rotate)(struct rb_node *old, struct rb_node *new))
 {
 	struct rb_node *node = NULL, *sibling, *tmp1, *tmp2;
 
@@ -261,7 +225,7 @@ __rb_erase_color(struct rb_node *parent, struct rb_root *root,
 				rb_set_parent_color(tmp1, parent, RB_BLACK);
 				__rb_rotate_set_parents(parent, sibling, root,
 							RB_RED);
-				augment->rotate(parent, sibling);
+				augment_rotate(parent, sibling);
 				sibling = tmp1;
 			}
 			tmp1 = sibling->rb_right;
@@ -313,7 +277,7 @@ __rb_erase_color(struct rb_node *parent, struct rb_root *root,
 				if (tmp1)
 					rb_set_parent_color(tmp1, sibling,
 							    RB_BLACK);
-				augment->rotate(sibling, tmp2);
+				augment_rotate(sibling, tmp2);
 				tmp1 = sibling;
 				sibling = tmp2;
 			}
@@ -336,7 +300,7 @@ __rb_erase_color(struct rb_node *parent, struct rb_root *root,
 				rb_set_parent(tmp2, parent);
 			__rb_rotate_set_parents(parent, sibling, root,
 						RB_BLACK);
-			augment->rotate(parent, sibling);
+			augment_rotate(parent, sibling);
 			break;
 		} else {
 			sibling = parent->rb_left;
@@ -347,7 +311,7 @@ __rb_erase_color(struct rb_node *parent, struct rb_root *root,
 				rb_set_parent_color(tmp1, parent, RB_BLACK);
 				__rb_rotate_set_parents(parent, sibling, root,
 							RB_RED);
-				augment->rotate(parent, sibling);
+				augment_rotate(parent, sibling);
 				sibling = tmp1;
 			}
 			tmp1 = sibling->rb_left;
@@ -374,7 +338,7 @@ __rb_erase_color(struct rb_node *parent, struct rb_root *root,
 				if (tmp1)
 					rb_set_parent_color(tmp1, sibling,
 							    RB_BLACK);
-				augment->rotate(sibling, tmp2);
+				augment_rotate(sibling, tmp2);
 				tmp1 = sibling;
 				sibling = tmp2;
 			}
@@ -386,109 +350,12 @@ __rb_erase_color(struct rb_node *parent, struct rb_root *root,
 				rb_set_parent(tmp2, parent);
 			__rb_rotate_set_parents(parent, sibling, root,
 						RB_BLACK);
-			augment->rotate(parent, sibling);
+			augment_rotate(parent, sibling);
 			break;
 		}
 	}
 }
-
-static __always_inline void
-__rb_erase(struct rb_node *node, struct rb_root *root,
-	   const struct rb_augment_callbacks *augment)
-{
-	struct rb_node *child = node->rb_right, *tmp = node->rb_left;
-	struct rb_node *parent, *rebalance;
-	unsigned long pc;
-
-	if (!tmp) {
-		/*
-		 * Case 1: node to erase has no more than 1 child (easy!)
-		 *
-		 * Note that if there is one child it must be red due to 5)
-		 * and node must be black due to 4). We adjust colors locally
-		 * so as to bypass __rb_erase_color() later on.
-		 */
-		pc = node->__rb_parent_color;
-		parent = __rb_parent(pc);
-		__rb_change_child(node, child, parent, root);
-		if (child) {
-			child->__rb_parent_color = pc;
-			rebalance = NULL;
-		} else
-			rebalance = __rb_is_black(pc) ? parent : NULL;
-		tmp = parent;
-	} else if (!child) {
-		/* Still case 1, but this time the child is node->rb_left */
-		tmp->__rb_parent_color = pc = node->__rb_parent_color;
-		parent = __rb_parent(pc);
-		__rb_change_child(node, tmp, parent, root);
-		rebalance = NULL;
-		tmp = parent;
-	} else {
-		struct rb_node *successor = child, *child2;
-		tmp = child->rb_left;
-		if (!tmp) {
-			/*
-			 * Case 2: node's successor is its right child
-			 *
-			 *    (n)          (s)
-			 *    / \          / \
-			 *  (x) (s)  ->  (x) (c)
-			 *        \
-			 *        (c)
-			 */
-			parent = successor;
-			child2 = successor->rb_right;
-			augment->copy(node, successor);
-		} else {
-			/*
-			 * Case 3: node's successor is leftmost under
-			 * node's right child subtree
-			 *
-			 *    (n)          (s)
-			 *    / \          / \
-			 *  (x) (y)  ->  (x) (y)
-			 *      /            /
-			 *    (p)          (p)
-			 *    /            /
-			 *  (s)          (c)
-			 *    \
-			 *    (c)
-			 */
-			do {
-				parent = successor;
-				successor = tmp;
-				tmp = tmp->rb_left;
-			} while (tmp);
-			parent->rb_left = child2 = successor->rb_right;
-			successor->rb_right = child;
-			rb_set_parent(child, successor);
-			augment->copy(node, successor);
-			augment->propagate(parent, successor);
-		}
-
-		successor->rb_left = tmp = node->rb_left;
-		rb_set_parent(tmp, successor);
-
-		pc = node->__rb_parent_color;
-		tmp = __rb_parent(pc);
-		__rb_change_child(node, successor, tmp, root);
-		if (child2) {
-			successor->__rb_parent_color = pc;
-			rb_set_parent_color(child2, parent, RB_BLACK);
-			rebalance = NULL;
-		} else {
-			unsigned long pc2 = successor->__rb_parent_color;
-			successor->__rb_parent_color = pc;
-			rebalance = __rb_is_black(pc2) ? parent : NULL;
-		}
-		tmp = successor;
-	}
-
-	augment->propagate(tmp, NULL);
-	if (rebalance)
-		__rb_erase_color(rebalance, root, augment);
-}
+EXPORT_SYMBOL(__rb_erase_color);
 
 /*
  * Non-augmented rbtree manipulation functions.
@@ -513,7 +380,7 @@ EXPORT_SYMBOL(rb_insert_color);
 
 void rb_erase(struct rb_node *node, struct rb_root *root)
 {
-	__rb_erase(node, root, &dummy_callbacks);
+	rb_erase_augmented(node, root, &dummy_callbacks);
 }
 EXPORT_SYMBOL(rb_erase);
 
@@ -531,13 +398,6 @@ void __rb_insert_augmented(struct rb_node *node, struct rb_root *root,
 }
 EXPORT_SYMBOL(__rb_insert_augmented);
 
-void rb_erase_augmented(struct rb_node *node, struct rb_root *root,
-			const struct rb_augment_callbacks *augment)
-{
-	__rb_erase(node, root, augment);
-}
-EXPORT_SYMBOL(rb_erase_augmented);
-
 /*
  * This function returns the first node (in sort order) of the tree.
  */
diff --git a/lib/rbtree_test.c b/lib/rbtree_test.c
index b20e99969b0..268b23951fe 100644
--- a/lib/rbtree_test.c
+++ b/lib/rbtree_test.c
@@ -1,5 +1,5 @@
 #include <linux/module.h>
-#include <linux/rbtree.h>
+#include <linux/rbtree_augmented.h>
 #include <linux/random.h>
 #include <asm/timex.h>
 
-- 
cgit v1.2.3-70-g09d2


From e79bee24fd6134f90af4228cfebd010136d67631 Mon Sep 17 00:00:00 2001
From: Shaohua Li <shli@kernel.org>
Date: Mon, 8 Oct 2012 16:32:18 -0700
Subject: atomic: implement generic atomic_dec_if_positive()

The x86 implementation of atomic_dec_if_positive is quite generic, so make
it available to all architectures.

This is needed for "swap: add a simple detector for inappropriate swapin
readahead".

[akpm@linux-foundation.org: do the "#define foo foo" trick in the conventional manner]
Signed-off-by: Shaohua Li <shli@fusionio.com>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Rik van Riel <riel@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Michal Simek <monstr@monstr.eu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/microblaze/include/asm/atomic.h |  1 +
 arch/powerpc/include/asm/atomic.h    |  1 +
 arch/x86/include/asm/atomic.h        | 24 ------------------------
 include/linux/atomic.h               | 25 +++++++++++++++++++++++++
 4 files changed, 27 insertions(+), 24 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/microblaze/include/asm/atomic.h b/arch/microblaze/include/asm/atomic.h
index 472d8bf726d..42ac382a09d 100644
--- a/arch/microblaze/include/asm/atomic.h
+++ b/arch/microblaze/include/asm/atomic.h
@@ -22,5 +22,6 @@ static inline int atomic_dec_if_positive(atomic_t *v)
 
 	return res;
 }
+#define atomic_dec_if_positive atomic_dec_if_positive
 
 #endif /* _ASM_MICROBLAZE_ATOMIC_H */
diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h
index da29032ae38..e3b1d41c89b 100644
--- a/arch/powerpc/include/asm/atomic.h
+++ b/arch/powerpc/include/asm/atomic.h
@@ -268,6 +268,7 @@ static __inline__ int atomic_dec_if_positive(atomic_t *v)
 
 	return t;
 }
+#define atomic_dec_if_positive atomic_dec_if_positive
 
 #define smp_mb__before_atomic_dec()     smp_mb()
 #define smp_mb__after_atomic_dec()      smp_mb()
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index 250b8774c15..b6c3b821acf 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -240,30 +240,6 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u)
 	return c;
 }
 
-
-/*
- * atomic_dec_if_positive - decrement by 1 if old value positive
- * @v: pointer of type atomic_t
- *
- * The function returns the old value of *v minus 1, even if
- * the atomic variable, v, was not decremented.
- */
-static inline int atomic_dec_if_positive(atomic_t *v)
-{
-	int c, old, dec;
-	c = atomic_read(v);
-	for (;;) {
-		dec = c - 1;
-		if (unlikely(dec < 0))
-			break;
-		old = atomic_cmpxchg((v), c, dec);
-		if (likely(old == c))
-			break;
-		c = old;
-	}
-	return dec;
-}
-
 /**
  * atomic_inc_short - increment of a short integer
  * @v: pointer to type int
diff --git a/include/linux/atomic.h b/include/linux/atomic.h
index 70cfcb2d63c..5b08a8540ec 100644
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h
@@ -86,6 +86,31 @@ static inline int atomic_dec_unless_positive(atomic_t *p)
 }
 #endif
 
+/*
+ * atomic_dec_if_positive - decrement by 1 if old value positive
+ * @v: pointer of type atomic_t
+ *
+ * The function returns the old value of *v minus 1, even if
+ * the atomic variable, v, was not decremented.
+ */
+#ifndef atomic_dec_if_positive
+static inline int atomic_dec_if_positive(atomic_t *v)
+{
+	int c, old, dec;
+	c = atomic_read(v);
+	for (;;) {
+		dec = c - 1;
+		if (unlikely(dec < 0))
+			break;
+		old = atomic_cmpxchg((v), c, dec);
+		if (likely(old == c))
+			break;
+		c = old;
+	}
+	return dec;
+}
+#endif
+
 #ifndef CONFIG_ARCH_HAS_ATOMIC_OR
 static inline void atomic_or(int i, atomic_t *v)
 {
-- 
cgit v1.2.3-70-g09d2


From 45cac65b0fcd287ebb877b141d40ba9bbe8e5da7 Mon Sep 17 00:00:00 2001
From: Shaohua Li <shli@kernel.org>
Date: Mon, 8 Oct 2012 16:32:19 -0700
Subject: readahead: fault retry breaks mmap file read random detection

.fault now can retry.  The retry can break state machine of .fault.  In
filemap_fault, if page is miss, ra->mmap_miss is increased.  In the second
try, since the page is in page cache now, ra->mmap_miss is decreased.  And
these are done in one fault, so we can't detect random mmap file access.

Add a new flag to indicate .fault is tried once.  In the second try, skip
ra->mmap_miss decreasing.  The filemap_fault state machine is ok with it.

I only tested x86, didn't test other archs, but looks the change for other
archs is obvious, but who knows :)

Signed-off-by: Shaohua Li <shaohua.li@fusionio.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm/mm/fault.c        | 1 +
 arch/avr32/mm/fault.c      | 1 +
 arch/cris/mm/fault.c       | 1 +
 arch/hexagon/mm/vm_fault.c | 1 +
 arch/ia64/mm/fault.c       | 1 +
 arch/m68k/mm/fault.c       | 1 +
 arch/microblaze/mm/fault.c | 1 +
 arch/mips/mm/fault.c       | 1 +
 arch/openrisc/mm/fault.c   | 1 +
 arch/powerpc/mm/fault.c    | 1 +
 arch/s390/mm/fault.c       | 1 +
 arch/sh/mm/fault.c         | 1 +
 arch/sparc/mm/fault_32.c   | 1 +
 arch/sparc/mm/fault_64.c   | 1 +
 arch/tile/mm/fault.c       | 1 +
 arch/um/kernel/trap.c      | 1 +
 arch/x86/mm/fault.c        | 1 +
 arch/xtensa/mm/fault.c     | 1 +
 include/linux/mm.h         | 1 +
 mm/filemap.c               | 4 ++--
 20 files changed, 21 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index c3bd8345022..5dbf13f954f 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -336,6 +336,7 @@ retry:
 			/* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
 			* of starvation. */
 			flags &= ~FAULT_FLAG_ALLOW_RETRY;
+			flags |= FAULT_FLAG_TRIED;
 			goto retry;
 		}
 	}
diff --git a/arch/avr32/mm/fault.c b/arch/avr32/mm/fault.c
index b92e6095861..b2f2d2d6684 100644
--- a/arch/avr32/mm/fault.c
+++ b/arch/avr32/mm/fault.c
@@ -152,6 +152,7 @@ good_area:
 			tsk->min_flt++;
 		if (fault & VM_FAULT_RETRY) {
 			flags &= ~FAULT_FLAG_ALLOW_RETRY;
+			flags |= FAULT_FLAG_TRIED;
 
 			/*
 			 * No need to up_read(&mm->mmap_sem) as we would have
diff --git a/arch/cris/mm/fault.c b/arch/cris/mm/fault.c
index 45fd542cf17..73312ab6c69 100644
--- a/arch/cris/mm/fault.c
+++ b/arch/cris/mm/fault.c
@@ -186,6 +186,7 @@ retry:
 			tsk->min_flt++;
 		if (fault & VM_FAULT_RETRY) {
 			flags &= ~FAULT_FLAG_ALLOW_RETRY;
+			flags |= FAULT_FLAG_TRIED;
 
 			/*
 			 * No need to up_read(&mm->mmap_sem) as we would
diff --git a/arch/hexagon/mm/vm_fault.c b/arch/hexagon/mm/vm_fault.c
index 06695cc4fe5..513b74cb397 100644
--- a/arch/hexagon/mm/vm_fault.c
+++ b/arch/hexagon/mm/vm_fault.c
@@ -113,6 +113,7 @@ good_area:
 				current->min_flt++;
 			if (fault & VM_FAULT_RETRY) {
 				flags &= ~FAULT_FLAG_ALLOW_RETRY;
+				flags |= FAULT_FLAG_TRIED;
 				goto retry;
 			}
 		}
diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c
index 8443daf4f51..6cf0341f978 100644
--- a/arch/ia64/mm/fault.c
+++ b/arch/ia64/mm/fault.c
@@ -184,6 +184,7 @@ retry:
 			current->min_flt++;
 		if (fault & VM_FAULT_RETRY) {
 			flags &= ~FAULT_FLAG_ALLOW_RETRY;
+			flags |= FAULT_FLAG_TRIED;
 
 			 /* No need to up_read(&mm->mmap_sem) as we would
 			 * have already released it in __lock_page_or_retry
diff --git a/arch/m68k/mm/fault.c b/arch/m68k/mm/fault.c
index aeebbb7b30f..a563727806b 100644
--- a/arch/m68k/mm/fault.c
+++ b/arch/m68k/mm/fault.c
@@ -170,6 +170,7 @@ good_area:
 			/* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
 			 * of starvation. */
 			flags &= ~FAULT_FLAG_ALLOW_RETRY;
+			flags |= FAULT_FLAG_TRIED;
 
 			/*
 			 * No need to up_read(&mm->mmap_sem) as we would
diff --git a/arch/microblaze/mm/fault.c b/arch/microblaze/mm/fault.c
index eb365d6795f..714b35a9c4f 100644
--- a/arch/microblaze/mm/fault.c
+++ b/arch/microblaze/mm/fault.c
@@ -233,6 +233,7 @@ good_area:
 			current->min_flt++;
 		if (fault & VM_FAULT_RETRY) {
 			flags &= ~FAULT_FLAG_ALLOW_RETRY;
+			flags |= FAULT_FLAG_TRIED;
 
 			/*
 			 * No need to up_read(&mm->mmap_sem) as we would
diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c
index c14f6dfed99..9f513486af1 100644
--- a/arch/mips/mm/fault.c
+++ b/arch/mips/mm/fault.c
@@ -171,6 +171,7 @@ good_area:
 		}
 		if (fault & VM_FAULT_RETRY) {
 			flags &= ~FAULT_FLAG_ALLOW_RETRY;
+			flags |= FAULT_FLAG_TRIED;
 
 			/*
 			 * No need to up_read(&mm->mmap_sem) as we would
diff --git a/arch/openrisc/mm/fault.c b/arch/openrisc/mm/fault.c
index 40f850e9766..e2bfafce66c 100644
--- a/arch/openrisc/mm/fault.c
+++ b/arch/openrisc/mm/fault.c
@@ -183,6 +183,7 @@ good_area:
 			tsk->min_flt++;
 		if (fault & VM_FAULT_RETRY) {
 			flags &= ~FAULT_FLAG_ALLOW_RETRY;
+			flags |= FAULT_FLAG_TRIED;
 
 			 /* No need to up_read(&mm->mmap_sem) as we would
 			 * have already released it in __lock_page_or_retry
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 5495ebe983a..0a6b28336eb 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -451,6 +451,7 @@ good_area:
 			/* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
 			 * of starvation. */
 			flags &= ~FAULT_FLAG_ALLOW_RETRY;
+			flags |= FAULT_FLAG_TRIED;
 			goto retry;
 		}
 	}
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index ac9122ca115..04ad4001a28 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -367,6 +367,7 @@ retry:
 			/* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
 			 * of starvation. */
 			flags &= ~FAULT_FLAG_ALLOW_RETRY;
+			flags |= FAULT_FLAG_TRIED;
 			down_read(&mm->mmap_sem);
 			goto retry;
 		}
diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c
index 3bdc1ad9a34..cbbdcad8fcb 100644
--- a/arch/sh/mm/fault.c
+++ b/arch/sh/mm/fault.c
@@ -504,6 +504,7 @@ good_area:
 		}
 		if (fault & VM_FAULT_RETRY) {
 			flags &= ~FAULT_FLAG_ALLOW_RETRY;
+			flags |= FAULT_FLAG_TRIED;
 
 			/*
 			 * No need to up_read(&mm->mmap_sem) as we would
diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c
index 77ac917be15..e98bfda205a 100644
--- a/arch/sparc/mm/fault_32.c
+++ b/arch/sparc/mm/fault_32.c
@@ -265,6 +265,7 @@ good_area:
 		}
 		if (fault & VM_FAULT_RETRY) {
 			flags &= ~FAULT_FLAG_ALLOW_RETRY;
+			flags |= FAULT_FLAG_TRIED;
 
 			/* No need to up_read(&mm->mmap_sem) as we would
 			 * have already released it in __lock_page_or_retry
diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c
index 1fe0429b631..413d2926330 100644
--- a/arch/sparc/mm/fault_64.c
+++ b/arch/sparc/mm/fault_64.c
@@ -452,6 +452,7 @@ good_area:
 		}
 		if (fault & VM_FAULT_RETRY) {
 			flags &= ~FAULT_FLAG_ALLOW_RETRY;
+			flags |= FAULT_FLAG_TRIED;
 
 			/* No need to up_read(&mm->mmap_sem) as we would
 			 * have already released it in __lock_page_or_retry
diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c
index 84ce7abbf5a..fe811fa5f1b 100644
--- a/arch/tile/mm/fault.c
+++ b/arch/tile/mm/fault.c
@@ -454,6 +454,7 @@ good_area:
 			tsk->min_flt++;
 		if (fault & VM_FAULT_RETRY) {
 			flags &= ~FAULT_FLAG_ALLOW_RETRY;
+			flags |= FAULT_FLAG_TRIED;
 
 			 /*
 			  * No need to up_read(&mm->mmap_sem) as we would
diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
index 0353b98ae35..0f00e9c8208 100644
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -89,6 +89,7 @@ good_area:
 				current->min_flt++;
 			if (fault & VM_FAULT_RETRY) {
 				flags &= ~FAULT_FLAG_ALLOW_RETRY;
+				flags |= FAULT_FLAG_TRIED;
 
 				goto retry;
 			}
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index a530b230e7d..8e13ecb41be 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1220,6 +1220,7 @@ good_area:
 			/* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
 			 * of starvation. */
 			flags &= ~FAULT_FLAG_ALLOW_RETRY;
+			flags |= FAULT_FLAG_TRIED;
 			goto retry;
 		}
 	}
diff --git a/arch/xtensa/mm/fault.c b/arch/xtensa/mm/fault.c
index 5a74c53bc69..2c2f710ed1d 100644
--- a/arch/xtensa/mm/fault.c
+++ b/arch/xtensa/mm/fault.c
@@ -126,6 +126,7 @@ good_area:
 			current->min_flt++;
 		if (fault & VM_FAULT_RETRY) {
 			flags &= ~FAULT_FLAG_ALLOW_RETRY;
+			flags |= FAULT_FLAG_TRIED;
 
 			 /* No need to up_read(&mm->mmap_sem) as we would
 			 * have already released it in __lock_page_or_retry
diff --git a/include/linux/mm.h b/include/linux/mm.h
index b01e585ab4b..bcaab4e6fe9 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -161,6 +161,7 @@ extern pgprot_t protection_map[16];
 #define FAULT_FLAG_ALLOW_RETRY	0x08	/* Retry fault if blocking */
 #define FAULT_FLAG_RETRY_NOWAIT	0x10	/* Don't drop mmap_sem and wait when retrying */
 #define FAULT_FLAG_KILLABLE	0x20	/* The fault task is in SIGKILL killable region */
+#define FAULT_FLAG_TRIED	0x40	/* second try */
 
 /*
  * vm_fault is filled by the the pagefault handler and passed to the vma's
diff --git a/mm/filemap.c b/mm/filemap.c
index a9827b42556..83efee76a5c 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1607,13 +1607,13 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	 * Do we have something in the page cache already?
 	 */
 	page = find_get_page(mapping, offset);
-	if (likely(page)) {
+	if (likely(page) && !(vmf->flags & FAULT_FLAG_TRIED)) {
 		/*
 		 * We found the page, so try async readahead before
 		 * waiting for the lock.
 		 */
 		do_async_mmap_readahead(vma, ra, file, page, offset);
-	} else {
+	} else if (!page) {
 		/* No page in the page cache at all */
 		do_sync_mmap_readahead(vma, ra, file, offset);
 		count_vm_event(PGMAJFAULT);
-- 
cgit v1.2.3-70-g09d2


From 027ef6c87853b0a9df53175063028edb4950d476 Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Mon, 8 Oct 2012 16:33:27 -0700
Subject: mm: thp: fix pmd_present for split_huge_page and PROT_NONE with THP

In many places !pmd_present has been converted to pmd_none.  For pmds
that's equivalent and pmd_none is quicker so using pmd_none is better.

However (unless we delete pmd_present) we should provide an accurate
pmd_present too.  This will avoid the risk of code thinking the pmd is non
present because it's under __split_huge_page_map, see the pmd_mknotpresent
there and the comment above it.

If the page has been mprotected as PROT_NONE, it would also lead to a
pmd_present false negative in the same way as the race with
split_huge_page.

Because the PSE bit stays on at all times (both during split_huge_page and
when the _PAGE_PROTNONE bit get set), we could only check for the PSE bit,
but checking the PROTNONE bit too is still good to remember pmd_present
must always keep PROT_NONE into account.

This explains a not reproducible BUG_ON that was seldom reported on the
lists.

The same issue is in pmd_large, it would go wrong with both PROT_NONE and
if it races with split_huge_page.

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: Johannes Weiner <jweiner@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/include/asm/pgtable.h | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index fc994846529..a1f780d45f7 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -146,8 +146,7 @@ static inline unsigned long pmd_pfn(pmd_t pmd)
 
 static inline int pmd_large(pmd_t pte)
 {
-	return (pmd_flags(pte) & (_PAGE_PSE | _PAGE_PRESENT)) ==
-		(_PAGE_PSE | _PAGE_PRESENT);
+	return pmd_flags(pte) & _PAGE_PSE;
 }
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -415,7 +414,13 @@ static inline int pte_hidden(pte_t pte)
 
 static inline int pmd_present(pmd_t pmd)
 {
-	return pmd_flags(pmd) & _PAGE_PRESENT;
+	/*
+	 * Checking for _PAGE_PSE is needed too because
+	 * split_huge_page will temporarily clear the present bit (but
+	 * the _PAGE_PSE flag will remain set at all times while the
+	 * _PAGE_PRESENT bit is clear).
+	 */
+	return pmd_flags(pmd) & (_PAGE_PRESENT | _PAGE_PROTNONE | _PAGE_PSE);
 }
 
 static inline int pmd_none(pmd_t pmd)
-- 
cgit v1.2.3-70-g09d2


From b113da65785d5f3f9ff1451ec0fe43d6d76da25b Mon Sep 17 00:00:00 2001
From: David Miller <davem@davemloft.net>
Date: Mon, 8 Oct 2012 16:34:25 -0700
Subject: mm: Add and use update_mmu_cache_pmd() in transparent huge page code.

The transparent huge page code passes a PMD pointer in as the third
argument of update_mmu_cache(), which expects a PTE pointer.

This never got noticed because X86 implements update_mmu_cache() as a
macro and thus we don't get any type checking, and X86 is the only
architecture which supports transparent huge pages currently.

Before other architectures can support transparent huge pages properly we
need to add a new interface which will take a PMD pointer as the third
argument rather than a PTE pointer.

[akpm@linux-foundation.org: implement update_mm_cache_pmd() for s390]
Signed-off-by: David S. Miller <davem@davemloft.net>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/s390/include/asm/pgtable.h   | 1 +
 arch/x86/include/asm/pgtable_32.h | 1 +
 arch/x86/include/asm/pgtable_64.h | 1 +
 mm/huge_memory.c                  | 6 +++---
 4 files changed, 6 insertions(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index ed14fc2db6e..979fe3dc078 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -42,6 +42,7 @@ extern void fault_init(void);
  * tables contain all the necessary information.
  */
 #define update_mmu_cache(vma, address, ptep)     do { } while (0)
+#define update_mmu_cache_pmd(vma, address, ptep) do { } while (0)
 
 /*
  * ZERO_PAGE is a global shared page that is always zero; used
diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h
index 0c92113c4cb..8faa215a503 100644
--- a/arch/x86/include/asm/pgtable_32.h
+++ b/arch/x86/include/asm/pgtable_32.h
@@ -71,6 +71,7 @@ do {						\
  * tables contain all the necessary information.
  */
 #define update_mmu_cache(vma, address, ptep) do { } while (0)
+#define update_mmu_cache_pmd(vma, address, pmd) do { } while (0)
 
 #endif /* !__ASSEMBLY__ */
 
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 8251be02301..47356f9df82 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -143,6 +143,7 @@ static inline int pgd_large(pgd_t pgd) { return 0; }
 #define pte_unmap(pte) ((void)(pte))/* NOP */
 
 #define update_mmu_cache(vma, address, ptep) do { } while (0)
+#define update_mmu_cache_pmd(vma, address, pmd) do { } while (0)
 
 /* Encode and de-code a swap entry */
 #if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 3a8d6b7d95d..68a3c93036f 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -900,7 +900,7 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		entry = pmd_mkyoung(orig_pmd);
 		entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
 		if (pmdp_set_access_flags(vma, haddr, pmd, entry,  1))
-			update_mmu_cache(vma, address, pmd);
+			update_mmu_cache_pmd(vma, address, pmd);
 		ret |= VM_FAULT_WRITE;
 		goto out_unlock;
 	}
@@ -956,7 +956,7 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		pmdp_clear_flush(vma, haddr, pmd);
 		page_add_new_anon_rmap(new_page, vma, haddr);
 		set_pmd_at(mm, haddr, pmd, entry);
-		update_mmu_cache(vma, address, pmd);
+		update_mmu_cache_pmd(vma, address, pmd);
 		page_remove_rmap(page);
 		put_page(page);
 		ret |= VM_FAULT_WRITE;
@@ -2041,7 +2041,7 @@ static void collapse_huge_page(struct mm_struct *mm,
 	BUG_ON(!pmd_none(*pmd));
 	page_add_new_anon_rmap(new_page, vma, address);
 	set_pmd_at(mm, address, pmd, _pmd);
-	update_mmu_cache(vma, address, pmd);
+	update_mmu_cache_pmd(vma, address, pmd);
 	pgtable_trans_huge_deposit(mm, pgtable);
 	spin_unlock(&mm->page_table_lock);
 
-- 
cgit v1.2.3-70-g09d2


From 4301785c7c7ca712797f2f25bcd531b1d226ccb3 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ZenIV.linux.org.uk>
Date: Mon, 8 Oct 2012 03:25:00 +0100
Subject: um/x86: merge 32 and 64bit variants of checksum.h

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 arch/x86/um/asm/checksum.h    | 144 ++++++++++++++++++++++++++++++++++++++++++
 arch/x86/um/asm/checksum_32.h | 140 ----------------------------------------
 arch/x86/um/asm/checksum_64.h | 125 ------------------------------------
 3 files changed, 144 insertions(+), 265 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/um/asm/checksum.h b/arch/x86/um/asm/checksum.h
index b6efe2381b5..4b181b74454 100644
--- a/arch/x86/um/asm/checksum.h
+++ b/arch/x86/um/asm/checksum.h
@@ -1,6 +1,150 @@
 #ifndef __UM_CHECKSUM_H
 #define __UM_CHECKSUM_H
 
+#include <linux/string.h>
+#include <linux/in6.h>
+
+/*
+ * computes the checksum of a memory block at buff, length len,
+ * and adds in "sum" (32-bit)
+ *
+ * returns a 32-bit number suitable for feeding into itself
+ * or csum_tcpudp_magic
+ *
+ * this function must be called with even lengths, except
+ * for the last fragment, which may be odd
+ *
+ * it's best to have buff aligned on a 32-bit boundary
+ */
+extern __wsum csum_partial(const void *buff, int len, __wsum sum);
+
+/*
+ *	Note: when you get a NULL pointer exception here this means someone
+ *	passed in an incorrect kernel address to one of these functions.
+ *
+ *	If you use these functions directly please don't forget the
+ *	access_ok().
+ */
+
+static __inline__
+__wsum csum_partial_copy_nocheck(const void *src, void *dst,
+				       int len, __wsum sum)
+{
+	memcpy(dst, src, len);
+	return csum_partial(dst, len, sum);
+}
+
+/*
+ * the same as csum_partial, but copies from src while it
+ * checksums, and handles user-space pointer exceptions correctly, when needed.
+ *
+ * here even more important to align src and dst on a 32-bit (or even
+ * better 64-bit) boundary
+ */
+
+static __inline__
+__wsum csum_partial_copy_from_user(const void __user *src, void *dst,
+					 int len, __wsum sum, int *err_ptr)
+{
+	if (copy_from_user(dst, src, len)) {
+		*err_ptr = -EFAULT;
+		return (__force __wsum)-1;
+	}
+
+	return csum_partial(dst, len, sum);
+}
+
+/**
+ * csum_fold - Fold and invert a 32bit checksum.
+ * sum: 32bit unfolded sum
+ *
+ * Fold a 32bit running checksum to 16bit and invert it. This is usually
+ * the last step before putting a checksum into a packet.
+ * Make sure not to mix with 64bit checksums.
+ */
+static inline __sum16 csum_fold(__wsum sum)
+{
+	__asm__(
+		"  addl %1,%0\n"
+		"  adcl $0xffff,%0"
+		: "=r" (sum)
+		: "r" ((__force u32)sum << 16),
+		  "0" ((__force u32)sum & 0xffff0000)
+	);
+	return (__force __sum16)(~(__force u32)sum >> 16);
+}
+
+/**
+ * csum_tcpup_nofold - Compute an IPv4 pseudo header checksum.
+ * @saddr: source address
+ * @daddr: destination address
+ * @len: length of packet
+ * @proto: ip protocol of packet
+ * @sum: initial sum to be added in (32bit unfolded)
+ *
+ * Returns the pseudo header checksum the input data. Result is
+ * 32bit unfolded.
+ */
+static inline __wsum
+csum_tcpudp_nofold(__be32 saddr, __be32 daddr, unsigned short len,
+		   unsigned short proto, __wsum sum)
+{
+	asm("  addl %1, %0\n"
+	    "  adcl %2, %0\n"
+	    "  adcl %3, %0\n"
+	    "  adcl $0, %0\n"
+		: "=r" (sum)
+	    : "g" (daddr), "g" (saddr), "g" ((len + proto) << 8), "0" (sum));
+	return sum;
+}
+
+/*
+ * computes the checksum of the TCP/UDP pseudo-header
+ * returns a 16-bit checksum, already complemented
+ */
+static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr,
+					   unsigned short len,
+					   unsigned short proto,
+					   __wsum sum)
+{
+	return csum_fold(csum_tcpudp_nofold(saddr,daddr,len,proto,sum));
+}
+
+/**
+ * ip_fast_csum - Compute the IPv4 header checksum efficiently.
+ * iph: ipv4 header
+ * ihl: length of header / 4
+ */
+static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
+{
+	unsigned int sum;
+
+	asm(	"  movl (%1), %0\n"
+		"  subl $4, %2\n"
+		"  jbe 2f\n"
+		"  addl 4(%1), %0\n"
+		"  adcl 8(%1), %0\n"
+		"  adcl 12(%1), %0\n"
+		"1: adcl 16(%1), %0\n"
+		"  lea 4(%1), %1\n"
+		"  decl %2\n"
+		"  jne	1b\n"
+		"  adcl $0, %0\n"
+		"  movl %0, %2\n"
+		"  shrl $16, %0\n"
+		"  addw %w2, %w0\n"
+		"  adcl $0, %0\n"
+		"  notl %0\n"
+		"2:"
+	/* Since the input registers which are loaded with iph and ipl
+	   are modified, we must also specify them as outputs, or gcc
+	   will assume they contain their original values. */
+	: "=r" (sum), "=r" (iph), "=r" (ihl)
+	: "1" (iph), "2" (ihl)
+	: "memory");
+	return (__force __sum16)sum;
+}
+
 #ifdef CONFIG_X86_32
 # include "checksum_32.h"
 #else
diff --git a/arch/x86/um/asm/checksum_32.h b/arch/x86/um/asm/checksum_32.h
index caab74252e2..ab77b6f9a4b 100644
--- a/arch/x86/um/asm/checksum_32.h
+++ b/arch/x86/um/asm/checksum_32.h
@@ -5,145 +5,6 @@
 #ifndef __UM_SYSDEP_CHECKSUM_H
 #define __UM_SYSDEP_CHECKSUM_H
 
-#include "linux/in6.h"
-#include "linux/string.h"
-
-/*
- * computes the checksum of a memory block at buff, length len,
- * and adds in "sum" (32-bit)
- *
- * returns a 32-bit number suitable for feeding into itself
- * or csum_tcpudp_magic
- *
- * this function must be called with even lengths, except
- * for the last fragment, which may be odd
- *
- * it's best to have buff aligned on a 32-bit boundary
- */
-__wsum csum_partial(const void *buff, int len, __wsum sum);
-
-/*
- *	Note: when you get a NULL pointer exception here this means someone
- *	passed in an incorrect kernel address to one of these functions.
- *
- *	If you use these functions directly please don't forget the
- *	access_ok().
- */
-
-static __inline__
-__wsum csum_partial_copy_nocheck(const void *src, void *dst,
-				       int len, __wsum sum)
-{
-	memcpy(dst, src, len);
-	return csum_partial(dst, len, sum);
-}
-
-/*
- * the same as csum_partial, but copies from src while it
- * checksums, and handles user-space pointer exceptions correctly, when needed.
- *
- * here even more important to align src and dst on a 32-bit (or even
- * better 64-bit) boundary
- */
-
-static __inline__
-__wsum csum_partial_copy_from_user(const void __user *src, void *dst,
-					 int len, __wsum sum, int *err_ptr)
-{
-	if (copy_from_user(dst, src, len)) {
-		*err_ptr = -EFAULT;
-		return (__force __wsum)-1;
-	}
-
-	return csum_partial(dst, len, sum);
-}
-
-/*
- *	This is a version of ip_compute_csum() optimized for IP headers,
- *	which always checksum on 4 octet boundaries.
- *
- *	By Jorge Cwik <jorge@laser.satlink.net>, adapted for linux by
- *	Arnt Gulbrandsen.
- */
-static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
-{
-	unsigned int sum;
-
-	__asm__ __volatile__(
-	    "movl (%1), %0	;\n"
-	    "subl $4, %2	;\n"
-	    "jbe 2f		;\n"
-	    "addl 4(%1), %0	;\n"
-	    "adcl 8(%1), %0	;\n"
-	    "adcl 12(%1), %0	;\n"
-"1:	    adcl 16(%1), %0	;\n"
-	    "lea 4(%1), %1	;\n"
-	    "decl %2		;\n"
-	    "jne 1b		;\n"
-	    "adcl $0, %0	;\n"
-	    "movl %0, %2	;\n"
-	    "shrl $16, %0	;\n"
-	    "addw %w2, %w0	;\n"
-	    "adcl $0, %0	;\n"
-	    "notl %0		;\n"
-"2:				;\n"
-	/* Since the input registers which are loaded with iph and ipl
-	   are modified, we must also specify them as outputs, or gcc
-	   will assume they contain their original values. */
-	: "=r" (sum), "=r" (iph), "=r" (ihl)
-	: "1" (iph), "2" (ihl)
-	: "memory");
-	return (__force __sum16)sum;
-}
-
-/*
- *	Fold a partial checksum
- */
-
-static inline __sum16 csum_fold(__wsum sum)
-{
-	__asm__(
-		"addl %1, %0		;\n"
-		"adcl $0xffff, %0	;\n"
-		: "=r" (sum)
-		: "r" ((__force u32)sum << 16),
-		  "0" ((__force u32)sum & 0xffff0000)
-	);
-	return (__force __sum16)(~(__force u32)sum >> 16);
-}
-
-static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
-						   unsigned short len,
-						   unsigned short proto,
-						   __wsum sum)
-{
-    __asm__(
-	"addl %1, %0	;\n"
-	"adcl %2, %0	;\n"
-	"adcl %3, %0	;\n"
-	"adcl $0, %0	;\n"
-	: "=r" (sum)
-	: "g" (daddr), "g"(saddr), "g"((len + proto) << 8), "0"(sum));
-    return sum;
-}
-
-/*
- * computes the checksum of the TCP/UDP pseudo-header
- * returns a 16-bit checksum, already complemented
- */
-static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr,
-						   unsigned short len,
-						   unsigned short proto,
-						   __wsum sum)
-{
-	return csum_fold(csum_tcpudp_nofold(saddr,daddr,len,proto,sum));
-}
-
-/*
- * this routine is used for miscellaneous IP-like checksums, mainly
- * in icmp.c
- */
-
 static inline __sum16 ip_compute_csum(const void *buff, int len)
 {
     return csum_fold (csum_partial(buff, len, 0));
@@ -198,4 +59,3 @@ static __inline__ __wsum csum_and_copy_to_user(const void *src,
 }
 
 #endif
-
diff --git a/arch/x86/um/asm/checksum_64.h b/arch/x86/um/asm/checksum_64.h
index a5be9031ea8..7b6cd192157 100644
--- a/arch/x86/um/asm/checksum_64.h
+++ b/arch/x86/um/asm/checksum_64.h
@@ -5,131 +5,6 @@
 #ifndef __UM_SYSDEP_CHECKSUM_H
 #define __UM_SYSDEP_CHECKSUM_H
 
-#include "linux/string.h"
-#include "linux/in6.h"
-#include "asm/uaccess.h"
-
-extern __wsum csum_partial(const void *buff, int len, __wsum sum);
-
-/*
- *	Note: when you get a NULL pointer exception here this means someone
- *	passed in an incorrect kernel address to one of these functions.
- *
- *	If you use these functions directly please don't forget the
- *	access_ok().
- */
-
-static __inline__
-__wsum csum_partial_copy_nocheck(const void *src, void *dst,
-				       int len, __wsum sum)
-{
-	memcpy(dst, src, len);
-	return(csum_partial(dst, len, sum));
-}
-
-static __inline__
-__wsum csum_partial_copy_from_user(const void __user *src,
-                                         void *dst, int len, __wsum sum,
-                                         int *err_ptr)
-{
-        if (copy_from_user(dst, src, len)) {
-                *err_ptr = -EFAULT;
-                return (__force __wsum)-1;
-        }
-        return csum_partial(dst, len, sum);
-}
-
-/**
- * csum_fold - Fold and invert a 32bit checksum.
- * sum: 32bit unfolded sum
- *
- * Fold a 32bit running checksum to 16bit and invert it. This is usually
- * the last step before putting a checksum into a packet.
- * Make sure not to mix with 64bit checksums.
- */
-static inline __sum16 csum_fold(__wsum sum)
-{
-	__asm__(
-		"  addl %1,%0\n"
-		"  adcl $0xffff,%0"
-		: "=r" (sum)
-		: "r" ((__force u32)sum << 16),
-		  "0" ((__force u32)sum & 0xffff0000)
-	);
-	return (__force __sum16)(~(__force u32)sum >> 16);
-}
-
-/**
- * csum_tcpup_nofold - Compute an IPv4 pseudo header checksum.
- * @saddr: source address
- * @daddr: destination address
- * @len: length of packet
- * @proto: ip protocol of packet
- * @sum: initial sum to be added in (32bit unfolded)
- *
- * Returns the pseudo header checksum the input data. Result is
- * 32bit unfolded.
- */
-static inline __wsum
-csum_tcpudp_nofold(__be32 saddr, __be32 daddr, unsigned short len,
-		   unsigned short proto, __wsum sum)
-{
-	asm("  addl %1, %0\n"
-	    "  adcl %2, %0\n"
-	    "  adcl %3, %0\n"
-	    "  adcl $0, %0\n"
-		: "=r" (sum)
-	    : "g" (daddr), "g" (saddr), "g" ((len + proto) << 8), "0" (sum));
-	return sum;
-}
-
-/*
- * computes the checksum of the TCP/UDP pseudo-header
- * returns a 16-bit checksum, already complemented
- */
-static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr,
-					   unsigned short len,
-					   unsigned short proto,
-					   __wsum sum)
-{
-	return csum_fold(csum_tcpudp_nofold(saddr,daddr,len,proto,sum));
-}
-
-/**
- * ip_fast_csum - Compute the IPv4 header checksum efficiently.
- * iph: ipv4 header
- * ihl: length of header / 4
- */
-static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
-{
-	unsigned int sum;
-
-	asm(	"  movl (%1), %0\n"
-		"  subl $4, %2\n"
-		"  jbe 2f\n"
-		"  addl 4(%1), %0\n"
-		"  adcl 8(%1), %0\n"
-		"  adcl 12(%1), %0\n"
-		"1: adcl 16(%1), %0\n"
-		"  lea 4(%1), %1\n"
-		"  decl %2\n"
-		"  jne	1b\n"
-		"  adcl $0, %0\n"
-		"  movl %0, %2\n"
-		"  shrl $16, %0\n"
-		"  addw %w2, %w0\n"
-		"  adcl $0, %0\n"
-		"  notl %0\n"
-		"2:"
-	/* Since the input registers which are loaded with iph and ipl
-	   are modified, we must also specify them as outputs, or gcc
-	   will assume they contain their original values. */
-	: "=r" (sum), "=r" (iph), "=r" (ihl)
-	: "1" (iph), "2" (ihl)
-	: "memory");
-	return (__force __sum16)sum;
-}
-
 static inline unsigned add32_with_carry(unsigned a, unsigned b)
 {
         asm("addl %2,%0\n\t"
-- 
cgit v1.2.3-70-g09d2


From 8813f67439524ab54978b011c51665e1854a64b5 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ZenIV.linux.org.uk>
Date: Mon, 8 Oct 2012 03:26:17 +0100
Subject: um/x86: merge 32 and 64 bit variants of ptrace.h

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 arch/x86/um/asm/ptrace.h    | 58 +++++++++++++++++++++++++++++++++++++++++----
 arch/x86/um/asm/ptrace_32.h | 28 ----------------------
 arch/x86/um/asm/ptrace_64.h | 46 -----------------------------------
 3 files changed, 54 insertions(+), 78 deletions(-)
 delete mode 100644 arch/x86/um/asm/ptrace_32.h
 delete mode 100644 arch/x86/um/asm/ptrace_64.h

(limited to 'arch/x86')

diff --git a/arch/x86/um/asm/ptrace.h b/arch/x86/um/asm/ptrace.h
index e72cd0df5ba..755133258c4 100644
--- a/arch/x86/um/asm/ptrace.h
+++ b/arch/x86/um/asm/ptrace.h
@@ -1,11 +1,13 @@
 #ifndef __UM_X86_PTRACE_H
 #define __UM_X86_PTRACE_H
 
-#ifdef CONFIG_X86_32
-# include "ptrace_32.h"
-#else
-# include "ptrace_64.h"
+#include <linux/compiler.h>
+#ifndef CONFIG_X86_32
+#define __FRAME_OFFSETS /* Needed to get the R* macros */
 #endif
+#include <asm/ptrace-generic.h>
+
+#define user_mode(r) UPT_IS_USER(&(r)->regs)
 
 #define PT_REGS_AX(r) UPT_AX(&(r)->regs)
 #define PT_REGS_BX(r) UPT_BX(&(r)->regs)
@@ -36,4 +38,52 @@ static inline long regs_return_value(struct pt_regs *regs)
 {
 	return PT_REGS_AX(regs);
 }
+
+/*
+ * Forward declaration to avoid including sysdep/tls.h, which causes a
+ * circular include, and compilation failures.
+ */
+struct user_desc;
+
+#ifdef CONFIG_X86_32
+
+#define HOST_AUDIT_ARCH AUDIT_ARCH_I386
+
+extern int ptrace_get_thread_area(struct task_struct *child, int idx,
+                                  struct user_desc __user *user_desc);
+
+extern int ptrace_set_thread_area(struct task_struct *child, int idx,
+                                  struct user_desc __user *user_desc);
+
+#else
+
+#define HOST_AUDIT_ARCH AUDIT_ARCH_X86_64
+
+#define PT_REGS_R8(r) UPT_R8(&(r)->regs)
+#define PT_REGS_R9(r) UPT_R9(&(r)->regs)
+#define PT_REGS_R10(r) UPT_R10(&(r)->regs)
+#define PT_REGS_R11(r) UPT_R11(&(r)->regs)
+#define PT_REGS_R12(r) UPT_R12(&(r)->regs)
+#define PT_REGS_R13(r) UPT_R13(&(r)->regs)
+#define PT_REGS_R14(r) UPT_R14(&(r)->regs)
+#define PT_REGS_R15(r) UPT_R15(&(r)->regs)
+
+#include <asm/errno.h>
+
+static inline int ptrace_get_thread_area(struct task_struct *child, int idx,
+                                         struct user_desc __user *user_desc)
+{
+        return -ENOSYS;
+}
+
+static inline int ptrace_set_thread_area(struct task_struct *child, int idx,
+                                         struct user_desc __user *user_desc)
+{
+        return -ENOSYS;
+}
+
+extern long arch_prctl(struct task_struct *task, int code,
+		       unsigned long __user *addr);
+
+#endif
 #endif /* __UM_X86_PTRACE_H */
diff --git a/arch/x86/um/asm/ptrace_32.h b/arch/x86/um/asm/ptrace_32.h
deleted file mode 100644
index 2cf225351b6..00000000000
--- a/arch/x86/um/asm/ptrace_32.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/* 
- * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
- */
-
-#ifndef __UM_PTRACE_I386_H
-#define __UM_PTRACE_I386_H
-
-#define HOST_AUDIT_ARCH AUDIT_ARCH_I386
-
-#include "linux/compiler.h"
-#include "asm/ptrace-generic.h"
-
-#define user_mode(r) UPT_IS_USER(&(r)->regs)
-
-/*
- * Forward declaration to avoid including sysdep/tls.h, which causes a
- * circular include, and compilation failures.
- */
-struct user_desc;
-
-extern int ptrace_get_thread_area(struct task_struct *child, int idx,
-                                  struct user_desc __user *user_desc);
-
-extern int ptrace_set_thread_area(struct task_struct *child, int idx,
-                                  struct user_desc __user *user_desc);
-
-#endif
diff --git a/arch/x86/um/asm/ptrace_64.h b/arch/x86/um/asm/ptrace_64.h
deleted file mode 100644
index ea7bff39432..00000000000
--- a/arch/x86/um/asm/ptrace_64.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Copyright 2003 PathScale, Inc.
- *
- * Licensed under the GPL
- */
-
-#ifndef __UM_PTRACE_X86_64_H
-#define __UM_PTRACE_X86_64_H
-
-#include "linux/compiler.h"
-#include "asm/errno.h"
-
-#define __FRAME_OFFSETS /* Needed to get the R* macros */
-#include "asm/ptrace-generic.h"
-
-#define HOST_AUDIT_ARCH AUDIT_ARCH_X86_64
-
-#define PT_REGS_R8(r) UPT_R8(&(r)->regs)
-#define PT_REGS_R9(r) UPT_R9(&(r)->regs)
-#define PT_REGS_R10(r) UPT_R10(&(r)->regs)
-#define PT_REGS_R11(r) UPT_R11(&(r)->regs)
-#define PT_REGS_R12(r) UPT_R12(&(r)->regs)
-#define PT_REGS_R13(r) UPT_R13(&(r)->regs)
-#define PT_REGS_R14(r) UPT_R14(&(r)->regs)
-#define PT_REGS_R15(r) UPT_R15(&(r)->regs)
-
-/* XXX */
-#define user_mode(r) UPT_IS_USER(&(r)->regs)
-
-struct user_desc;
-
-static inline int ptrace_get_thread_area(struct task_struct *child, int idx,
-                                         struct user_desc __user *user_desc)
-{
-        return -ENOSYS;
-}
-
-static inline int ptrace_set_thread_area(struct task_struct *child, int idx,
-                                         struct user_desc __user *user_desc)
-{
-        return -ENOSYS;
-}
-
-extern long arch_prctl(struct task_struct *task, int code,
-		       unsigned long __user *addr);
-#endif
-- 
cgit v1.2.3-70-g09d2


From 382d95fdfa7ff5c54f6495c597c7cf6d124e404b Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ZenIV.linux.org.uk>
Date: Mon, 8 Oct 2012 03:26:54 +0100
Subject: um: move sysrq.h out of include/shared

never used by userland-side objects

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 arch/um/include/asm/sysrq.h    |  7 +++++++
 arch/um/include/shared/sysrq.h |  7 -------
 arch/um/kernel/sysrq.c         |  2 +-
 arch/x86/um/sysrq_32.c         | 12 ++++++------
 arch/x86/um/sysrq_64.c         |  2 +-
 5 files changed, 15 insertions(+), 15 deletions(-)
 create mode 100644 arch/um/include/asm/sysrq.h
 delete mode 100644 arch/um/include/shared/sysrq.h

(limited to 'arch/x86')

diff --git a/arch/um/include/asm/sysrq.h b/arch/um/include/asm/sysrq.h
new file mode 100644
index 00000000000..c8d332b56b9
--- /dev/null
+++ b/arch/um/include/asm/sysrq.h
@@ -0,0 +1,7 @@
+#ifndef __UM_SYSRQ_H
+#define __UM_SYSRQ_H
+
+struct task_struct;
+extern void show_trace(struct task_struct* task, unsigned long *stack);
+
+#endif
diff --git a/arch/um/include/shared/sysrq.h b/arch/um/include/shared/sysrq.h
deleted file mode 100644
index c8d332b56b9..00000000000
--- a/arch/um/include/shared/sysrq.h
+++ /dev/null
@@ -1,7 +0,0 @@
-#ifndef __UM_SYSRQ_H
-#define __UM_SYSRQ_H
-
-struct task_struct;
-extern void show_trace(struct task_struct* task, unsigned long *stack);
-
-#endif
diff --git a/arch/um/kernel/sysrq.c b/arch/um/kernel/sysrq.c
index 0960de54495..e562ff80409 100644
--- a/arch/um/kernel/sysrq.c
+++ b/arch/um/kernel/sysrq.c
@@ -7,7 +7,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/sched.h>
-#include "sysrq.h"
+#include <asm/sysrq.h>
 
 /* Catch non-i386 SUBARCH's. */
 #if !defined(CONFIG_UML_X86) || defined(CONFIG_64BIT)
diff --git a/arch/x86/um/sysrq_32.c b/arch/x86/um/sysrq_32.c
index 2d5cc51e9be..c9bee5b8c0d 100644
--- a/arch/x86/um/sysrq_32.c
+++ b/arch/x86/um/sysrq_32.c
@@ -3,12 +3,12 @@
  * Licensed under the GPL
  */
 
-#include "linux/kernel.h"
-#include "linux/smp.h"
-#include "linux/sched.h"
-#include "linux/kallsyms.h"
-#include "asm/ptrace.h"
-#include "sysrq.h"
+#include <linux/kernel.h>
+#include <linux/smp.h>
+#include <linux/sched.h>
+#include <linux/kallsyms.h>
+#include <asm/ptrace.h>
+#include <asm/sysrq.h>
 
 /* This is declared by <linux/sched.h> */
 void show_regs(struct pt_regs *regs)
diff --git a/arch/x86/um/sysrq_64.c b/arch/x86/um/sysrq_64.c
index 08258f17996..a0e7fb1134a 100644
--- a/arch/x86/um/sysrq_64.c
+++ b/arch/x86/um/sysrq_64.c
@@ -10,7 +10,7 @@
 #include <linux/utsname.h>
 #include <asm/current.h>
 #include <asm/ptrace.h>
-#include "sysrq.h"
+#include <asm/sysrq.h>
 
 void __show_regs(struct pt_regs *regs)
 {
-- 
cgit v1.2.3-70-g09d2


From 37185b33240870719b6b5913a46e6a441f1ae96f Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ZenIV.linux.org.uk>
Date: Mon, 8 Oct 2012 03:27:32 +0100
Subject: um: get rid of pointless include "..." where include <...> will do

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 arch/um/drivers/chan_kern.c              |  4 ++--
 arch/um/drivers/chan_user.c              |  4 ++--
 arch/um/drivers/chan_user.h              |  2 +-
 arch/um/drivers/cow_sys.h                |  6 +++---
 arch/um/drivers/daemon.h                 |  2 +-
 arch/um/drivers/daemon_kern.c            |  4 ++--
 arch/um/drivers/daemon_user.c            |  6 +++---
 arch/um/drivers/fd.c                     |  4 ++--
 arch/um/drivers/harddog_user.c           |  2 +-
 arch/um/drivers/hostaudio_kern.c         | 18 ++++++++--------
 arch/um/drivers/line.c                   | 16 +++++++-------
 arch/um/drivers/line.h                   | 12 +++++------
 arch/um/drivers/mconsole.h               |  2 +-
 arch/um/drivers/mconsole_kern.c          | 10 ++++-----
 arch/um/drivers/mconsole_kern.h          |  2 +-
 arch/um/drivers/mmapper_kern.c           |  2 +-
 arch/um/drivers/net_kern.c               | 10 ++++-----
 arch/um/drivers/net_user.c               |  6 +++---
 arch/um/drivers/null.c                   |  2 +-
 arch/um/drivers/pcap_kern.c              |  4 ++--
 arch/um/drivers/pcap_user.c              |  4 ++--
 arch/um/drivers/pcap_user.h              |  2 +-
 arch/um/drivers/port_kern.c              | 20 +++++++++---------
 arch/um/drivers/port_user.c              |  4 ++--
 arch/um/drivers/pty.c                    |  4 ++--
 arch/um/drivers/random.c                 |  4 ++--
 arch/um/drivers/slip_common.c            |  2 +-
 arch/um/drivers/slip_kern.c              |  2 +-
 arch/um/drivers/slip_user.c              |  6 +++---
 arch/um/drivers/slirp_kern.c             |  6 +++---
 arch/um/drivers/slirp_user.c             |  4 ++--
 arch/um/drivers/ssl.c                    | 22 +++++++++----------
 arch/um/drivers/stdio_console.c          | 36 ++++++++++++++++----------------
 arch/um/drivers/tty.c                    |  4 ++--
 arch/um/drivers/ubd_kern.c               |  8 +++----
 arch/um/drivers/ubd_user.c               |  2 +-
 arch/um/drivers/umcast.h                 |  2 +-
 arch/um/drivers/umcast_kern.c            |  4 ++--
 arch/um/drivers/umcast_user.c            |  4 ++--
 arch/um/drivers/vde_kern.c               |  6 +++---
 arch/um/drivers/vde_user.c               |  4 ++--
 arch/um/drivers/xterm.c                  |  4 ++--
 arch/um/drivers/xterm_kern.c             |  4 ++--
 arch/um/include/asm/dma.h                |  2 +-
 arch/um/include/asm/mmu.h                |  2 +-
 arch/um/include/asm/page.h               |  2 +-
 arch/um/include/asm/pgtable.h            |  4 ++--
 arch/um/include/asm/processor-generic.h  |  6 +++---
 arch/um/include/asm/ptrace-generic.h     |  2 +-
 arch/um/include/asm/smp.h                |  6 +++---
 arch/um/include/shared/arch.h            |  2 +-
 arch/um/include/shared/as-layout.h       |  2 +-
 arch/um/include/shared/irq_kern.h        |  4 ++--
 arch/um/include/shared/irq_user.h        |  2 +-
 arch/um/include/shared/kern_util.h       |  4 ++--
 arch/um/include/shared/longjmp.h         |  4 ++--
 arch/um/include/shared/os.h              |  6 +++---
 arch/um/include/shared/registers.h       |  4 ++--
 arch/um/include/shared/skas/skas.h       |  2 +-
 arch/um/include/shared/skas_ptrace.h     |  2 +-
 arch/um/kernel/asm-offsets.c             |  2 +-
 arch/um/kernel/config.c.in               |  2 +-
 arch/um/kernel/dyn.lds.S                 |  2 +-
 arch/um/kernel/early_printk.c            |  2 +-
 arch/um/kernel/exec.c                    |  8 +++----
 arch/um/kernel/gmon_syms.c               |  2 +-
 arch/um/kernel/gprof_syms.c              |  2 +-
 arch/um/kernel/initrd.c                  | 12 +++++------
 arch/um/kernel/irq.c                     | 22 +++++++++----------
 arch/um/kernel/ksyms.c                   |  2 +-
 arch/um/kernel/mem.c                     | 12 +++++------
 arch/um/kernel/process.c                 |  8 +++----
 arch/um/kernel/reboot.c                  | 14 ++++++-------
 arch/um/kernel/sigio.c                   |  6 +++---
 arch/um/kernel/signal.c                  |  4 ++--
 arch/um/kernel/skas/clone.c              |  8 +++----
 arch/um/kernel/skas/mmu.c                | 16 +++++++-------
 arch/um/kernel/skas/process.c            | 12 +++++------
 arch/um/kernel/skas/syscall.c            | 10 ++++-----
 arch/um/kernel/skas/uaccess.c            |  4 ++--
 arch/um/kernel/smp.c                     | 30 +++++++++++++-------------
 arch/um/kernel/syscall.c                 | 20 +++++++++---------
 arch/um/kernel/time.c                    |  4 ++--
 arch/um/kernel/tlb.c                     |  8 +++----
 arch/um/kernel/trap.c                    | 10 ++++-----
 arch/um/kernel/um_arch.c                 | 14 ++++++-------
 arch/um/kernel/umid.c                    |  6 +++---
 arch/um/kernel/uml.lds.S                 |  2 +-
 arch/um/os-Linux/aio.c                   |  8 +++----
 arch/um/os-Linux/drivers/etap.h          |  2 +-
 arch/um/os-Linux/drivers/ethertap_kern.c |  2 +-
 arch/um/os-Linux/drivers/ethertap_user.c |  6 +++---
 arch/um/os-Linux/drivers/tuntap.h        |  2 +-
 arch/um/os-Linux/drivers/tuntap_kern.c   |  2 +-
 arch/um/os-Linux/drivers/tuntap_user.c   |  4 ++--
 arch/um/os-Linux/elf_aux.c               |  6 +++---
 arch/um/os-Linux/execvp.c                |  4 ++--
 arch/um/os-Linux/file.c                  |  2 +-
 arch/um/os-Linux/helper.c                |  6 +++---
 arch/um/os-Linux/irq.c                   |  6 +++---
 arch/um/os-Linux/main.c                  | 10 ++++-----
 arch/um/os-Linux/mem.c                   |  4 ++--
 arch/um/os-Linux/process.c               |  8 +++----
 arch/um/os-Linux/registers.c             |  6 +++---
 arch/um/os-Linux/sigio.c                 | 10 ++++-----
 arch/um/os-Linux/signal.c                |  8 +++----
 arch/um/os-Linux/skas/mem.c              | 20 +++++++++---------
 arch/um/os-Linux/skas/process.c          | 22 +++++++++----------
 arch/um/os-Linux/start_up.c              | 14 ++++++-------
 arch/um/os-Linux/time.c                  |  4 ++--
 arch/um/os-Linux/tty.c                   |  4 ++--
 arch/um/os-Linux/umid.c                  |  4 ++--
 arch/um/os-Linux/user_syms.c             |  4 ++--
 arch/um/os-Linux/util.c                  |  2 +-
 arch/um/sys-ppc/miscthings.c             |  6 +++---
 arch/um/sys-ppc/ptrace.c                 |  2 +-
 arch/um/sys-ppc/ptrace_user.c            |  2 +-
 arch/um/sys-ppc/shared/sysdep/ptrace.h   |  2 +-
 arch/um/sys-ppc/sigcontext.c             |  2 +-
 arch/um/sys-ppc/sysrq.c                  |  4 ++--
 arch/x86/um/asm/elf.h                    |  2 +-
 arch/x86/um/bugs_32.c                    |  6 +++---
 arch/x86/um/bugs_64.c                    |  2 +-
 arch/x86/um/fault.c                      |  2 +-
 arch/x86/um/ldt.c                        | 10 ++++-----
 arch/x86/um/mem_64.c                     |  6 +++---
 arch/x86/um/os-Linux/registers.c         |  4 ++--
 arch/x86/um/os-Linux/task_size.c         |  2 +-
 arch/x86/um/os-Linux/tls.c               |  2 +-
 arch/x86/um/ptrace_32.c                  |  8 +++----
 arch/x86/um/ptrace_user.c                |  2 +-
 arch/x86/um/shared/sysdep/ptrace.h       |  2 +-
 arch/x86/um/shared/sysdep/stub.h         |  4 ++--
 arch/x86/um/shared/sysdep/syscalls_32.h  |  4 ++--
 arch/x86/um/signal.c                     |  4 ++--
 arch/x86/um/stub_32.S                    |  2 +-
 arch/x86/um/stub_64.S                    |  2 +-
 arch/x86/um/stub_segv.c                  |  6 +++---
 arch/x86/um/tls_32.c                     | 12 +++++------
 arch/x86/um/tls_64.c                     |  2 +-
 fs/hostfs/hostfs.h                       |  2 +-
 fs/hostfs/hostfs_kern.c                  |  4 ++--
 fs/hostfs/hostfs_user.c                  |  1 -
 fs/hppfs/hppfs.c                         |  2 +-
 144 files changed, 428 insertions(+), 429 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/um/drivers/chan_kern.c b/arch/um/drivers/chan_kern.c
index 87eebfe03c6..c3bba73e4be 100644
--- a/arch/um/drivers/chan_kern.c
+++ b/arch/um/drivers/chan_kern.c
@@ -7,8 +7,8 @@
 #include <linux/tty.h>
 #include <linux/tty_flip.h>
 #include "chan.h"
-#include "os.h"
-#include "irq_kern.h"
+#include <os.h>
+#include <irq_kern.h>
 
 #ifdef CONFIG_NOCONFIG_CHAN
 static void *not_configged_init(char *str, int device,
diff --git a/arch/um/drivers/chan_user.c b/arch/um/drivers/chan_user.c
index f180813ce2c..9be670ad23b 100644
--- a/arch/um/drivers/chan_user.c
+++ b/arch/um/drivers/chan_user.c
@@ -11,8 +11,8 @@
 #include <termios.h>
 #include <sys/ioctl.h>
 #include "chan_user.h"
-#include "os.h"
-#include "um_malloc.h"
+#include <os.h>
+#include <um_malloc.h>
 
 void generic_close(int fd, void *unused)
 {
diff --git a/arch/um/drivers/chan_user.h b/arch/um/drivers/chan_user.h
index 6257b7a6e1a..dc693298eb8 100644
--- a/arch/um/drivers/chan_user.h
+++ b/arch/um/drivers/chan_user.h
@@ -6,7 +6,7 @@
 #ifndef __CHAN_USER_H__
 #define __CHAN_USER_H__
 
-#include "init.h"
+#include <init.h>
 
 struct chan_opts {
 	void (*const announce)(char *dev_name, int dev);
diff --git a/arch/um/drivers/cow_sys.h b/arch/um/drivers/cow_sys.h
index 7f2ed0b8824..67cbee63e70 100644
--- a/arch/um/drivers/cow_sys.h
+++ b/arch/um/drivers/cow_sys.h
@@ -1,9 +1,9 @@
 #ifndef __COW_SYS_H__
 #define __COW_SYS_H__
 
-#include "kern_util.h"
-#include "os.h"
-#include "um_malloc.h"
+#include <kern_util.h>
+#include <os.h>
+#include <um_malloc.h>
 
 static inline void *cow_malloc(int size)
 {
diff --git a/arch/um/drivers/daemon.h b/arch/um/drivers/daemon.h
index 6e0e891f8a0..c2dd1951559 100644
--- a/arch/um/drivers/daemon.h
+++ b/arch/um/drivers/daemon.h
@@ -6,7 +6,7 @@
 #ifndef __DAEMON_H__
 #define __DAEMON_H__
 
-#include "net_user.h"
+#include <net_user.h>
 
 #define SWITCH_VERSION 3
 
diff --git a/arch/um/drivers/daemon_kern.c b/arch/um/drivers/daemon_kern.c
index b4a1522f215..7568cc2f3cd 100644
--- a/arch/um/drivers/daemon_kern.c
+++ b/arch/um/drivers/daemon_kern.c
@@ -6,9 +6,9 @@
  * Licensed under the GPL.
  */
 
-#include "linux/init.h"
+#include <linux/init.h>
 #include <linux/netdevice.h>
-#include "net_kern.h"
+#include <net_kern.h>
 #include "daemon.h"
 
 struct daemon_init {
diff --git a/arch/um/drivers/daemon_user.c b/arch/um/drivers/daemon_user.c
index a4fd7bc14af..8813c10d017 100644
--- a/arch/um/drivers/daemon_user.c
+++ b/arch/um/drivers/daemon_user.c
@@ -14,9 +14,9 @@
 #include <sys/time.h>
 #include <sys/un.h>
 #include "daemon.h"
-#include "net_user.h"
-#include "os.h"
-#include "um_malloc.h"
+#include <net_user.h>
+#include <os.h>
+#include <um_malloc.h>
 
 enum request_type { REQ_NEW_CONTROL };
 
diff --git a/arch/um/drivers/fd.c b/arch/um/drivers/fd.c
index 5b81d257441..a13a427b996 100644
--- a/arch/um/drivers/fd.c
+++ b/arch/um/drivers/fd.c
@@ -9,8 +9,8 @@
 #include <errno.h>
 #include <termios.h>
 #include "chan_user.h"
-#include "os.h"
-#include "um_malloc.h"
+#include <os.h>
+#include <um_malloc.h>
 
 struct fd_chan {
 	int fd;
diff --git a/arch/um/drivers/harddog_user.c b/arch/um/drivers/harddog_user.c
index 0345d6206d4..f99b32a4dbf 100644
--- a/arch/um/drivers/harddog_user.c
+++ b/arch/um/drivers/harddog_user.c
@@ -6,7 +6,7 @@
 #include <stdio.h>
 #include <unistd.h>
 #include <errno.h>
-#include "os.h"
+#include <os.h>
 
 struct dog_data {
 	int stdin;
diff --git a/arch/um/drivers/hostaudio_kern.c b/arch/um/drivers/hostaudio_kern.c
index f9f6a4e2059..9b90fdc4b15 100644
--- a/arch/um/drivers/hostaudio_kern.c
+++ b/arch/um/drivers/hostaudio_kern.c
@@ -3,15 +3,15 @@
  * Licensed under the GPL
  */
 
-#include "linux/fs.h"
-#include "linux/module.h"
-#include "linux/slab.h"
-#include "linux/sound.h"
-#include "linux/soundcard.h"
-#include "linux/mutex.h"
-#include "asm/uaccess.h"
-#include "init.h"
-#include "os.h"
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/sound.h>
+#include <linux/soundcard.h>
+#include <linux/mutex.h>
+#include <asm/uaccess.h>
+#include <init.h>
+#include <os.h>
 
 struct hostaudio_state {
 	int fd;
diff --git a/arch/um/drivers/line.c b/arch/um/drivers/line.c
index bbaf2c59830..b8e841c128a 100644
--- a/arch/um/drivers/line.c
+++ b/arch/um/drivers/line.c
@@ -3,15 +3,15 @@
  * Licensed under the GPL
  */
 
-#include "linux/irqreturn.h"
-#include "linux/kd.h"
-#include "linux/sched.h"
-#include "linux/slab.h"
+#include <linux/irqreturn.h>
+#include <linux/kd.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
 #include "chan.h"
-#include "irq_kern.h"
-#include "irq_user.h"
-#include "kern_util.h"
-#include "os.h"
+#include <irq_kern.h>
+#include <irq_user.h>
+#include <kern_util.h>
+#include <os.h>
 
 #define LINE_BUFSIZE 4096
 
diff --git a/arch/um/drivers/line.h b/arch/um/drivers/line.h
index bae95611e7a..138a14526d9 100644
--- a/arch/um/drivers/line.h
+++ b/arch/um/drivers/line.h
@@ -6,12 +6,12 @@
 #ifndef __LINE_H__
 #define __LINE_H__
 
-#include "linux/list.h"
-#include "linux/workqueue.h"
-#include "linux/tty.h"
-#include "linux/interrupt.h"
-#include "linux/spinlock.h"
-#include "linux/mutex.h"
+#include <linux/list.h>
+#include <linux/workqueue.h>
+#include <linux/tty.h>
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/mutex.h>
 #include "chan_user.h"
 #include "mconsole_kern.h"
 
diff --git a/arch/um/drivers/mconsole.h b/arch/um/drivers/mconsole.h
index c139ae1d682..8b22535c62c 100644
--- a/arch/um/drivers/mconsole.h
+++ b/arch/um/drivers/mconsole.h
@@ -12,7 +12,7 @@
 #define u32 uint32_t
 #endif
 
-#include "sysdep/ptrace.h"
+#include <sysdep/ptrace.h>
 
 #define MCONSOLE_MAGIC (0xcafebabe)
 #define MCONSOLE_MAX_DATA (512)
diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c
index 664a60e8dfb..25e63fdd802 100644
--- a/arch/um/drivers/mconsole_kern.c
+++ b/arch/um/drivers/mconsole_kern.c
@@ -24,13 +24,13 @@
 #include <asm/uaccess.h>
 #include <asm/switch_to.h>
 
-#include "init.h"
-#include "irq_kern.h"
-#include "irq_user.h"
-#include "kern_util.h"
+#include <init.h>
+#include <irq_kern.h>
+#include <irq_user.h>
+#include <kern_util.h>
 #include "mconsole.h"
 #include "mconsole_kern.h"
-#include "os.h"
+#include <os.h>
 
 static int do_unlink_socket(struct notifier_block *notifier,
 			    unsigned long what, void *data)
diff --git a/arch/um/drivers/mconsole_kern.h b/arch/um/drivers/mconsole_kern.h
index d2fe07e7895..7a0c6a1ad1d 100644
--- a/arch/um/drivers/mconsole_kern.h
+++ b/arch/um/drivers/mconsole_kern.h
@@ -6,7 +6,7 @@
 #ifndef __MCONSOLE_KERN_H__
 #define __MCONSOLE_KERN_H__
 
-#include "linux/list.h"
+#include <linux/list.h>
 #include "mconsole.h"
 
 struct mconsole_entry {
diff --git a/arch/um/drivers/mmapper_kern.c b/arch/um/drivers/mmapper_kern.c
index c0ef803c7c7..62145c27616 100644
--- a/arch/um/drivers/mmapper_kern.c
+++ b/arch/um/drivers/mmapper_kern.c
@@ -18,7 +18,7 @@
 #include <linux/mm.h>
 
 #include <asm/uaccess.h>
-#include "mem_user.h"
+#include <mem_user.h>
 
 /* These are set in mmapper_init, which is called at boot time */
 static unsigned long mmapper_size;
diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c
index 458d324f062..b1314ebf1f7 100644
--- a/arch/um/drivers/net_kern.c
+++ b/arch/um/drivers/net_kern.c
@@ -18,12 +18,12 @@
 #include <linux/skbuff.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
-#include "init.h"
-#include "irq_kern.h"
-#include "irq_user.h"
+#include <init.h>
+#include <irq_kern.h>
+#include <irq_user.h>
 #include "mconsole_kern.h"
-#include "net_kern.h"
-#include "net_user.h"
+#include <net_kern.h>
+#include <net_user.h>
 
 #define DRIVER_NAME "uml-netdev"
 
diff --git a/arch/um/drivers/net_user.c b/arch/um/drivers/net_user.c
index 05090c37fa8..cd14157b556 100644
--- a/arch/um/drivers/net_user.c
+++ b/arch/um/drivers/net_user.c
@@ -11,9 +11,9 @@
 #include <string.h>
 #include <sys/socket.h>
 #include <sys/wait.h>
-#include "net_user.h"
-#include "os.h"
-#include "um_malloc.h"
+#include <net_user.h>
+#include <os.h>
+#include <um_malloc.h>
 
 int tap_open_common(void *dev, char *gate_addr)
 {
diff --git a/arch/um/drivers/null.c b/arch/um/drivers/null.c
index 2b45a1446c8..10495747ce8 100644
--- a/arch/um/drivers/null.c
+++ b/arch/um/drivers/null.c
@@ -7,7 +7,7 @@
 #include <errno.h>
 #include <fcntl.h>
 #include "chan_user.h"
-#include "os.h"
+#include <os.h>
 
 /* This address is used only as a unique identifier */
 static int null_chan;
diff --git a/arch/um/drivers/pcap_kern.c b/arch/um/drivers/pcap_kern.c
index 2860525f8ff..be0fb57bd1d 100644
--- a/arch/um/drivers/pcap_kern.c
+++ b/arch/um/drivers/pcap_kern.c
@@ -3,9 +3,9 @@
  * Licensed under the GPL.
  */
 
-#include "linux/init.h"
+#include <linux/init.h>
 #include <linux/netdevice.h>
-#include "net_kern.h"
+#include <net_kern.h>
 #include "pcap_user.h"
 
 struct pcap_init {
diff --git a/arch/um/drivers/pcap_user.c b/arch/um/drivers/pcap_user.c
index 702a75b190e..c07b9c752c8 100644
--- a/arch/um/drivers/pcap_user.c
+++ b/arch/um/drivers/pcap_user.c
@@ -7,9 +7,9 @@
 #include <pcap.h>
 #include <string.h>
 #include <asm/types.h>
-#include "net_user.h"
+#include <net_user.h>
 #include "pcap_user.h"
-#include "um_malloc.h"
+#include <um_malloc.h>
 
 #define PCAP_FD(p) (*(int *)(p))
 
diff --git a/arch/um/drivers/pcap_user.h b/arch/um/drivers/pcap_user.h
index d8ba6153f91..1ca7c764cc6 100644
--- a/arch/um/drivers/pcap_user.h
+++ b/arch/um/drivers/pcap_user.h
@@ -3,7 +3,7 @@
  * Licensed under the GPL
  */
 
-#include "net_user.h"
+#include <net_user.h>
 
 struct pcap_data {
 	char *host_if;
diff --git a/arch/um/drivers/port_kern.c b/arch/um/drivers/port_kern.c
index 1d83d50236e..40ca5cc275e 100644
--- a/arch/um/drivers/port_kern.c
+++ b/arch/um/drivers/port_kern.c
@@ -3,16 +3,16 @@
  * Licensed under the GPL
  */
 
-#include "linux/completion.h"
-#include "linux/interrupt.h"
-#include "linux/list.h"
-#include "linux/mutex.h"
-#include "linux/slab.h"
-#include "linux/workqueue.h"
-#include "asm/atomic.h"
-#include "init.h"
-#include "irq_kern.h"
-#include "os.h"
+#include <linux/completion.h>
+#include <linux/interrupt.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/workqueue.h>
+#include <asm/atomic.h>
+#include <init.h>
+#include <irq_kern.h>
+#include <os.h>
 #include "port.h"
 
 struct port_list {
diff --git a/arch/um/drivers/port_user.c b/arch/um/drivers/port_user.c
index 7b010b76ddf..9a8e1b64c22 100644
--- a/arch/um/drivers/port_user.c
+++ b/arch/um/drivers/port_user.c
@@ -10,9 +10,9 @@
 #include <unistd.h>
 #include <netinet/in.h>
 #include "chan_user.h"
-#include "os.h"
+#include <os.h>
 #include "port.h"
-#include "um_malloc.h"
+#include <um_malloc.h>
 
 struct port_chan {
 	int raw;
diff --git a/arch/um/drivers/pty.c b/arch/um/drivers/pty.c
index cff2b75d31f..f1fcc2cedb5 100644
--- a/arch/um/drivers/pty.c
+++ b/arch/um/drivers/pty.c
@@ -12,8 +12,8 @@
 #include <termios.h>
 #include <sys/stat.h>
 #include "chan_user.h"
-#include "os.h"
-#include "um_malloc.h"
+#include <os.h>
+#include <um_malloc.h>
 
 struct pty_chan {
 	void (*announce)(char *dev_name, int dev);
diff --git a/arch/um/drivers/random.c b/arch/um/drivers/random.c
index e32c6aa6396..9e3a7220582 100644
--- a/arch/um/drivers/random.c
+++ b/arch/um/drivers/random.c
@@ -13,8 +13,8 @@
 #include <linux/miscdevice.h>
 #include <linux/delay.h>
 #include <asm/uaccess.h>
-#include "irq_kern.h"
-#include "os.h"
+#include <irq_kern.h>
+#include <os.h>
 
 /*
  * core module and version information
diff --git a/arch/um/drivers/slip_common.c b/arch/um/drivers/slip_common.c
index e89cfc68fc3..f597fa7c91d 100644
--- a/arch/um/drivers/slip_common.c
+++ b/arch/um/drivers/slip_common.c
@@ -1,6 +1,6 @@
 #include <string.h>
 #include "slip_common.h"
-#include "net_user.h"
+#include <net_user.h>
 
 int slip_proto_read(int fd, void *buf, int len, struct slip_proto *slip)
 {
diff --git a/arch/um/drivers/slip_kern.c b/arch/um/drivers/slip_kern.c
index dd2aadc14af..ed5249fc057 100644
--- a/arch/um/drivers/slip_kern.c
+++ b/arch/um/drivers/slip_kern.c
@@ -6,7 +6,7 @@
 #include <linux/if_arp.h>
 #include <linux/init.h>
 #include <linux/netdevice.h>
-#include "net_kern.h"
+#include <net_kern.h>
 #include "slip.h"
 
 struct slip_init {
diff --git a/arch/um/drivers/slip_user.c b/arch/um/drivers/slip_user.c
index 932b4d69bec..55c290d925f 100644
--- a/arch/um/drivers/slip_user.c
+++ b/arch/um/drivers/slip_user.c
@@ -11,10 +11,10 @@
 #include <string.h>
 #include <sys/termios.h>
 #include <sys/wait.h>
-#include "net_user.h"
-#include "os.h"
+#include <net_user.h>
+#include <os.h>
 #include "slip.h"
-#include "um_malloc.h"
+#include <um_malloc.h>
 
 static int slip_user_init(void *data, void *dev)
 {
diff --git a/arch/um/drivers/slirp_kern.c b/arch/um/drivers/slirp_kern.c
index e376284f0fb..4ef11ca7cac 100644
--- a/arch/um/drivers/slirp_kern.c
+++ b/arch/um/drivers/slirp_kern.c
@@ -4,11 +4,11 @@
  */
 
 #include <linux/if_arp.h>
-#include "linux/init.h"
+#include <linux/init.h>
 #include <linux/netdevice.h>
 #include <linux/string.h>
-#include "net_kern.h"
-#include "net_user.h"
+#include <net_kern.h>
+#include <net_user.h>
 #include "slirp.h"
 
 struct slirp_init {
diff --git a/arch/um/drivers/slirp_user.c b/arch/um/drivers/slirp_user.c
index db4adb639ff..c999d187abb 100644
--- a/arch/um/drivers/slirp_user.c
+++ b/arch/um/drivers/slirp_user.c
@@ -7,8 +7,8 @@
 #include <errno.h>
 #include <string.h>
 #include <sys/wait.h>
-#include "net_user.h"
-#include "os.h"
+#include <net_user.h>
+#include <os.h>
 #include "slirp.h"
 
 static int slirp_user_init(void *data, void *dev)
diff --git a/arch/um/drivers/ssl.c b/arch/um/drivers/ssl.c
index 7e86f007012..16fdd0a0f9d 100644
--- a/arch/um/drivers/ssl.c
+++ b/arch/um/drivers/ssl.c
@@ -3,19 +3,19 @@
  * Licensed under the GPL
  */
 
-#include "linux/fs.h"
-#include "linux/tty.h"
-#include "linux/tty_driver.h"
-#include "linux/major.h"
-#include "linux/mm.h"
-#include "linux/init.h"
-#include "linux/console.h"
-#include "asm/termbits.h"
-#include "asm/irq.h"
+#include <linux/fs.h>
+#include <linux/tty.h>
+#include <linux/tty_driver.h>
+#include <linux/major.h>
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <linux/console.h>
+#include <asm/termbits.h>
+#include <asm/irq.h>
 #include "ssl.h"
 #include "chan.h"
-#include "init.h"
-#include "irq_user.h"
+#include <init.h>
+#include <irq_user.h>
 #include "mconsole_kern.h"
 
 static const int ssl_version = 1;
diff --git a/arch/um/drivers/stdio_console.c b/arch/um/drivers/stdio_console.c
index 929b99a261f..827777af3f6 100644
--- a/arch/um/drivers/stdio_console.c
+++ b/arch/um/drivers/stdio_console.c
@@ -3,27 +3,27 @@
  * Licensed under the GPL
  */
 
-#include "linux/posix_types.h"
-#include "linux/tty.h"
-#include "linux/tty_flip.h"
-#include "linux/types.h"
-#include "linux/major.h"
-#include "linux/kdev_t.h"
-#include "linux/console.h"
-#include "linux/string.h"
-#include "linux/sched.h"
-#include "linux/list.h"
-#include "linux/init.h"
-#include "linux/interrupt.h"
-#include "linux/slab.h"
-#include "linux/hardirq.h"
-#include "asm/current.h"
-#include "asm/irq.h"
+#include <linux/posix_types.h>
+#include <linux/tty.h>
+#include <linux/tty_flip.h>
+#include <linux/types.h>
+#include <linux/major.h>
+#include <linux/kdev_t.h>
+#include <linux/console.h>
+#include <linux/string.h>
+#include <linux/sched.h>
+#include <linux/list.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+#include <linux/hardirq.h>
+#include <asm/current.h>
+#include <asm/irq.h>
 #include "stdio_console.h"
 #include "chan.h"
-#include "irq_user.h"
+#include <irq_user.h>
 #include "mconsole_kern.h"
-#include "init.h"
+#include <init.h>
 
 #define MAX_TTYS (16)
 
diff --git a/arch/um/drivers/tty.c b/arch/um/drivers/tty.c
index a97391f9ec5..eaa201bca5e 100644
--- a/arch/um/drivers/tty.c
+++ b/arch/um/drivers/tty.c
@@ -7,8 +7,8 @@
 #include <fcntl.h>
 #include <termios.h>
 #include "chan_user.h"
-#include "os.h"
-#include "um_malloc.h"
+#include <os.h>
+#include <um_malloc.h>
 
 struct tty_chan {
 	char *dev;
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index 0643e5bc9f4..41bf72073cc 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -33,12 +33,12 @@
 #include <linux/platform_device.h>
 #include <linux/scatterlist.h>
 #include <asm/tlbflush.h>
-#include "kern_util.h"
+#include <kern_util.h>
 #include "mconsole_kern.h"
-#include "init.h"
-#include "irq_kern.h"
+#include <init.h>
+#include <irq_kern.h>
 #include "ubd.h"
-#include "os.h"
+#include <os.h>
 #include "cow.h"
 
 enum ubd_req { UBD_READ, UBD_WRITE };
diff --git a/arch/um/drivers/ubd_user.c b/arch/um/drivers/ubd_user.c
index ffe02c431de..a703e45d8aa 100644
--- a/arch/um/drivers/ubd_user.c
+++ b/arch/um/drivers/ubd_user.c
@@ -19,7 +19,7 @@
 #include <byteswap.h>
 
 #include "ubd.h"
-#include "os.h"
+#include <os.h>
 
 void ignore_sigwinch_sig(void)
 {
diff --git a/arch/um/drivers/umcast.h b/arch/um/drivers/umcast.h
index 6f8c0fe890f..c190c644091 100644
--- a/arch/um/drivers/umcast.h
+++ b/arch/um/drivers/umcast.h
@@ -6,7 +6,7 @@
 #ifndef __DRIVERS_UMCAST_H
 #define __DRIVERS_UMCAST_H
 
-#include "net_user.h"
+#include <net_user.h>
 
 struct umcast_data {
 	char *addr;
diff --git a/arch/um/drivers/umcast_kern.c b/arch/um/drivers/umcast_kern.c
index 42dab11d2ec..f5ba6e37791 100644
--- a/arch/um/drivers/umcast_kern.c
+++ b/arch/um/drivers/umcast_kern.c
@@ -11,10 +11,10 @@
  * Licensed under the GPL.
  */
 
-#include "linux/init.h"
+#include <linux/init.h>
 #include <linux/netdevice.h>
 #include "umcast.h"
-#include "net_kern.h"
+#include <net_kern.h>
 
 struct umcast_init {
 	char *addr;
diff --git a/arch/um/drivers/umcast_user.c b/arch/um/drivers/umcast_user.c
index 010fa2d849e..6074184bb51 100644
--- a/arch/um/drivers/umcast_user.c
+++ b/arch/um/drivers/umcast_user.c
@@ -16,8 +16,8 @@
 #include <errno.h>
 #include <netinet/in.h>
 #include "umcast.h"
-#include "net_user.h"
-#include "um_malloc.h"
+#include <net_user.h>
+#include <um_malloc.h>
 
 static struct sockaddr_in *new_addr(char *addr, unsigned short port)
 {
diff --git a/arch/um/drivers/vde_kern.c b/arch/um/drivers/vde_kern.c
index 1b852bffdeb..6a365fadc7c 100644
--- a/arch/um/drivers/vde_kern.c
+++ b/arch/um/drivers/vde_kern.c
@@ -7,10 +7,10 @@
  *
  */
 
-#include "linux/init.h"
+#include <linux/init.h>
 #include <linux/netdevice.h>
-#include "net_kern.h"
-#include "net_user.h"
+#include <net_kern.h>
+#include <net_user.h>
 #include "vde.h"
 
 static void vde_init(struct net_device *dev, void *data)
diff --git a/arch/um/drivers/vde_user.c b/arch/um/drivers/vde_user.c
index b8c286748d3..64cb630d115 100644
--- a/arch/um/drivers/vde_user.c
+++ b/arch/um/drivers/vde_user.c
@@ -6,8 +6,8 @@
 #include <stddef.h>
 #include <errno.h>
 #include <libvdeplug.h>
-#include "net_user.h"
-#include "um_malloc.h"
+#include <net_user.h>
+#include <um_malloc.h>
 #include "vde.h"
 
 static int vde_user_init(void *data, void *dev)
diff --git a/arch/um/drivers/xterm.c b/arch/um/drivers/xterm.c
index 969110e5648..20e30be4479 100644
--- a/arch/um/drivers/xterm.c
+++ b/arch/um/drivers/xterm.c
@@ -11,8 +11,8 @@
 #include <string.h>
 #include <termios.h>
 #include "chan_user.h"
-#include "os.h"
-#include "um_malloc.h"
+#include <os.h>
+#include <um_malloc.h>
 #include "xterm.h"
 
 struct xterm_chan {
diff --git a/arch/um/drivers/xterm_kern.c b/arch/um/drivers/xterm_kern.c
index e3031e69445..e8f9957bfbf 100644
--- a/arch/um/drivers/xterm_kern.c
+++ b/arch/um/drivers/xterm_kern.c
@@ -7,8 +7,8 @@
 #include <linux/completion.h>
 #include <linux/irqreturn.h>
 #include <asm/irq.h>
-#include "irq_kern.h"
-#include "os.h"
+#include <irq_kern.h>
+#include <os.h>
 
 struct xterm_wait {
 	struct completion ready;
diff --git a/arch/um/include/asm/dma.h b/arch/um/include/asm/dma.h
index 9f6139a8a52..f88c5860520 100644
--- a/arch/um/include/asm/dma.h
+++ b/arch/um/include/asm/dma.h
@@ -1,7 +1,7 @@
 #ifndef __UM_DMA_H
 #define __UM_DMA_H
 
-#include "asm/io.h"
+#include <asm/io.h>
 
 extern unsigned long uml_physmem;
 
diff --git a/arch/um/include/asm/mmu.h b/arch/um/include/asm/mmu.h
index 53e8b498ebb..da705448590 100644
--- a/arch/um/include/asm/mmu.h
+++ b/arch/um/include/asm/mmu.h
@@ -6,7 +6,7 @@
 #ifndef __ARCH_UM_MMU_H
 #define __ARCH_UM_MMU_H
 
-#include "mm_id.h"
+#include <mm_id.h>
 #include <asm/mm_context.h>
 
 typedef struct mm_context {
diff --git a/arch/um/include/asm/page.h b/arch/um/include/asm/page.h
index 7cfc3cedce8..5ff53d9185f 100644
--- a/arch/um/include/asm/page.h
+++ b/arch/um/include/asm/page.h
@@ -99,7 +99,7 @@ extern unsigned long uml_physmem;
 
 #define __va_space (8*1024*1024)
 
-#include "mem.h"
+#include <mem.h>
 
 /* Cast to unsigned long before casting to void * to avoid a warning from
  * mmap_kmem about cutting a long long down to a void *.  Not sure that
diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h
index 5888f1b8347..ae02909a187 100644
--- a/arch/um/include/asm/pgtable.h
+++ b/arch/um/include/asm/pgtable.h
@@ -23,9 +23,9 @@
 				   pte_present gives true */
 
 #ifdef CONFIG_3_LEVEL_PGTABLES
-#include "asm/pgtable-3level.h"
+#include <asm/pgtable-3level.h>
 #else
-#include "asm/pgtable-2level.h"
+#include <asm/pgtable-2level.h>
 #endif
 
 extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h
index 33a6a2423bd..24e97be814b 100644
--- a/arch/um/include/asm/processor-generic.h
+++ b/arch/um/include/asm/processor-generic.h
@@ -10,9 +10,9 @@ struct pt_regs;
 
 struct task_struct;
 
-#include "asm/ptrace.h"
-#include "registers.h"
-#include "sysdep/archsetjmp.h"
+#include <asm/ptrace.h>
+#include <registers.h>
+#include <sysdep/archsetjmp.h>
 
 #include <linux/prefetch.h>
 
diff --git a/arch/um/include/asm/ptrace-generic.h b/arch/um/include/asm/ptrace-generic.h
index 442f1d025dc..cb9b3c47ca8 100644
--- a/arch/um/include/asm/ptrace-generic.h
+++ b/arch/um/include/asm/ptrace-generic.h
@@ -9,7 +9,7 @@
 #ifndef __ASSEMBLY__
 
 #include <asm/ptrace-abi.h>
-#include "sysdep/ptrace.h"
+#include <sysdep/ptrace.h>
 
 struct pt_regs {
 	struct uml_pt_regs regs;
diff --git a/arch/um/include/asm/smp.h b/arch/um/include/asm/smp.h
index 4a4b09d4f36..e4507938d8c 100644
--- a/arch/um/include/asm/smp.h
+++ b/arch/um/include/asm/smp.h
@@ -3,9 +3,9 @@
 
 #ifdef CONFIG_SMP
 
-#include "linux/bitops.h"
-#include "asm/current.h"
-#include "linux/cpumask.h"
+#include <linux/bitops.h>
+#include <asm/current.h>
+#include <linux/cpumask.h>
 
 #define raw_smp_processor_id() (current_thread->cpu)
 
diff --git a/arch/um/include/shared/arch.h b/arch/um/include/shared/arch.h
index 2de92a08a76..4f46abda060 100644
--- a/arch/um/include/shared/arch.h
+++ b/arch/um/include/shared/arch.h
@@ -6,7 +6,7 @@
 #ifndef __ARCH_H__
 #define __ARCH_H__
 
-#include "sysdep/ptrace.h"
+#include <sysdep/ptrace.h>
 
 extern void arch_check_bugs(void);
 extern int arch_fixup(unsigned long address, struct uml_pt_regs *regs);
diff --git a/arch/um/include/shared/as-layout.h b/arch/um/include/shared/as-layout.h
index 86daa546181..694c792bab4 100644
--- a/arch/um/include/shared/as-layout.h
+++ b/arch/um/include/shared/as-layout.h
@@ -35,7 +35,7 @@
 
 #ifndef __ASSEMBLY__
 
-#include "sysdep/ptrace.h"
+#include <sysdep/ptrace.h>
 
 struct cpu_task {
 	int pid;
diff --git a/arch/um/include/shared/irq_kern.h b/arch/um/include/shared/irq_kern.h
index 7a5bfa6291b..e05bd667de1 100644
--- a/arch/um/include/shared/irq_kern.h
+++ b/arch/um/include/shared/irq_kern.h
@@ -6,8 +6,8 @@
 #ifndef __IRQ_KERN_H__
 #define __IRQ_KERN_H__
 
-#include "linux/interrupt.h"
-#include "asm/ptrace.h"
+#include <linux/interrupt.h>
+#include <asm/ptrace.h>
 
 extern int um_request_irq(unsigned int irq, int fd, int type,
 			  irq_handler_t handler,
diff --git a/arch/um/include/shared/irq_user.h b/arch/um/include/shared/irq_user.h
index 2b6d703925b..df563305395 100644
--- a/arch/um/include/shared/irq_user.h
+++ b/arch/um/include/shared/irq_user.h
@@ -6,7 +6,7 @@
 #ifndef __IRQ_USER_H__
 #define __IRQ_USER_H__
 
-#include "sysdep/ptrace.h"
+#include <sysdep/ptrace.h>
 
 struct irq_fd {
 	struct irq_fd *next;
diff --git a/arch/um/include/shared/kern_util.h b/arch/um/include/shared/kern_util.h
index af6b6dc868b..83a91f97633 100644
--- a/arch/um/include/shared/kern_util.h
+++ b/arch/um/include/shared/kern_util.h
@@ -6,8 +6,8 @@
 #ifndef __KERN_UTIL_H__
 #define __KERN_UTIL_H__
 
-#include "sysdep/ptrace.h"
-#include "sysdep/faultinfo.h"
+#include <sysdep/ptrace.h>
+#include <sysdep/faultinfo.h>
 
 struct siginfo;
 
diff --git a/arch/um/include/shared/longjmp.h b/arch/um/include/shared/longjmp.h
index e860bc5848e..9bdddf4c405 100644
--- a/arch/um/include/shared/longjmp.h
+++ b/arch/um/include/shared/longjmp.h
@@ -1,8 +1,8 @@
 #ifndef __UML_LONGJMP_H
 #define __UML_LONGJMP_H
 
-#include "sysdep/archsetjmp.h"
-#include "os.h"
+#include <sysdep/archsetjmp.h>
+#include <os.h>
 
 extern int setjmp(jmp_buf);
 extern void longjmp(jmp_buf, int);
diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h
index 89b686c1a3e..44883049c11 100644
--- a/arch/um/include/shared/os.h
+++ b/arch/um/include/shared/os.h
@@ -7,9 +7,9 @@
 #define __OS_H__
 
 #include <stdarg.h>
-#include "irq_user.h"
-#include "longjmp.h"
-#include "mm_id.h"
+#include <irq_user.h>
+#include <longjmp.h>
+#include <mm_id.h>
 
 #define CATCH_EINTR(expr) while ((errno = 0, ((expr) < 0)) && (errno == EINTR))
 
diff --git a/arch/um/include/shared/registers.h b/arch/um/include/shared/registers.h
index f1e0aa56c52..f5b76355ad7 100644
--- a/arch/um/include/shared/registers.h
+++ b/arch/um/include/shared/registers.h
@@ -6,8 +6,8 @@
 #ifndef __REGISTERS_H
 #define __REGISTERS_H
 
-#include "sysdep/ptrace.h"
-#include "sysdep/archsetjmp.h"
+#include <sysdep/ptrace.h>
+#include <sysdep/archsetjmp.h>
 
 extern int save_fp_registers(int pid, unsigned long *fp_regs);
 extern int restore_fp_registers(int pid, unsigned long *fp_regs);
diff --git a/arch/um/include/shared/skas/skas.h b/arch/um/include/shared/skas/skas.h
index 64d2c744330..c45df961c87 100644
--- a/arch/um/include/shared/skas/skas.h
+++ b/arch/um/include/shared/skas/skas.h
@@ -6,7 +6,7 @@
 #ifndef __SKAS_H
 #define __SKAS_H
 
-#include "sysdep/ptrace.h"
+#include <sysdep/ptrace.h>
 
 extern int userspace_pid[];
 extern int proc_mm, ptrace_faultinfo, ptrace_ldt;
diff --git a/arch/um/include/shared/skas_ptrace.h b/arch/um/include/shared/skas_ptrace.h
index 3d31bbacd01..630a9c92b93 100644
--- a/arch/um/include/shared/skas_ptrace.h
+++ b/arch/um/include/shared/skas_ptrace.h
@@ -9,6 +9,6 @@
 #define PTRACE_FAULTINFO 52
 #define PTRACE_SWITCH_MM 55
 
-#include "sysdep/skas_ptrace.h"
+#include <sysdep/skas_ptrace.h>
 
 #endif
diff --git a/arch/um/kernel/asm-offsets.c b/arch/um/kernel/asm-offsets.c
index 91ea538e161..1fb12235ab9 100644
--- a/arch/um/kernel/asm-offsets.c
+++ b/arch/um/kernel/asm-offsets.c
@@ -1 +1 @@
-#include "sysdep/kernel-offsets.h"
+#include <sysdep/kernel-offsets.h>
diff --git a/arch/um/kernel/config.c.in b/arch/um/kernel/config.c.in
index b7a43feafde..972bf165956 100644
--- a/arch/um/kernel/config.c.in
+++ b/arch/um/kernel/config.c.in
@@ -5,7 +5,7 @@
 
 #include <stdio.h>
 #include <stdlib.h>
-#include "init.h"
+#include <init.h>
 
 static __initdata const char *config[] = {
 "CONFIG"
diff --git a/arch/um/kernel/dyn.lds.S b/arch/um/kernel/dyn.lds.S
index a3cab6d3ae0..fb8fd6fb656 100644
--- a/arch/um/kernel/dyn.lds.S
+++ b/arch/um/kernel/dyn.lds.S
@@ -89,7 +89,7 @@ SECTIONS
 
   .kstrtab : { *(.kstrtab) }
 
-  #include "asm/common.lds.S"
+  #include <asm/common.lds.S>
 
   init.data : { INIT_DATA }
 
diff --git a/arch/um/kernel/early_printk.c b/arch/um/kernel/early_printk.c
index ec649bf72f6..49480f09245 100644
--- a/arch/um/kernel/early_printk.c
+++ b/arch/um/kernel/early_printk.c
@@ -9,7 +9,7 @@
 #include <linux/kernel.h>
 #include <linux/console.h>
 #include <linux/init.h>
-#include "os.h"
+#include <os.h>
 
 static void early_console_write(struct console *con, const char *s, unsigned int n)
 {
diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c
index 8c82786da82..de66c421ae9 100644
--- a/arch/um/kernel/exec.c
+++ b/arch/um/kernel/exec.c
@@ -12,10 +12,10 @@
 #include <asm/current.h>
 #include <asm/processor.h>
 #include <asm/uaccess.h>
-#include "as-layout.h"
-#include "mem_user.h"
-#include "skas.h"
-#include "os.h"
+#include <as-layout.h>
+#include <mem_user.h>
+#include <skas.h>
+#include <os.h>
 #include "internal.h"
 
 void flush_thread(void)
diff --git a/arch/um/kernel/gmon_syms.c b/arch/um/kernel/gmon_syms.c
index e9bcf247bce..1bf61266da8 100644
--- a/arch/um/kernel/gmon_syms.c
+++ b/arch/um/kernel/gmon_syms.c
@@ -3,7 +3,7 @@
  * Licensed under the GPL
  */
 
-#include "linux/module.h"
+#include <linux/module.h>
 
 extern void __bb_init_func(void *)  __attribute__((weak));
 EXPORT_SYMBOL(__bb_init_func);
diff --git a/arch/um/kernel/gprof_syms.c b/arch/um/kernel/gprof_syms.c
index e2f043d0de6..74ddb44288a 100644
--- a/arch/um/kernel/gprof_syms.c
+++ b/arch/um/kernel/gprof_syms.c
@@ -3,7 +3,7 @@
  * Licensed under the GPL
  */
 
-#include "linux/module.h"
+#include <linux/module.h>
 
 extern void mcount(void);
 EXPORT_SYMBOL(mcount);
diff --git a/arch/um/kernel/initrd.c b/arch/um/kernel/initrd.c
index 10cc18f729f..55cead809b1 100644
--- a/arch/um/kernel/initrd.c
+++ b/arch/um/kernel/initrd.c
@@ -3,12 +3,12 @@
  * Licensed under the GPL
  */
 
-#include "linux/init.h"
-#include "linux/bootmem.h"
-#include "linux/initrd.h"
-#include "asm/types.h"
-#include "init.h"
-#include "os.h"
+#include <linux/init.h>
+#include <linux/bootmem.h>
+#include <linux/initrd.h>
+#include <asm/types.h>
+#include <init.h>
+#include <os.h>
 
 /* Changed by uml_initrd_setup, which is a setup */
 static char *initrd __initdata = NULL;
diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c
index 9883026f073..36e12f0cefd 100644
--- a/arch/um/kernel/irq.c
+++ b/arch/um/kernel/irq.c
@@ -5,17 +5,17 @@
  *	Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar
  */
 
-#include "linux/cpumask.h"
-#include "linux/hardirq.h"
-#include "linux/interrupt.h"
-#include "linux/kernel_stat.h"
-#include "linux/module.h"
-#include "linux/sched.h"
-#include "linux/seq_file.h"
-#include "linux/slab.h"
-#include "as-layout.h"
-#include "kern_util.h"
-#include "os.h"
+#include <linux/cpumask.h>
+#include <linux/hardirq.h>
+#include <linux/interrupt.h>
+#include <linux/kernel_stat.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <as-layout.h>
+#include <kern_util.h>
+#include <os.h>
 
 /*
  * This list is accessed under irq_lock, except in sigio_handler,
diff --git a/arch/um/kernel/ksyms.c b/arch/um/kernel/ksyms.c
index e17bea0b22e..543c0475693 100644
--- a/arch/um/kernel/ksyms.c
+++ b/arch/um/kernel/ksyms.c
@@ -4,7 +4,7 @@
  */
 
 #include <linux/module.h>
-#include "os.h"
+#include <os.h>
 
 EXPORT_SYMBOL(set_signals);
 EXPORT_SYMBOL(get_signals);
diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
index ebb86b21844..5abcbfbe7e2 100644
--- a/arch/um/kernel/mem.c
+++ b/arch/um/kernel/mem.c
@@ -12,12 +12,12 @@
 #include <linux/slab.h>
 #include <asm/fixmap.h>
 #include <asm/page.h>
-#include "as-layout.h"
-#include "init.h"
-#include "kern.h"
-#include "kern_util.h"
-#include "mem_user.h"
-#include "os.h"
+#include <as-layout.h>
+#include <init.h>
+#include <kern.h>
+#include <kern_util.h>
+#include <mem_user.h>
+#include <os.h>
 
 /* allocated in paging_init, zeroed in mem_init, and unchanged thereafter */
 unsigned long *empty_zero_page = NULL;
diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c
index c5f5afa5074..41f53240e79 100644
--- a/arch/um/kernel/process.c
+++ b/arch/um/kernel/process.c
@@ -23,10 +23,10 @@
 #include <asm/pgtable.h>
 #include <asm/mmu_context.h>
 #include <asm/uaccess.h>
-#include "as-layout.h"
-#include "kern_util.h"
-#include "os.h"
-#include "skas.h"
+#include <as-layout.h>
+#include <kern_util.h>
+#include <os.h>
+#include <skas.h>
 
 /*
  * This is a per-cpu array.  A processor only modifies its entry and it only
diff --git a/arch/um/kernel/reboot.c b/arch/um/kernel/reboot.c
index 3d15243ce69..ced8903921a 100644
--- a/arch/um/kernel/reboot.c
+++ b/arch/um/kernel/reboot.c
@@ -3,13 +3,13 @@
  * Licensed under the GPL
  */
 
-#include "linux/sched.h"
-#include "linux/spinlock.h"
-#include "linux/slab.h"
-#include "linux/oom.h"
-#include "kern_util.h"
-#include "os.h"
-#include "skas.h"
+#include <linux/sched.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <linux/oom.h>
+#include <kern_util.h>
+#include <os.h>
+#include <skas.h>
 
 void (*pm_power_off)(void);
 
diff --git a/arch/um/kernel/sigio.c b/arch/um/kernel/sigio.c
index c88211139a5..b5e0cbb3438 100644
--- a/arch/um/kernel/sigio.c
+++ b/arch/um/kernel/sigio.c
@@ -4,9 +4,9 @@
  */
 
 #include <linux/interrupt.h>
-#include "irq_kern.h"
-#include "os.h"
-#include "sigio.h"
+#include <irq_kern.h>
+#include <os.h>
+#include <sigio.h>
 
 /* Protected by sigio_lock() called from write_sigio_workaround */
 static int sigio_irq_fd = -1;
diff --git a/arch/um/kernel/signal.c b/arch/um/kernel/signal.c
index cc9c2350e41..db18eb6124e 100644
--- a/arch/um/kernel/signal.c
+++ b/arch/um/kernel/signal.c
@@ -9,8 +9,8 @@
 #include <asm/siginfo.h>
 #include <asm/signal.h>
 #include <asm/unistd.h>
-#include "frame_kern.h"
-#include "kern_util.h"
+#include <frame_kern.h>
+#include <kern_util.h>
 
 EXPORT_SYMBOL(block_signals);
 EXPORT_SYMBOL(unblock_signals);
diff --git a/arch/um/kernel/skas/clone.c b/arch/um/kernel/skas/clone.c
index e1fd066a352..289771dadf8 100644
--- a/arch/um/kernel/skas/clone.c
+++ b/arch/um/kernel/skas/clone.c
@@ -7,10 +7,10 @@
 #include <sched.h>
 #include <asm/unistd.h>
 #include <sys/time.h>
-#include "as-layout.h"
-#include "ptrace_user.h"
-#include "stub-data.h"
-#include "sysdep/stub.h"
+#include <as-layout.h>
+#include <ptrace_user.h>
+#include <stub-data.h>
+#include <sysdep/stub.h>
 
 /*
  * This is in a separate file because it needs to be compiled with any
diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c
index 0a49ef0c2bf..ff03067a3b1 100644
--- a/arch/um/kernel/skas/mmu.c
+++ b/arch/um/kernel/skas/mmu.c
@@ -3,14 +3,14 @@
  * Licensed under the GPL
  */
 
-#include "linux/mm.h"
-#include "linux/sched.h"
-#include "linux/slab.h"
-#include "asm/pgalloc.h"
-#include "asm/pgtable.h"
-#include "as-layout.h"
-#include "os.h"
-#include "skas.h"
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+#include <as-layout.h>
+#include <os.h>
+#include <skas.h>
 
 extern int __syscall_stub_start;
 
diff --git a/arch/um/kernel/skas/process.c b/arch/um/kernel/skas/process.c
index 0a9e57e7446..4da11b3c8dd 100644
--- a/arch/um/kernel/skas/process.c
+++ b/arch/um/kernel/skas/process.c
@@ -3,12 +3,12 @@
  * Licensed under the GPL
  */
 
-#include "linux/init.h"
-#include "linux/sched.h"
-#include "as-layout.h"
-#include "kern.h"
-#include "os.h"
-#include "skas.h"
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <as-layout.h>
+#include <kern.h>
+#include <os.h>
+#include <skas.h>
 
 int new_mm(unsigned long stack)
 {
diff --git a/arch/um/kernel/skas/syscall.c b/arch/um/kernel/skas/syscall.c
index 86368a025a9..c0681e09743 100644
--- a/arch/um/kernel/skas/syscall.c
+++ b/arch/um/kernel/skas/syscall.c
@@ -3,11 +3,11 @@
  * Licensed under the GPL
  */
 
-#include "linux/kernel.h"
-#include "linux/ptrace.h"
-#include "kern_util.h"
-#include "sysdep/ptrace.h"
-#include "sysdep/syscalls.h"
+#include <linux/kernel.h>
+#include <linux/ptrace.h>
+#include <kern_util.h>
+#include <sysdep/ptrace.h>
+#include <sysdep/syscalls.h>
 
 extern int syscall_table_size;
 #define NR_SYSCALLS (syscall_table_size / sizeof(void *))
diff --git a/arch/um/kernel/skas/uaccess.c b/arch/um/kernel/skas/uaccess.c
index cd7df79c6a5..1d3e0c17340 100644
--- a/arch/um/kernel/skas/uaccess.c
+++ b/arch/um/kernel/skas/uaccess.c
@@ -11,8 +11,8 @@
 #include <asm/current.h>
 #include <asm/page.h>
 #include <asm/pgtable.h>
-#include "kern_util.h"
-#include "os.h"
+#include <kern_util.h>
+#include <os.h>
 
 pte_t *virt_to_pte(struct mm_struct *mm, unsigned long addr)
 {
diff --git a/arch/um/kernel/smp.c b/arch/um/kernel/smp.c
index a02b7e9e6b9..5c8c3ea7db7 100644
--- a/arch/um/kernel/smp.c
+++ b/arch/um/kernel/smp.c
@@ -3,24 +3,24 @@
  * Licensed under the GPL
  */
 
-#include "linux/percpu.h"
-#include "asm/pgalloc.h"
-#include "asm/tlb.h"
+#include <linux/percpu.h>
+#include <asm/pgalloc.h>
+#include <asm/tlb.h>
 
 #ifdef CONFIG_SMP
 
-#include "linux/sched.h"
-#include "linux/module.h"
-#include "linux/threads.h"
-#include "linux/interrupt.h"
-#include "linux/err.h"
-#include "linux/hardirq.h"
-#include "asm/smp.h"
-#include "asm/processor.h"
-#include "asm/spinlock.h"
-#include "kern.h"
-#include "irq_user.h"
-#include "os.h"
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/threads.h>
+#include <linux/interrupt.h>
+#include <linux/err.h>
+#include <linux/hardirq.h>
+#include <asm/smp.h>
+#include <asm/processor.h>
+#include <asm/spinlock.h>
+#include <kern.h>
+#include <irq_user.h>
+#include <os.h>
 
 /* Per CPU bogomips and other parameters
  * The only piece used here is the ipi pipe, which is set before SMP is
diff --git a/arch/um/kernel/syscall.c b/arch/um/kernel/syscall.c
index a4c6d8eee74..10808bda367 100644
--- a/arch/um/kernel/syscall.c
+++ b/arch/um/kernel/syscall.c
@@ -3,16 +3,16 @@
  * Licensed under the GPL
  */
 
-#include "linux/file.h"
-#include "linux/fs.h"
-#include "linux/mm.h"
-#include "linux/sched.h"
-#include "linux/utsname.h"
-#include "linux/syscalls.h"
-#include "asm/current.h"
-#include "asm/mman.h"
-#include "asm/uaccess.h"
-#include "asm/unistd.h"
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/utsname.h>
+#include <linux/syscalls.h>
+#include <asm/current.h>
+#include <asm/mman.h>
+#include <asm/uaccess.h>
+#include <asm/unistd.h>
 #include "internal.h"
 
 long sys_fork(void)
diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c
index 5f76d4ba151..117568d4f64 100644
--- a/arch/um/kernel/time.c
+++ b/arch/um/kernel/time.c
@@ -10,8 +10,8 @@
 #include <linux/threads.h>
 #include <asm/irq.h>
 #include <asm/param.h>
-#include "kern_util.h"
-#include "os.h"
+#include <kern_util.h>
+#include <os.h>
 
 void timer_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
 {
diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c
index f819af951c1..9472079471b 100644
--- a/arch/um/kernel/tlb.c
+++ b/arch/um/kernel/tlb.c
@@ -8,10 +8,10 @@
 #include <linux/sched.h>
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
-#include "as-layout.h"
-#include "mem_user.h"
-#include "os.h"
-#include "skas.h"
+#include <as-layout.h>
+#include <mem_user.h>
+#include <os.h>
+#include <skas.h>
 
 struct host_vm_change {
 	struct host_vm_op {
diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
index 0353b98ae35..cf7585fbc9f 100644
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -10,11 +10,11 @@
 #include <asm/current.h>
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
-#include "arch.h"
-#include "as-layout.h"
-#include "kern_util.h"
-#include "os.h"
-#include "skas.h"
+#include <arch.h>
+#include <as-layout.h>
+#include <kern_util.h>
+#include <os.h>
+#include <skas.h>
 
 /*
  * Note this is constrained to return 0, -EFAULT, -EACCESS, -ENOMEM by
diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c
index 4db8770906c..87df5e3acc2 100644
--- a/arch/um/kernel/um_arch.c
+++ b/arch/um/kernel/um_arch.c
@@ -14,13 +14,13 @@
 #include <asm/pgtable.h>
 #include <asm/processor.h>
 #include <asm/setup.h>
-#include "as-layout.h"
-#include "arch.h"
-#include "init.h"
-#include "kern.h"
-#include "kern_util.h"
-#include "mem_user.h"
-#include "os.h"
+#include <as-layout.h>
+#include <arch.h>
+#include <init.h>
+#include <kern.h>
+#include <kern_util.h>
+#include <mem_user.h>
+#include <os.h>
 
 #define DEFAULT_COMMAND_LINE "root=98:0"
 
diff --git a/arch/um/kernel/umid.c b/arch/um/kernel/umid.c
index 81e07e2be3a..f6cc3bd6178 100644
--- a/arch/um/kernel/umid.c
+++ b/arch/um/kernel/umid.c
@@ -4,9 +4,9 @@
  */
 
 #include <asm/errno.h>
-#include "init.h"
-#include "kern.h"
-#include "os.h"
+#include <init.h>
+#include <kern.h>
+#include <os.h>
 
 /* Changed by set_umid_arg */
 static int umid_inited = 0;
diff --git a/arch/um/kernel/uml.lds.S b/arch/um/kernel/uml.lds.S
index fbd99402d4d..ff65fb4f1a9 100644
--- a/arch/um/kernel/uml.lds.S
+++ b/arch/um/kernel/uml.lds.S
@@ -60,7 +60,7 @@ SECTIONS
 	PROVIDE_HIDDEN(__rela_iplt_end = .);
   }
 
-  #include "asm/common.lds.S"
+  #include <asm/common.lds.S>
 
   init.data : { INIT_DATA }
   .data    :
diff --git a/arch/um/os-Linux/aio.c b/arch/um/os-Linux/aio.c
index c5d039e1ff3..3a6bc2af096 100644
--- a/arch/um/os-Linux/aio.c
+++ b/arch/um/os-Linux/aio.c
@@ -9,10 +9,10 @@
 #include <errno.h>
 #include <sys/time.h>
 #include <asm/unistd.h>
-#include "aio.h"
-#include "init.h"
-#include "kern_util.h"
-#include "os.h"
+#include <aio.h>
+#include <init.h>
+#include <kern_util.h>
+#include <os.h>
 
 struct aio_thread_req {
 	enum aio_type type;
diff --git a/arch/um/os-Linux/drivers/etap.h b/arch/um/os-Linux/drivers/etap.h
index ddffd41c3f3..54183a679fd 100644
--- a/arch/um/os-Linux/drivers/etap.h
+++ b/arch/um/os-Linux/drivers/etap.h
@@ -6,7 +6,7 @@
 #ifndef __DRIVERS_ETAP_H
 #define __DRIVERS_ETAP_H
 
-#include "net_user.h"
+#include <net_user.h>
 
 struct ethertap_data {
 	char *dev_name;
diff --git a/arch/um/os-Linux/drivers/ethertap_kern.c b/arch/um/os-Linux/drivers/ethertap_kern.c
index 7f6f9a71aae..f424600a583 100644
--- a/arch/um/os-Linux/drivers/ethertap_kern.c
+++ b/arch/um/os-Linux/drivers/ethertap_kern.c
@@ -9,7 +9,7 @@
 #include <linux/init.h>
 #include <linux/netdevice.h>
 #include "etap.h"
-#include "net_kern.h"
+#include <net_kern.h>
 
 struct ethertap_init {
 	char *dev_name;
diff --git a/arch/um/os-Linux/drivers/ethertap_user.c b/arch/um/os-Linux/drivers/ethertap_user.c
index db3d6481375..b39b6696ac5 100644
--- a/arch/um/os-Linux/drivers/ethertap_user.c
+++ b/arch/um/os-Linux/drivers/ethertap_user.c
@@ -13,9 +13,9 @@
 #include <sys/socket.h>
 #include <sys/wait.h>
 #include "etap.h"
-#include "os.h"
-#include "net_user.h"
-#include "um_malloc.h"
+#include <os.h>
+#include <net_user.h>
+#include <um_malloc.h>
 
 #define MAX_PACKET ETH_MAX_PACKET
 
diff --git a/arch/um/os-Linux/drivers/tuntap.h b/arch/um/os-Linux/drivers/tuntap.h
index f17c31586c8..7367354ac8d 100644
--- a/arch/um/os-Linux/drivers/tuntap.h
+++ b/arch/um/os-Linux/drivers/tuntap.h
@@ -6,7 +6,7 @@
 #ifndef __UM_TUNTAP_H
 #define __UM_TUNTAP_H
 
-#include "net_user.h"
+#include <net_user.h>
 
 struct tuntap_data {
 	char *dev_name;
diff --git a/arch/um/os-Linux/drivers/tuntap_kern.c b/arch/um/os-Linux/drivers/tuntap_kern.c
index 4048800e469..d9d56e5810f 100644
--- a/arch/um/os-Linux/drivers/tuntap_kern.c
+++ b/arch/um/os-Linux/drivers/tuntap_kern.c
@@ -7,7 +7,7 @@
 #include <linux/init.h>
 #include <linux/skbuff.h>
 #include <asm/errno.h>
-#include "net_kern.h"
+#include <net_kern.h>
 #include "tuntap.h"
 
 struct tuntap_init {
diff --git a/arch/um/os-Linux/drivers/tuntap_user.c b/arch/um/os-Linux/drivers/tuntap_user.c
index a2aacffdd90..14126d9176a 100644
--- a/arch/um/os-Linux/drivers/tuntap_user.c
+++ b/arch/um/os-Linux/drivers/tuntap_user.c
@@ -13,8 +13,8 @@
 #include <sys/socket.h>
 #include <sys/wait.h>
 #include <sys/uio.h>
-#include "kern_util.h"
-#include "os.h"
+#include <kern_util.h>
+#include <os.h>
 #include "tuntap.h"
 
 static int tuntap_user_init(void *data, void *dev)
diff --git a/arch/um/os-Linux/elf_aux.c b/arch/um/os-Linux/elf_aux.c
index d895271ad6f..1a365ddc4d0 100644
--- a/arch/um/os-Linux/elf_aux.c
+++ b/arch/um/os-Linux/elf_aux.c
@@ -9,9 +9,9 @@
  */
 #include <elf.h>
 #include <stddef.h>
-#include "init.h"
-#include "elf_user.h"
-#include "mem_user.h"
+#include <init.h>
+#include <elf_user.h>
+#include <mem_user.h>
 
 typedef Elf32_auxv_t elf_auxv_t;
 
diff --git a/arch/um/os-Linux/execvp.c b/arch/um/os-Linux/execvp.c
index 66e583a4031..8fb25ca07c4 100644
--- a/arch/um/os-Linux/execvp.c
+++ b/arch/um/os-Linux/execvp.c
@@ -27,12 +27,12 @@
 #include <limits.h>
 
 #ifndef TEST
-#include "um_malloc.h"
+#include <um_malloc.h>
 #else
 #include <stdio.h>
 #define um_kmalloc malloc
 #endif
-#include "os.h"
+#include <os.h>
 
 /* Execute FILE, searching in the `PATH' environment variable if it contains
    no slashes, with arguments ARGV and environment from `environ'.  */
diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c
index b049a63bb74..c17bd6f7d67 100644
--- a/arch/um/os-Linux/file.c
+++ b/arch/um/os-Linux/file.c
@@ -13,7 +13,7 @@
 #include <sys/socket.h>
 #include <sys/stat.h>
 #include <sys/un.h>
-#include "os.h"
+#include <os.h>
 
 static void copy_stat(struct uml_stat *dst, const struct stat64 *src)
 {
diff --git a/arch/um/os-Linux/helper.c b/arch/um/os-Linux/helper.c
index cf26c4a9a43..e3ee4a51ef6 100644
--- a/arch/um/os-Linux/helper.c
+++ b/arch/um/os-Linux/helper.c
@@ -10,9 +10,9 @@
 #include <linux/limits.h>
 #include <sys/socket.h>
 #include <sys/wait.h>
-#include "kern_util.h"
-#include "os.h"
-#include "um_malloc.h"
+#include <kern_util.h>
+#include <os.h>
+#include <um_malloc.h>
 
 struct helper_data {
 	void (*pre_exec)(void*);
diff --git a/arch/um/os-Linux/irq.c b/arch/um/os-Linux/irq.c
index 9a49908b576..b9afb74b79a 100644
--- a/arch/um/os-Linux/irq.c
+++ b/arch/um/os-Linux/irq.c
@@ -8,9 +8,9 @@
 #include <poll.h>
 #include <signal.h>
 #include <string.h>
-#include "irq_user.h"
-#include "os.h"
-#include "um_malloc.h"
+#include <irq_user.h>
+#include <os.h>
+#include <um_malloc.h>
 
 /*
  * Locked by irq_lock in arch/um/kernel/irq.c.  Changed by os_create_pollfd
diff --git a/arch/um/os-Linux/main.c b/arch/um/os-Linux/main.c
index 7a86dd516eb..749c96da7b9 100644
--- a/arch/um/os-Linux/main.c
+++ b/arch/um/os-Linux/main.c
@@ -10,11 +10,11 @@
 #include <signal.h>
 #include <string.h>
 #include <sys/resource.h>
-#include "as-layout.h"
-#include "init.h"
-#include "kern_util.h"
-#include "os.h"
-#include "um_malloc.h"
+#include <as-layout.h>
+#include <init.h>
+#include <kern_util.h>
+#include <os.h>
+#include <um_malloc.h>
 
 #define PGD_BOUND (4 * 1024 * 1024)
 #define STACKSIZE (8 * 1024 * 1024)
diff --git a/arch/um/os-Linux/mem.c b/arch/um/os-Linux/mem.c
index 8e421e1d6d3..ba4398056fe 100644
--- a/arch/um/os-Linux/mem.c
+++ b/arch/um/os-Linux/mem.c
@@ -13,8 +13,8 @@
 #include <sys/stat.h>
 #include <sys/mman.h>
 #include <sys/param.h>
-#include "init.h"
-#include "os.h"
+#include <init.h>
+#include <os.h>
 
 /* Modified by which_tmpdir, which is called during early boot */
 static char *default_tmpdir = "/tmp";
diff --git a/arch/um/os-Linux/process.c b/arch/um/os-Linux/process.c
index 307f173e7f8..162bea3d91b 100644
--- a/arch/um/os-Linux/process.c
+++ b/arch/um/os-Linux/process.c
@@ -12,10 +12,10 @@
 #include <sys/ptrace.h>
 #include <sys/wait.h>
 #include <asm/unistd.h>
-#include "init.h"
-#include "longjmp.h"
-#include "os.h"
-#include "skas_ptrace.h"
+#include <init.h>
+#include <longjmp.h>
+#include <os.h>
+#include <skas_ptrace.h>
 
 #define ARBITRARY_ADDR -1
 #define FAILURE_PID    -1
diff --git a/arch/um/os-Linux/registers.c b/arch/um/os-Linux/registers.c
index b866b9e3bef..2ff8d4fe83c 100644
--- a/arch/um/os-Linux/registers.c
+++ b/arch/um/os-Linux/registers.c
@@ -7,9 +7,9 @@
 #include <errno.h>
 #include <string.h>
 #include <sys/ptrace.h>
-#include "sysdep/ptrace.h"
-#include "sysdep/ptrace_user.h"
-#include "registers.h"
+#include <sysdep/ptrace.h>
+#include <sysdep/ptrace_user.h>
+#include <registers.h>
 
 int save_registers(int pid, struct uml_pt_regs *regs)
 {
diff --git a/arch/um/os-Linux/sigio.c b/arch/um/os-Linux/sigio.c
index 3c161218c67..8b61cc0e82c 100644
--- a/arch/um/os-Linux/sigio.c
+++ b/arch/um/os-Linux/sigio.c
@@ -11,11 +11,11 @@
 #include <sched.h>
 #include <signal.h>
 #include <string.h>
-#include "kern_util.h"
-#include "init.h"
-#include "os.h"
-#include "sigio.h"
-#include "um_malloc.h"
+#include <kern_util.h>
+#include <init.h>
+#include <os.h>
+#include <sigio.h>
+#include <um_malloc.h>
 
 /*
  * Protected by sigio_lock(), also used by sigio_cleanup, which is an
diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c
index 6366ce904b9..b1469fe9329 100644
--- a/arch/um/os-Linux/signal.c
+++ b/arch/um/os-Linux/signal.c
@@ -9,10 +9,10 @@
 #include <errno.h>
 #include <signal.h>
 #include <strings.h>
-#include "as-layout.h"
-#include "kern_util.h"
-#include "os.h"
-#include "sysdep/mcontext.h"
+#include <as-layout.h>
+#include <kern_util.h>
+#include <os.h>
+#include <sysdep/mcontext.h>
 #include "internal.h"
 
 void (*sig_info[NSIG])(int, siginfo_t *, struct uml_pt_regs *) = {
diff --git a/arch/um/os-Linux/skas/mem.c b/arch/um/os-Linux/skas/mem.c
index 90b310d2917..689b18db798 100644
--- a/arch/um/os-Linux/skas/mem.c
+++ b/arch/um/os-Linux/skas/mem.c
@@ -8,16 +8,16 @@
 #include <errno.h>
 #include <string.h>
 #include <sys/mman.h>
-#include "init.h"
-#include "as-layout.h"
-#include "mm_id.h"
-#include "os.h"
-#include "proc_mm.h"
-#include "ptrace_user.h"
-#include "registers.h"
-#include "skas.h"
-#include "sysdep/ptrace.h"
-#include "sysdep/stub.h"
+#include <init.h>
+#include <as-layout.h>
+#include <mm_id.h>
+#include <os.h>
+#include <proc_mm.h>
+#include <ptrace_user.h>
+#include <registers.h>
+#include <skas.h>
+#include <sysdep/ptrace.h>
+#include <sysdep/stub.h>
 
 extern unsigned long batch_syscall_stub, __syscall_stub_start;
 
diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c
index d93bb40499f..4625949bf1e 100644
--- a/arch/um/os-Linux/skas/process.c
+++ b/arch/um/os-Linux/skas/process.c
@@ -11,17 +11,17 @@
 #include <sys/mman.h>
 #include <sys/wait.h>
 #include <asm/unistd.h>
-#include "as-layout.h"
-#include "init.h"
-#include "kern_util.h"
-#include "mem.h"
-#include "os.h"
-#include "proc_mm.h"
-#include "ptrace_user.h"
-#include "registers.h"
-#include "skas.h"
-#include "skas_ptrace.h"
-#include "sysdep/stub.h"
+#include <as-layout.h>
+#include <init.h>
+#include <kern_util.h>
+#include <mem.h>
+#include <os.h>
+#include <proc_mm.h>
+#include <ptrace_user.h>
+#include <registers.h>
+#include <skas.h>
+#include <skas_ptrace.h>
+#include <sysdep/stub.h>
 
 int is_skas_winch(int pid, int fd, void *data)
 {
diff --git a/arch/um/os-Linux/start_up.c b/arch/um/os-Linux/start_up.c
index 425162e22af..da4b9e9999f 100644
--- a/arch/um/os-Linux/start_up.c
+++ b/arch/um/os-Linux/start_up.c
@@ -16,13 +16,13 @@
 #include <sys/stat.h>
 #include <sys/wait.h>
 #include <asm/unistd.h>
-#include "init.h"
-#include "os.h"
-#include "mem_user.h"
-#include "ptrace_user.h"
-#include "registers.h"
-#include "skas.h"
-#include "skas_ptrace.h"
+#include <init.h>
+#include <os.h>
+#include <mem_user.h>
+#include <ptrace_user.h>
+#include <registers.h>
+#include <skas.h>
+#include <skas_ptrace.h>
 
 static void ptrace_child(void)
 {
diff --git a/arch/um/os-Linux/time.c b/arch/um/os-Linux/time.c
index 0748fe0c8a7..fac388cb464 100644
--- a/arch/um/os-Linux/time.c
+++ b/arch/um/os-Linux/time.c
@@ -8,8 +8,8 @@
 #include <signal.h>
 #include <time.h>
 #include <sys/time.h>
-#include "kern_util.h"
-#include "os.h"
+#include <kern_util.h>
+#include <os.h>
 #include "internal.h"
 
 int set_interval(void)
diff --git a/arch/um/os-Linux/tty.c b/arch/um/os-Linux/tty.c
index dd12b99dcb5..721d8afa329 100644
--- a/arch/um/os-Linux/tty.c
+++ b/arch/um/os-Linux/tty.c
@@ -7,8 +7,8 @@
 #include <unistd.h>
 #include <errno.h>
 #include <fcntl.h>
-#include "kern_util.h"
-#include "os.h"
+#include <kern_util.h>
+#include <os.h>
 
 struct grantpt_info {
 	int fd;
diff --git a/arch/um/os-Linux/umid.c b/arch/um/os-Linux/umid.c
index 4832eb519f8..c1dc89261f6 100644
--- a/arch/um/os-Linux/umid.c
+++ b/arch/um/os-Linux/umid.c
@@ -12,8 +12,8 @@
 #include <string.h>
 #include <unistd.h>
 #include <sys/stat.h>
-#include "init.h"
-#include "os.h"
+#include <init.h>
+#include <os.h>
 
 #define UML_DIR "~/.uml/"
 
diff --git a/arch/um/os-Linux/user_syms.c b/arch/um/os-Linux/user_syms.c
index 73926fa3f96..db4a034aeee 100644
--- a/arch/um/os-Linux/user_syms.c
+++ b/arch/um/os-Linux/user_syms.c
@@ -1,5 +1,5 @@
-#include "linux/types.h"
-#include "linux/module.h"
+#include <linux/types.h>
+#include <linux/module.h>
 
 /* Some of this are builtin function (some are not but could in the future),
  * so I *must* declare good prototypes for them and then EXPORT them.
diff --git a/arch/um/os-Linux/util.c b/arch/um/os-Linux/util.c
index 9e3b43bb84c..492ef5e6e16 100644
--- a/arch/um/os-Linux/util.c
+++ b/arch/um/os-Linux/util.c
@@ -13,7 +13,7 @@
 #include <wait.h>
 #include <sys/mman.h>
 #include <sys/utsname.h>
-#include "os.h"
+#include <os.h>
 
 void stack_protections(unsigned long address)
 {
diff --git a/arch/um/sys-ppc/miscthings.c b/arch/um/sys-ppc/miscthings.c
index 1c11aed9c71..25908d26ce0 100644
--- a/arch/um/sys-ppc/miscthings.c
+++ b/arch/um/sys-ppc/miscthings.c
@@ -1,6 +1,6 @@
-#include "linux/threads.h"
-#include "linux/stddef.h"  // for NULL
-#include "linux/elf.h"  // for AT_NULL
+#include <linux/threads.h>
+#include <linux/stddef.h>  // for NULL
+#include <linux/elf.h>  // for AT_NULL
 
 /* The following function nicked from arch/ppc/kernel/process.c and
  * adapted slightly */
diff --git a/arch/um/sys-ppc/ptrace.c b/arch/um/sys-ppc/ptrace.c
index 66ef155248f..8245df41b20 100644
--- a/arch/um/sys-ppc/ptrace.c
+++ b/arch/um/sys-ppc/ptrace.c
@@ -1,4 +1,4 @@
-#include "linux/sched.h"
+#include <linux/sched.h>
 #include "asm/ptrace.h"
 
 int putreg(struct task_struct *child, unsigned long regno, 
diff --git a/arch/um/sys-ppc/ptrace_user.c b/arch/um/sys-ppc/ptrace_user.c
index 224d2403c37..4601b9296aa 100644
--- a/arch/um/sys-ppc/ptrace_user.c
+++ b/arch/um/sys-ppc/ptrace_user.c
@@ -1,6 +1,6 @@
 #include <errno.h>
 #include <asm/ptrace.h>
-#include "sysdep/ptrace.h"
+#include <sysdep/ptrace.h>
 
 int ptrace_getregs(long pid, unsigned long *regs_out)
 {
diff --git a/arch/um/sys-ppc/shared/sysdep/ptrace.h b/arch/um/sys-ppc/shared/sysdep/ptrace.h
index 0e3230e937e..efe0c1a3ea9 100644
--- a/arch/um/sys-ppc/shared/sysdep/ptrace.h
+++ b/arch/um/sys-ppc/shared/sysdep/ptrace.h
@@ -5,7 +5,7 @@
 #ifndef __SYS_PTRACE_PPC_H
 #define __SYS_PTRACE_PPC_H
 
-#include "linux/types.h"
+#include <linux/types.h>
 
 /* the following taken from <asm-ppc/ptrace.h> */
 
diff --git a/arch/um/sys-ppc/sigcontext.c b/arch/um/sys-ppc/sigcontext.c
index 40694d0f3d1..aac6c83fe44 100644
--- a/arch/um/sys-ppc/sigcontext.c
+++ b/arch/um/sys-ppc/sigcontext.c
@@ -1,4 +1,4 @@
 #include "asm/ptrace.h"
 #include "asm/sigcontext.h"
-#include "sysdep/ptrace.h"
+#include <sysdep/ptrace.h>
 
diff --git a/arch/um/sys-ppc/sysrq.c b/arch/um/sys-ppc/sysrq.c
index 2f816f1a0ff..f889449f928 100644
--- a/arch/um/sys-ppc/sysrq.c
+++ b/arch/um/sys-ppc/sysrq.c
@@ -3,8 +3,8 @@
  * Licensed under the GPL
  */
 
-#include "linux/kernel.h"
-#include "linux/smp.h"
+#include <linux/kernel.h>
+#include <linux/smp.h>
 #include "asm/ptrace.h"
 #include "sysrq.h"
 
diff --git a/arch/x86/um/asm/elf.h b/arch/x86/um/asm/elf.h
index 0e07adc8cbe..0feee2fd507 100644
--- a/arch/x86/um/asm/elf.h
+++ b/arch/x86/um/asm/elf.h
@@ -6,7 +6,7 @@
 #define __UM_ELF_X86_H
 
 #include <asm/user.h>
-#include "skas.h"
+#include <skas.h>
 
 #ifdef CONFIG_X86_32
 
diff --git a/arch/x86/um/bugs_32.c b/arch/x86/um/bugs_32.c
index 17d88cf2c6c..33daff4dade 100644
--- a/arch/x86/um/bugs_32.c
+++ b/arch/x86/um/bugs_32.c
@@ -4,9 +4,9 @@
  */
 
 #include <signal.h>
-#include "kern_util.h"
-#include "longjmp.h"
-#include "sysdep/ptrace.h"
+#include <kern_util.h>
+#include <longjmp.h>
+#include <sysdep/ptrace.h>
 #include <generated/asm-offsets.h>
 
 /* Set during early boot */
diff --git a/arch/x86/um/bugs_64.c b/arch/x86/um/bugs_64.c
index 44e02ba2a26..8cc8256c698 100644
--- a/arch/x86/um/bugs_64.c
+++ b/arch/x86/um/bugs_64.c
@@ -4,7 +4,7 @@
  * Licensed under the GPL
  */
 
-#include "sysdep/ptrace.h"
+#include <sysdep/ptrace.h>
 
 void arch_check_bugs(void)
 {
diff --git a/arch/x86/um/fault.c b/arch/x86/um/fault.c
index d670f68532f..8784ab30d91 100644
--- a/arch/x86/um/fault.c
+++ b/arch/x86/um/fault.c
@@ -3,7 +3,7 @@
  * Licensed under the GPL
  */
 
-#include "sysdep/ptrace.h"
+#include <sysdep/ptrace.h>
 
 /* These two are from asm-um/uaccess.h and linux/module.h, check them. */
 struct exception_table_entry
diff --git a/arch/x86/um/ldt.c b/arch/x86/um/ldt.c
index 26b0e39d2ce..8e08176f0bc 100644
--- a/arch/x86/um/ldt.c
+++ b/arch/x86/um/ldt.c
@@ -7,11 +7,11 @@
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <asm/unistd.h>
-#include "os.h"
-#include "proc_mm.h"
-#include "skas.h"
-#include "skas_ptrace.h"
-#include "sysdep/tls.h"
+#include <os.h>
+#include <proc_mm.h>
+#include <skas.h>
+#include <skas_ptrace.h>
+#include <sysdep/tls.h>
 
 extern int modify_ldt(int func, void *ptr, unsigned long bytecount);
 
diff --git a/arch/x86/um/mem_64.c b/arch/x86/um/mem_64.c
index 546518727a7..c6492e75797 100644
--- a/arch/x86/um/mem_64.c
+++ b/arch/x86/um/mem_64.c
@@ -1,6 +1,6 @@
-#include "linux/mm.h"
-#include "asm/page.h"
-#include "asm/mman.h"
+#include <linux/mm.h>
+#include <asm/page.h>
+#include <asm/mman.h>
 
 const char *arch_vma_name(struct vm_area_struct *vma)
 {
diff --git a/arch/x86/um/os-Linux/registers.c b/arch/x86/um/os-Linux/registers.c
index 0cdbb86b012..41bfe84e11a 100644
--- a/arch/x86/um/os-Linux/registers.c
+++ b/arch/x86/um/os-Linux/registers.c
@@ -9,8 +9,8 @@
 #ifdef __i386__
 #include <sys/user.h>
 #endif
-#include "longjmp.h"
-#include "sysdep/ptrace_user.h"
+#include <longjmp.h>
+#include <sysdep/ptrace_user.h>
 
 int save_fp_registers(int pid, unsigned long *fp_regs)
 {
diff --git a/arch/x86/um/os-Linux/task_size.c b/arch/x86/um/os-Linux/task_size.c
index efb16c5c9bc..8502ad30e61 100644
--- a/arch/x86/um/os-Linux/task_size.c
+++ b/arch/x86/um/os-Linux/task_size.c
@@ -2,7 +2,7 @@
 #include <stdlib.h>
 #include <signal.h>
 #include <sys/mman.h>
-#include "longjmp.h"
+#include <longjmp.h>
 
 #ifdef __i386__
 
diff --git a/arch/x86/um/os-Linux/tls.c b/arch/x86/um/os-Linux/tls.c
index 82276b6071a..9d94b3b76c7 100644
--- a/arch/x86/um/os-Linux/tls.c
+++ b/arch/x86/um/os-Linux/tls.c
@@ -5,7 +5,7 @@
 #include <sys/syscall.h>
 #include <unistd.h>
 
-#include "sysdep/tls.h"
+#include <sysdep/tls.h>
 
 #ifndef PTRACE_GET_THREAD_AREA
 #define PTRACE_GET_THREAD_AREA 25
diff --git a/arch/x86/um/ptrace_32.c b/arch/x86/um/ptrace_32.c
index 3b949daa095..ce3dd4f36f3 100644
--- a/arch/x86/um/ptrace_32.c
+++ b/arch/x86/um/ptrace_32.c
@@ -3,10 +3,10 @@
  * Licensed under the GPL
  */
 
-#include "linux/mm.h"
-#include "linux/sched.h"
-#include "asm/uaccess.h"
-#include "skas.h"
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <asm/uaccess.h>
+#include <skas.h>
 
 extern int arch_switch_tls(struct task_struct *to);
 
diff --git a/arch/x86/um/ptrace_user.c b/arch/x86/um/ptrace_user.c
index 3960ca1dd35..617885b1899 100644
--- a/arch/x86/um/ptrace_user.c
+++ b/arch/x86/um/ptrace_user.c
@@ -4,7 +4,7 @@
  */
 
 #include <errno.h>
-#include "ptrace_user.h"
+#include <ptrace_user.h>
 
 int ptrace_getregs(long pid, unsigned long *regs_out)
 {
diff --git a/arch/x86/um/shared/sysdep/ptrace.h b/arch/x86/um/shared/sysdep/ptrace.h
index 6ce2d76eb90..eb9356904ad 100644
--- a/arch/x86/um/shared/sysdep/ptrace.h
+++ b/arch/x86/um/shared/sysdep/ptrace.h
@@ -2,7 +2,7 @@
 #define __SYSDEP_X86_PTRACE_H
 
 #include <generated/user_constants.h>
-#include "sysdep/faultinfo.h"
+#include <sysdep/faultinfo.h>
 
 #define MAX_REG_OFFSET (UM_FRAME_SIZE)
 #define MAX_REG_NR ((MAX_REG_OFFSET) / sizeof(unsigned long))
diff --git a/arch/x86/um/shared/sysdep/stub.h b/arch/x86/um/shared/sysdep/stub.h
index bd161e30010..3f55e5bd3ce 100644
--- a/arch/x86/um/shared/sysdep/stub.h
+++ b/arch/x86/um/shared/sysdep/stub.h
@@ -1,8 +1,8 @@
 #include <asm/unistd.h>
 #include <sys/mman.h>
 #include <signal.h>
-#include "as-layout.h"
-#include "stub-data.h"
+#include <as-layout.h>
+#include <stub-data.h>
 
 #ifdef __i386__
 #include "stub_32.h"
diff --git a/arch/x86/um/shared/sysdep/syscalls_32.h b/arch/x86/um/shared/sysdep/syscalls_32.h
index 05cb796aecb..8436079be91 100644
--- a/arch/x86/um/shared/sysdep/syscalls_32.h
+++ b/arch/x86/um/shared/sysdep/syscalls_32.h
@@ -3,8 +3,8 @@
  * Licensed under the GPL
  */
 
-#include "asm/unistd.h"
-#include "sysdep/ptrace.h"
+#include <asm/unistd.h>
+#include <sysdep/ptrace.h>
 
 typedef long syscall_handler_t(struct pt_regs);
 
diff --git a/arch/x86/um/signal.c b/arch/x86/um/signal.c
index ba7363ecf89..bdaa08cfbcf 100644
--- a/arch/x86/um/signal.c
+++ b/arch/x86/um/signal.c
@@ -11,8 +11,8 @@
 #include <asm/unistd.h>
 #include <asm/uaccess.h>
 #include <asm/ucontext.h>
-#include "frame_kern.h"
-#include "skas.h"
+#include <frame_kern.h>
+#include <skas.h>
 
 #ifdef CONFIG_X86_32
 
diff --git a/arch/x86/um/stub_32.S b/arch/x86/um/stub_32.S
index 54a36ec20cb..b972649d3a1 100644
--- a/arch/x86/um/stub_32.S
+++ b/arch/x86/um/stub_32.S
@@ -1,4 +1,4 @@
-#include "as-layout.h"
+#include <as-layout.h>
 
 	.globl syscall_stub
 .section .__syscall_stub, "ax"
diff --git a/arch/x86/um/stub_64.S b/arch/x86/um/stub_64.S
index 20e4a96a6dc..7160b20172d 100644
--- a/arch/x86/um/stub_64.S
+++ b/arch/x86/um/stub_64.S
@@ -1,4 +1,4 @@
-#include "as-layout.h"
+#include <as-layout.h>
 
 	.globl syscall_stub
 .section .__syscall_stub, "ax"
diff --git a/arch/x86/um/stub_segv.c b/arch/x86/um/stub_segv.c
index b7450bd22e7..1518d2805ae 100644
--- a/arch/x86/um/stub_segv.c
+++ b/arch/x86/um/stub_segv.c
@@ -3,9 +3,9 @@
  * Licensed under the GPL
  */
 
-#include "sysdep/stub.h"
-#include "sysdep/faultinfo.h"
-#include "sysdep/mcontext.h"
+#include <sysdep/stub.h>
+#include <sysdep/faultinfo.h>
+#include <sysdep/mcontext.h>
 
 void __attribute__ ((__section__ (".__syscall_stub")))
 stub_segv_handler(int sig, siginfo_t *info, void *p)
diff --git a/arch/x86/um/tls_32.c b/arch/x86/um/tls_32.c
index baba84f8ecb..5f5feff3d24 100644
--- a/arch/x86/um/tls_32.c
+++ b/arch/x86/um/tls_32.c
@@ -3,12 +3,12 @@
  * Licensed under the GPL
  */
 
-#include "linux/percpu.h"
-#include "linux/sched.h"
-#include "asm/uaccess.h"
-#include "os.h"
-#include "skas.h"
-#include "sysdep/tls.h"
+#include <linux/percpu.h>
+#include <linux/sched.h>
+#include <asm/uaccess.h>
+#include <os.h>
+#include <skas.h>
+#include <sysdep/tls.h>
 
 /*
  * If needed we can detect when it's uninitialized.
diff --git a/arch/x86/um/tls_64.c b/arch/x86/um/tls_64.c
index f7ba46200ec..d22363cb854 100644
--- a/arch/x86/um/tls_64.c
+++ b/arch/x86/um/tls_64.c
@@ -1,4 +1,4 @@
-#include "linux/sched.h"
+#include <linux/sched.h>
 
 void clear_flushed_tls(struct task_struct *task)
 {
diff --git a/fs/hostfs/hostfs.h b/fs/hostfs/hostfs.h
index 1fe731337f0..9c88da0e855 100644
--- a/fs/hostfs/hostfs.h
+++ b/fs/hostfs/hostfs.h
@@ -1,7 +1,7 @@
 #ifndef __UM_FS_HOSTFS
 #define __UM_FS_HOSTFS
 
-#include "os.h"
+#include <os.h>
 
 /*
  * These are exactly the same definitions as in fs.h, but the names are
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 124146543aa..9e45f65241e 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -16,8 +16,8 @@
 #include <linux/mount.h>
 #include <linux/namei.h>
 #include "hostfs.h"
-#include "init.h"
-#include "kern.h"
+#include <init.h>
+#include <kern.h>
 
 struct hostfs_inode_info {
 	int fd;
diff --git a/fs/hostfs/hostfs_user.c b/fs/hostfs/hostfs_user.c
index a74ad0d371c..67838f3aa20 100644
--- a/fs/hostfs/hostfs_user.c
+++ b/fs/hostfs/hostfs_user.c
@@ -15,7 +15,6 @@
 #include <sys/types.h>
 #include <sys/vfs.h>
 #include "hostfs.h"
-#include "os.h"
 #include <utime.h>
 
 static void stat64_to_hostfs(const struct stat64 *buf, struct hostfs_stat *p)
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index c1dffe47fde..a68cb209251 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -18,7 +18,7 @@
 #include <linux/pid_namespace.h>
 #include <linux/namei.h>
 #include <asm/uaccess.h>
-#include "os.h"
+#include <os.h>
 
 static struct inode *get_inode(struct super_block *, struct dentry *);
 
-- 
cgit v1.2.3-70-g09d2


From 42c12213141d7c88e8f649b4bacd19ba14577658 Mon Sep 17 00:00:00 2001
From: Jason Wessel <jason.wessel@windriver.com>
Date: Wed, 8 Aug 2012 21:12:19 -0500
Subject: kgdb,x86: fix warning about unused variable

When compiling without CONFIG_DEBUG_RODATA the following
compiler warning is generated:

arch/x86/kernel/kgdb.c: In function 'kgdb_arch_set_breakpoint':
arch/x86/kernel/kgdb.c:749: warning: unused variable 'opc'

The variable instantiation needs to be inside the #ifdef to
make the warning go away.

Reported-by: Thiago Rafael Becker <trbecker@trbecker.org>
Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
---
 arch/x86/kernel/kgdb.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 3f61904365c..836f8322960 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -746,7 +746,9 @@ void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long ip)
 int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt)
 {
 	int err;
+#ifdef CONFIG_DEBUG_RODATA
 	char opc[BREAK_INSTR_SIZE];
+#endif /* CONFIG_DEBUG_RODATA */
 
 	bpt->type = BP_BREAKPOINT;
 	err = probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr,
-- 
cgit v1.2.3-70-g09d2


From 22e2430d60dbdfcdd732a086e9ef2dbd74c266d1 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 10 Oct 2012 21:35:42 -0400
Subject: x86, um: convert to saner kernel_execve() semantics

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/um/include/asm/processor-generic.h |  2 --
 arch/um/include/shared/os.h             |  1 -
 arch/um/kernel/exec.c                   |  5 -----
 arch/um/kernel/process.c                | 10 +++-------
 arch/um/os-Linux/process.c              | 13 -------------
 arch/x86/Kconfig                        |  1 +
 arch/x86/include/asm/unistd.h           |  1 -
 arch/x86/kernel/entry_32.S              | 31 ++++++++++++-------------------
 arch/x86/kernel/entry_64.S              | 24 ++++--------------------
 arch/x86/um/Kconfig                     |  1 +
 10 files changed, 21 insertions(+), 68 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h
index 5d9ab0c4f48..62435a00e70 100644
--- a/arch/um/include/asm/processor-generic.h
+++ b/arch/um/include/asm/processor-generic.h
@@ -26,7 +26,6 @@ struct thread_struct {
 	jmp_buf *fault_catcher;
 	struct task_struct *prev_sched;
 	unsigned long temp_stack;
-	jmp_buf *exec_buf;
 	struct arch_thread arch;
 	jmp_buf switch_buf;
 	int mm_count;
@@ -54,7 +53,6 @@ struct thread_struct {
 	.fault_addr		= NULL, \
 	.prev_sched		= NULL, \
 	.temp_stack		= 0, \
-	.exec_buf		= NULL, \
 	.arch			= INIT_ARCH_THREAD, \
 	.request		= { 0 } \
 }
diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h
index 89b686c1a3e..25dbd372d32 100644
--- a/arch/um/include/shared/os.h
+++ b/arch/um/include/shared/os.h
@@ -191,7 +191,6 @@ extern int os_getpid(void);
 extern int os_getpgrp(void);
 
 extern void init_new_thread_signals(void);
-extern int run_kernel_thread(int (*fn)(void *), void *arg, jmp_buf **jmp_ptr);
 
 extern int os_map_memory(void *virt, int fd, unsigned long long off,
 			 unsigned long len, int r, int w, int x);
diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c
index e427301f55d..565ca396d83 100644
--- a/arch/um/kernel/exec.c
+++ b/arch/um/kernel/exec.c
@@ -47,8 +47,3 @@ void start_thread(struct pt_regs *regs, unsigned long eip, unsigned long esp)
 #endif
 }
 EXPORT_SYMBOL(start_thread);
-
-void __noreturn ret_from_kernel_execve(struct pt_regs *unused)
-{
-	UML_LONGJMP(current->thread.exec_buf, 1);
-}
diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c
index a1b50add48a..94b0d8b9810 100644
--- a/arch/um/kernel/process.c
+++ b/arch/um/kernel/process.c
@@ -135,14 +135,10 @@ void new_thread_handler(void)
 	arg = current->thread.request.u.thread.arg;
 
 	/*
-	 * The return value is 1 if the kernel thread execs a process,
-	 * 0 if it just exits
+	 * callback returns only if the kernel thread execs a process
 	 */
-	n = run_kernel_thread(fn, arg, &current->thread.exec_buf);
-	if (n == 1)
-		userspace(&current->thread.regs.regs);
-	else
-		do_exit(0);
+	n = fn(arg);
+	userspace(&current->thread.regs.regs);
 }
 
 /* Called magically, see new_thread_handler above */
diff --git a/arch/um/os-Linux/process.c b/arch/um/os-Linux/process.c
index 307f173e7f8..a04ec167a9c 100644
--- a/arch/um/os-Linux/process.c
+++ b/arch/um/os-Linux/process.c
@@ -244,16 +244,3 @@ void init_new_thread_signals(void)
 	signal(SIGWINCH, SIG_IGN);
 	signal(SIGTERM, SIG_DFL);
 }
-
-int run_kernel_thread(int (*fn)(void *), void *arg, jmp_buf **jmp_ptr)
-{
-	jmp_buf buf;
-	int n;
-
-	*jmp_ptr = &buf;
-	n = UML_SETJMP(&buf);
-	if (n != 0)
-		return n;
-	(*fn)(arg);
-	return 0;
-}
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index d93eb9d1bb9..45edcba41e3 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -98,6 +98,7 @@ config X86
 	select GENERIC_STRNCPY_FROM_USER
 	select GENERIC_STRNLEN_USER
 	select GENERIC_KERNEL_THREAD
+	select GENERIC_KERNEL_EXECVE
 
 config INSTRUCTION_DECODER
 	def_bool (KPROBES || PERF_EVENTS || UPROBES)
diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h
index 55d155560fd..16f3fc6ebf2 100644
--- a/arch/x86/include/asm/unistd.h
+++ b/arch/x86/include/asm/unistd.h
@@ -51,7 +51,6 @@
 # define __ARCH_WANT_SYS_UTIME
 # define __ARCH_WANT_SYS_WAITPID
 # define __ARCH_WANT_SYS_EXECVE
-# define __ARCH_WANT_KERNEL_EXECVE
 
 /*
  * "Conditional" syscalls
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index fe4cc305d8d..91d295908c3 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -298,12 +298,20 @@ ENTRY(ret_from_fork)
 	CFI_ENDPROC
 END(ret_from_fork)
 
-ENTRY(ret_from_kernel_execve)
-	movl %eax, %esp
-	movl $0,PT_EAX(%esp)
+ENTRY(ret_from_kernel_thread)
+	CFI_STARTPROC
+	pushl_cfi %eax
+	call schedule_tail
 	GET_THREAD_INFO(%ebp)
+	popl_cfi %eax
+	pushl_cfi $0x0202		# Reset kernel eflags
+	popfl_cfi
+	movl PT_EBP(%esp),%eax
+	call *PT_EBX(%esp)
+	movl $0,PT_EAX(%esp)
 	jmp syscall_exit
-END(ret_from_kernel_execve)
+	CFI_ENDPROC
+ENDPROC(ret_from_kernel_thread)
 
 /*
  * Interrupt exit functions should be protected against kprobes
@@ -994,21 +1002,6 @@ END(spurious_interrupt_bug)
  */
 	.popsection
 
-ENTRY(ret_from_kernel_thread)
-	CFI_STARTPROC
-	pushl_cfi %eax
-	call schedule_tail
-	GET_THREAD_INFO(%ebp)
-	popl_cfi %eax
-	pushl_cfi $0x0202		# Reset kernel eflags
-	popfl_cfi
-	movl PT_EBP(%esp),%eax
-	call *PT_EBX(%esp)
-	call do_exit
-	ud2			# padding for call trace
-	CFI_ENDPROC
-ENDPROC(ret_from_kernel_thread)
-
 #ifdef CONFIG_XEN
 /* Xen doesn't set %esp to be precisely what the normal sysenter
    entrypoint expects, so fix it up before using the normal path. */
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 053c9552ffd..e1f98c22003 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -459,15 +459,13 @@ ENTRY(ret_from_fork)
 	jmp ret_from_sys_call			# go to the SYSRET fastpath
 
 1:
-	subq $REST_SKIP, %rsp	# move the stack pointer back
+	subq $REST_SKIP, %rsp	# leave space for volatiles
 	CFI_ADJUST_CFA_OFFSET	REST_SKIP
 	movq %rbp, %rdi
 	call *%rbx
-	# exit
-	mov %eax, %edi
-	call do_exit
-	ud2			# padding for call trace
-
+	movl $0, RAX(%rsp)
+	RESTORE_REST
+	jmp int_ret_from_sys_call
 	CFI_ENDPROC
 END(ret_from_fork)
 
@@ -1214,20 +1212,6 @@ bad_gs:
 	jmp  2b
 	.previous
 
-ENTRY(ret_from_kernel_execve)
-	movq %rdi, %rsp
-	movl $0, RAX(%rsp)
-	// RESTORE_REST
-	movq 0*8(%rsp), %r15
-	movq 1*8(%rsp), %r14
-	movq 2*8(%rsp), %r13
-	movq 3*8(%rsp), %r12
-	movq 4*8(%rsp), %rbp
-	movq 5*8(%rsp), %rbx
-	addq $(6*8), %rsp
-	jmp int_ret_from_sys_call
-END(ret_from_kernel_execve)
-
 /* Call softirq on interrupt stack. Interrupts are off. */
 ENTRY(call_softirq)
 	CFI_STARTPROC
diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig
index da85b6fc8e8..cab8eb88dd2 100644
--- a/arch/x86/um/Kconfig
+++ b/arch/x86/um/Kconfig
@@ -14,6 +14,7 @@ config UML_X86
 	def_bool y
 	select GENERIC_FIND_FIRST_BIT
 	select GENERIC_KERNEL_THREAD
+	select GENERIC_KERNEL_EXECVE
 
 config 64BIT
 	bool "64-bit kernel" if SUBARCH = "x86"
-- 
cgit v1.2.3-70-g09d2


From b6eea87fc6850d3531a64a27d2323a4498cd4e43 Mon Sep 17 00:00:00 2001
From: Matt Fleming <matt.fleming@intel.com>
Date: Fri, 12 Oct 2012 11:19:59 +0100
Subject: x86, boot: Explicitly include autoconf.h for hostprogs

The hostprogs need access to the CONFIG_* symbols found in
include/generated/autoconf.h.  But commit abbf1590de22 ("UAPI: Partition
the header include path sets and add uapi/ header directories") replaced
$(LINUXINCLUDE) with $(USERINCLUDE) which doesn't contain the necessary
include paths.

This has the undesirable effect of breaking the EFI boot stub because
the #ifdef CONFIG_EFI_STUB code in arch/x86/boot/tools/build.c is
never compiled.

It should also be noted that because $(USERINCLUDE) isn't exported by
the top-level Makefile it's actually empty in arch/x86/boot/Makefile.

Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Ingo Molnar <mingo@kernel.org>
Acked-by: David Howells <dhowells@redhat.com>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/boot/Makefile | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index ce03476d8c8..ccce0ed67dd 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -37,7 +37,8 @@ setup-y		+= video-bios.o
 targets		+= $(setup-y)
 hostprogs-y	:= mkcpustr tools/build
 
-HOST_EXTRACFLAGS += -I$(srctree)/tools/include $(USERINCLUDE) \
+HOST_EXTRACFLAGS += -I$(srctree)/tools/include \
+		    -include include/generated/autoconf.h \
 	            -D__EXPORTED_HEADERS__
 
 $(obj)/cpu.o: $(obj)/cpustr.h
-- 
cgit v1.2.3-70-g09d2