KVM: Avoid killing userspace through guest SRAO MCE on unmapped pages

In common cases, guest SRAO MCE will cause corresponding poisoned page be un-mapped and SIGBUS be sent to QEMU-KVM, then QEMU-KVM will relay the MCE to guest OS. But it is reported that if the poisoned page is accessed in guest after unmapping and before MCE is relayed to guest OS, userspace will be killed. The reason is as follows. Because poisoned page has been un-mapped, guest access will cause guest exit and kvm_mmu_page_fault will be called. kvm_mmu_page_fault can not get the poisoned page for fault address, so kernel and user space MMIO processing is tried in turn. In user MMIO processing, poisoned page is accessed again, then userspace is killed by force_sig_info. To fix the bug, kvm_mmu_page_fault send HWPOISON signal to QEMU-KVM and do not try kernel and user space MMIO processing for poisoned page. [xiao: fix warning introduced by avi] Reported-by: Max Asbock <masbock@linux.vnet.ibm.com> Signed-off-by: Huang Ying <ying.huang@intel.com> Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com> Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com> Signed-off-by: Avi Kivity <avi@redhat.com>
author: Huang Ying <ying.huang@intel.com> 2010-05-31 14:28:19 +0800
committer: Avi Kivity <avi@redhat.com> 2010-08-01 10:35:26 +0300
commit: bf998156d24bcb127318ad5bf531ac3bdfcd6449 (patch)
tree: 616c19474d7cb626ff9eebc54f6753563a4322cd /virt/kvm/kvm_main.c
parent: 540ad6b62b3a188a53b51cac81d8a60d40e29fbd (diff)
1 files changed, 28 insertions, 2 deletions
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index f032806a212..187aa8d984a 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -92,6 +92,9 @@ static bool kvm_rebooting;
 
 static bool largepages_enabled = true;
 
+struct page *hwpoison_page;
+pfn_t hwpoison_pfn;
+
 inline int kvm_is_mmio_pfn(pfn_t pfn)
 {
 	if (pfn_valid(pfn)) {
@@ -810,16 +813,22 @@ EXPORT_SYMBOL_GPL(kvm_disable_largepages);
 
 int is_error_page(struct page *page)
 {
-	return page == bad_page;
+	return page == bad_page || page == hwpoison_page;
 }
 EXPORT_SYMBOL_GPL(is_error_page);
 
 int is_error_pfn(pfn_t pfn)
 {
-	return pfn == bad_pfn;
+	return pfn == bad_pfn || pfn == hwpoison_pfn;
 }
 EXPORT_SYMBOL_GPL(is_error_pfn);
 
+int is_hwpoison_pfn(pfn_t pfn)
+{
+	return pfn == hwpoison_pfn;
+}
+EXPORT_SYMBOL_GPL(is_hwpoison_pfn);
+
 static inline unsigned long bad_hva(void)
 {
 	return PAGE_OFFSET;
@@ -945,6 +954,11 @@ static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr)
 	if (unlikely(npages != 1)) {
 		struct vm_area_struct *vma;
 
+		if (is_hwpoison_address(addr)) {
+			get_page(hwpoison_page);
+			return page_to_pfn(hwpoison_page);
+		}
+
 		down_read(&current->mm->mmap_sem);
 		vma = find_vma(current->mm, addr);
 
@@ -2197,6 +2211,15 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
 
 	bad_pfn = page_to_pfn(bad_page);
 
+	hwpoison_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+
+	if (hwpoison_page == NULL) {
+		r = -ENOMEM;
+		goto out_free_0;
+	}
+
+	hwpoison_pfn = page_to_pfn(hwpoison_page);
+
 	if (!zalloc_cpumask_var(&cpus_hardware_enabled, GFP_KERNEL)) {
 		r = -ENOMEM;
 		goto out_free_0;
@@ -2269,6 +2292,8 @@ out_free_1:
 out_free_0a:
 	free_cpumask_var(cpus_hardware_enabled);
 out_free_0:
+	if (hwpoison_page)
+		__free_page(hwpoison_page);
 	__free_page(bad_page);
 out:
 	kvm_arch_exit();
@@ -2290,6 +2315,7 @@ void kvm_exit(void)
 	kvm_arch_hardware_unsetup();
 	kvm_arch_exit();
 	free_cpumask_var(cpus_hardware_enabled);
+	__free_page(hwpoison_page);
 	__free_page(bad_page);
 }
 EXPORT_SYMBOL_GPL(kvm_exit);
author	Huang Ying <ying.huang@intel.com>	2010-05-31 14:28:19 +0800
committer	Avi Kivity <avi@redhat.com>	2010-08-01 10:35:26 +0300
commit	bf998156d24bcb127318ad5bf531ac3bdfcd6449 (patch)
tree	616c19474d7cb626ff9eebc54f6753563a4322cd /virt/kvm/kvm_main.c
parent	540ad6b62b3a188a53b51cac81d8a60d40e29fbd (diff)