From fb341f572d26e0786167cd96b90cc4febed830cf Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Sat, 5 Dec 2009 12:34:11 -0200
Subject: KVM: MMU: remove prefault from invlpg handler

The invlpg prefault optimization breaks Windows 2008 R2 occasionally.

The visible effect is that the invlpg handler instantiates a pte which
is, microseconds later, written with a different gfn by another vcpu.

The OS could have other mechanisms to prevent a present translation from
being used, which the hypervisor is unaware of.

While the documentation states that the cpu is at liberty to prefetch tlb
entries, it looks like this is not heeded, so remove tlb prefetch from
invlpg.

Cc: stable@kernel.org
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/paging_tmpl.h | 18 ------------------
 1 file changed, 18 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index a6017132fba..58a0f1e8859 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -455,8 +455,6 @@ out_unlock:
 static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
 {
 	struct kvm_shadow_walk_iterator iterator;
-	pt_element_t gpte;
-	gpa_t pte_gpa = -1;
 	int level;
 	u64 *sptep;
 	int need_flush = 0;
@@ -470,10 +468,6 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
 		if (level == PT_PAGE_TABLE_LEVEL  ||
 		    ((level == PT_DIRECTORY_LEVEL && is_large_pte(*sptep))) ||
 		    ((level == PT_PDPE_LEVEL && is_large_pte(*sptep)))) {
-			struct kvm_mmu_page *sp = page_header(__pa(sptep));
-
-			pte_gpa = (sp->gfn << PAGE_SHIFT);
-			pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t);
 
 			if (is_shadow_present_pte(*sptep)) {
 				rmap_remove(vcpu->kvm, sptep);
@@ -492,18 +486,6 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
 	if (need_flush)
 		kvm_flush_remote_tlbs(vcpu->kvm);
 	spin_unlock(&vcpu->kvm->mmu_lock);
-
-	if (pte_gpa == -1)
-		return;
-	if (kvm_read_guest_atomic(vcpu->kvm, pte_gpa, &gpte,
-				  sizeof(pt_element_t)))
-		return;
-	if (is_present_gpte(gpte) && (gpte & PT_ACCESSED_MASK)) {
-		if (mmu_topup_memory_caches(vcpu))
-			return;
-		kvm_mmu_pte_write(vcpu, pte_gpa, (const u8 *)&gpte,
-				  sizeof(pt_element_t), 0);
-	}
 }
 
 static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr)
-- 
cgit v1.2.3-70-g09d2


From 6e24a6eff4571002cd48b99a2b92dc829ce39cb9 Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Mon, 14 Dec 2009 17:37:35 -0200
Subject: KVM: LAPIC: make sure IRR bitmap is scanned after vm load

The vcpus are initialized with irr_pending set to false, but
loading the LAPIC registers with pending IRR fails to reset
the irr_pending variable.

Cc: stable@kernel.org
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/lapic.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index cd60c0bd1b3..3063a0c4858 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1150,6 +1150,7 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu)
 	hrtimer_cancel(&apic->lapic_timer.timer);
 	update_divide_count(apic);
 	start_apic_timer(apic);
+	apic->irr_pending = true;
 }
 
 void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
-- 
cgit v1.2.3-70-g09d2


From dab4b911a5327859bb8f969249c6978c26cd4853 Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@web.de>
Date: Sun, 6 Dec 2009 18:24:15 +0100
Subject: KVM: x86: Extend KVM_SET_VCPU_EVENTS with selective updates

User space may not want to overwrite asynchronously changing VCPU event
states on write-back. So allow to skip nmi.pending and sipi_vector by
setting corresponding bits in the flags field of kvm_vcpu_events.

[avi: advertise the bits in KVM_GET_VCPU_EVENTS]

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 Documentation/kvm/api.txt  | 10 +++++++++-
 arch/x86/include/asm/kvm.h |  4 ++++
 arch/x86/kvm/x86.c         | 12 ++++++++----
 3 files changed, 21 insertions(+), 5 deletions(-)

(limited to 'arch/x86')

diff --git a/Documentation/kvm/api.txt b/Documentation/kvm/api.txt
index e1a11416102..2811e452f75 100644
--- a/Documentation/kvm/api.txt
+++ b/Documentation/kvm/api.txt
@@ -685,7 +685,7 @@ struct kvm_vcpu_events {
 		__u8 pad;
 	} nmi;
 	__u32 sipi_vector;
-	__u32 flags;   /* must be zero */
+	__u32 flags;
 };
 
 4.30 KVM_SET_VCPU_EVENTS
@@ -701,6 +701,14 @@ vcpu.
 
 See KVM_GET_VCPU_EVENTS for the data structure.
 
+Fields that may be modified asynchronously by running VCPUs can be excluded
+from the update. These fields are nmi.pending and sipi_vector. Keep the
+corresponding bits in the flags field cleared to suppress overwriting the
+current in-kernel state. The bits are:
+
+KVM_VCPUEVENT_VALID_NMI_PENDING - transfer nmi.pending to the kernel
+KVM_VCPUEVENT_VALID_SIPI_VECTOR - transfer sipi_vector
+
 
 5. The kvm_run structure
 
diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h
index 950df434763..f46b79f6c16 100644
--- a/arch/x86/include/asm/kvm.h
+++ b/arch/x86/include/asm/kvm.h
@@ -254,6 +254,10 @@ struct kvm_reinject_control {
 	__u8 reserved[31];
 };
 
+/* When set in flags, include corresponding fields on KVM_SET_VCPU_EVENTS */
+#define KVM_VCPUEVENT_VALID_NMI_PENDING	0x00000001
+#define KVM_VCPUEVENT_VALID_SIPI_VECTOR	0x00000002
+
 /* for KVM_GET/SET_VCPU_EVENTS */
 struct kvm_vcpu_events {
 	struct {
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9d068966fb2..6651dbf5867 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1913,7 +1913,8 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
 
 	events->sipi_vector = vcpu->arch.sipi_vector;
 
-	events->flags = 0;
+	events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING
+			 | KVM_VCPUEVENT_VALID_SIPI_VECTOR);
 
 	vcpu_put(vcpu);
 }
@@ -1921,7 +1922,8 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
 static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
 					      struct kvm_vcpu_events *events)
 {
-	if (events->flags)
+	if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING
+			      | KVM_VCPUEVENT_VALID_SIPI_VECTOR))
 		return -EINVAL;
 
 	vcpu_load(vcpu);
@@ -1938,10 +1940,12 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
 		kvm_pic_clear_isr_ack(vcpu->kvm);
 
 	vcpu->arch.nmi_injected = events->nmi.injected;
-	vcpu->arch.nmi_pending = events->nmi.pending;
+	if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING)
+		vcpu->arch.nmi_pending = events->nmi.pending;
 	kvm_x86_ops->set_nmi_mask(vcpu, events->nmi.masked);
 
-	vcpu->arch.sipi_vector = events->sipi_vector;
+	if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR)
+		vcpu->arch.sipi_vector = events->sipi_vector;
 
 	vcpu_put(vcpu);
 
-- 
cgit v1.2.3-70-g09d2