From f56f5369561ccafe0d4a2396179f814454a1571c Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 20 Oct 2010 17:56:17 +0200 Subject: KVM: Move KVM context switch into own function gcc 4.5 with some special options is able to duplicate the VMX context switch asm in vmx_vcpu_run(). This results in a compile error because the inline asm sequence uses an on local label. The non local label is needed because other code wants to set up the return address. This patch moves the asm code into an own function and marks that explicitely noinline to avoid this problem. Better would be probably to just move it into an .S file. The diff looks worse than the change really is, it's all just code movement and no logic change. Signed-off-by: Andi Kleen Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/vmx.c | 63 ++++++++++++++++++++++++++++++++---------------------- 1 file changed, 38 insertions(+), 25 deletions(-) (limited to 'arch/x86/kvm/vmx.c') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 81fcbe9515c..46c89252f82 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3890,33 +3890,17 @@ static void vmx_cancel_injection(struct kvm_vcpu *vcpu) #define Q "l" #endif -static void vmx_vcpu_run(struct kvm_vcpu *vcpu) +/* + * We put this into a separate noinline function to prevent the compiler + * from duplicating the code. This is needed because this code + * uses non local labels that cannot be duplicated. + * Do not put any flow control into this function. + * Better would be to put this whole monstrosity into a .S file. + */ +static void noinline do_vmx_vcpu_run(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); - - /* Record the guest's net vcpu time for enforced NMI injections. */ - if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) - vmx->entry_time = ktime_get(); - - /* Don't enter VMX if guest state is invalid, let the exit handler - start emulation until we arrive back to a valid state */ - if (vmx->emulation_required && emulate_invalid_guest_state) - return; - - if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty)) - vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]); - if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty)) - vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]); - - /* When single-stepping over STI and MOV SS, we must clear the - * corresponding interruptibility bits in the guest state. Otherwise - * vmentry fails as it then expects bit 14 (BS) in pending debug - * exceptions being set, but that's not correct for the guest debugging - * case. */ - if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) - vmx_set_interrupt_shadow(vcpu, 0); - - asm( + asm volatile( /* Store host registers */ "push %%"R"dx; push %%"R"bp;" "push %%"R"cx \n\t" @@ -4011,6 +3995,35 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) , "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" #endif ); +} + +static void vmx_vcpu_run(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + + /* Record the guest's net vcpu time for enforced NMI injections. */ + if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) + vmx->entry_time = ktime_get(); + + /* Don't enter VMX if guest state is invalid, let the exit handler + start emulation until we arrive back to a valid state */ + if (vmx->emulation_required && emulate_invalid_guest_state) + return; + + if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty)) + vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]); + if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty)) + vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]); + + /* When single-stepping over STI and MOV SS, we must clear the + * corresponding interruptibility bits in the guest state. Otherwise + * vmentry fails as it then expects bit 14 (BS) in pending debug + * exceptions being set, but that's not correct for the guest debugging + * case. */ + if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) + vmx_set_interrupt_shadow(vcpu, 0); + + do_vmx_vcpu_run(vcpu); vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP) | (1 << VCPU_EXREG_PDPTR)); -- cgit v1.2.3-70-g09d2 From ff1fcb9ebd53ee3f21ae117e6952204e465f46d8 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Fri, 22 Oct 2010 14:18:15 -0200 Subject: KVM: VMX: remove setting of shadow_base_ptes for EPT The EPT present/writable bits use the same position as normal pagetable bits. Since direct_map passes ACC_ALL to mmu_set_spte, thus always setting the writable bit on sptes, use the generic PT_PRESENT shadow_base_pte. Also pass present/writable error code information from EPT violation to generic pagefault handler. Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch/x86/kvm/vmx.c') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 46c89252f82..e42727b305c 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3476,7 +3476,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu) gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); trace_kvm_page_fault(gpa, exit_qualification); - return kvm_mmu_page_fault(vcpu, gpa & PAGE_MASK, 0); + return kvm_mmu_page_fault(vcpu, gpa, exit_qualification & 0x3); } static u64 ept_rsvd_mask(u64 spte, int level) @@ -4409,8 +4409,6 @@ static int __init vmx_init(void) if (enable_ept) { bypass_guest_pf = 0; - kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK | - VMX_EPT_WRITABLE_MASK); kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull, VMX_EPT_EXECUTABLE_MASK); kvm_enable_tdp(); -- cgit v1.2.3-70-g09d2 From ec25d5e66ee152e371fd7046f3f8441859579aea Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Mon, 1 Nov 2010 15:35:01 +0200 Subject: KVM: handle exit due to INVD in VMX Currently the exit is unhandled, so guest halts with error if it tries to execute INVD instruction. Call into emulator when INVD instruction is executed by a guest instead. This instruction is not needed by ordinary guests, but firmware (like OpenBIOS) use it and fail. Signed-off-by: Gleb Natapov Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/vmx.h | 1 + arch/x86/kvm/vmx.c | 6 ++++++ 2 files changed, 7 insertions(+) (limited to 'arch/x86/kvm/vmx.c') diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 9f0cbd987d5..42d959056f9 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -239,6 +239,7 @@ enum vmcs_field { #define EXIT_REASON_TASK_SWITCH 9 #define EXIT_REASON_CPUID 10 #define EXIT_REASON_HLT 12 +#define EXIT_REASON_INVD 13 #define EXIT_REASON_INVLPG 14 #define EXIT_REASON_RDPMC 15 #define EXIT_REASON_RDTSC 16 diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index e42727b305c..12c30733e23 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3349,6 +3349,11 @@ static int handle_vmx_insn(struct kvm_vcpu *vcpu) return 1; } +static int handle_invd(struct kvm_vcpu *vcpu) +{ + return emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DONE; +} + static int handle_invlpg(struct kvm_vcpu *vcpu) { unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); @@ -3649,6 +3654,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { [EXIT_REASON_MSR_WRITE] = handle_wrmsr, [EXIT_REASON_PENDING_INTERRUPT] = handle_interrupt_window, [EXIT_REASON_HLT] = handle_halt, + [EXIT_REASON_INVD] = handle_invd, [EXIT_REASON_INVLPG] = handle_invlpg, [EXIT_REASON_VMCALL] = handle_vmcall, [EXIT_REASON_VMCLEAR] = handle_vmx_insn, -- cgit v1.2.3-70-g09d2 From 30bd0c4c6c5aecc338ebf32e3a6e01c98f0a0b43 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 1 Nov 2010 23:20:48 +0200 Subject: KVM: VMX: Disallow NMI while blocked by STI While not mandated by the spec, Linux relies on NMI being blocked by an IF-enabling STI. VMX also refuses to enter a guest in this state, at least on some implementations. Disallow NMI while blocked by STI by checking for the condition, and requesting an interrupt window exit if it occurs. Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/vmx.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'arch/x86/kvm/vmx.c') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 12c30733e23..8087c4d1a13 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2787,6 +2787,10 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu) return; } + if (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) { + enable_irq_window(vcpu); + return; + } cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING; vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); @@ -2849,7 +2853,8 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) return 0; return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & - (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_NMI)); + (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI + | GUEST_INTR_STATE_NMI)); } static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu) -- cgit v1.2.3-70-g09d2 From f9335afea5d649693aee1ec8af2cc8ccf376f5a9 Mon Sep 17 00:00:00 2001 From: Shane Wang Date: Wed, 17 Nov 2010 11:40:17 +0800 Subject: KVM: VMX: Inform user about INTEL_TXT dependency Inform user to either disable TXT in the BIOS or do TXT launch with tboot before enabling KVM since some BIOSes do not set FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX bit when TXT is enabled. Signed-off-by: Shane Wang Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/vmx.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/x86/kvm/vmx.c') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 8087c4d1a13..92612fb162d 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1305,8 +1305,11 @@ static __init int vmx_disabled_by_bios(void) && tboot_enabled()) return 1; if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX) - && !tboot_enabled()) + && !tboot_enabled()) { + printk(KERN_WARNING "kvm: disable TXT in the BIOS or " + " activate TXT before enabling KVM\n"); return 1; + } } return 0; -- cgit v1.2.3-70-g09d2 From 104f226bfd0a607ca0e804ae4907555374f72cd9 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 18 Nov 2010 13:12:52 +0200 Subject: KVM: VMX: Fold __vmx_vcpu_run() into vmx_vcpu_run() cea15c2 ("KVM: Move KVM context switch into own function") split vmx_vcpu_run() to prevent multiple copies of the context switch from being generated (causing problems due to a label). This patch folds them back together again and adds the __noclone attribute to prevent the label from being duplicated. Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 63 ++++++++++++++++++++++-------------------------------- 1 file changed, 25 insertions(+), 38 deletions(-) (limited to 'arch/x86/kvm/vmx.c') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 92612fb162d..6bf807adbeb 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3904,17 +3904,33 @@ static void vmx_cancel_injection(struct kvm_vcpu *vcpu) #define Q "l" #endif -/* - * We put this into a separate noinline function to prevent the compiler - * from duplicating the code. This is needed because this code - * uses non local labels that cannot be duplicated. - * Do not put any flow control into this function. - * Better would be to put this whole monstrosity into a .S file. - */ -static void noinline do_vmx_vcpu_run(struct kvm_vcpu *vcpu) +static void vmx_vcpu_run(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); - asm volatile( + + /* Record the guest's net vcpu time for enforced NMI injections. */ + if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) + vmx->entry_time = ktime_get(); + + /* Don't enter VMX if guest state is invalid, let the exit handler + start emulation until we arrive back to a valid state */ + if (vmx->emulation_required && emulate_invalid_guest_state) + return; + + if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty)) + vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]); + if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty)) + vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]); + + /* When single-stepping over STI and MOV SS, we must clear the + * corresponding interruptibility bits in the guest state. Otherwise + * vmentry fails as it then expects bit 14 (BS) in pending debug + * exceptions being set, but that's not correct for the guest debugging + * case. */ + if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) + vmx_set_interrupt_shadow(vcpu, 0); + + asm( /* Store host registers */ "push %%"R"dx; push %%"R"bp;" "push %%"R"cx \n\t" @@ -4009,35 +4025,6 @@ static void noinline do_vmx_vcpu_run(struct kvm_vcpu *vcpu) , "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" #endif ); -} - -static void vmx_vcpu_run(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - - /* Record the guest's net vcpu time for enforced NMI injections. */ - if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) - vmx->entry_time = ktime_get(); - - /* Don't enter VMX if guest state is invalid, let the exit handler - start emulation until we arrive back to a valid state */ - if (vmx->emulation_required && emulate_invalid_guest_state) - return; - - if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty)) - vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]); - if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty)) - vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]); - - /* When single-stepping over STI and MOV SS, we must clear the - * corresponding interruptibility bits in the guest state. Otherwise - * vmentry fails as it then expects bit 14 (BS) in pending debug - * exceptions being set, but that's not correct for the guest debugging - * case. */ - if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) - vmx_set_interrupt_shadow(vcpu, 0); - - do_vmx_vcpu_run(vcpu); vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP) | (1 << VCPU_EXREG_PDPTR)); -- cgit v1.2.3-70-g09d2 From aa17911e3c21b63e3bf94c580ed029d6dad816b4 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 17 Nov 2010 18:44:19 +0200 Subject: KVM: Record instruction set in kvm_exit tracepoint exit_reason's meaning depend on the instruction set; record it so a trace taken on one machine can be interpreted on another. Signed-off-by: Avi Kivity --- arch/x86/kvm/svm.c | 2 +- arch/x86/kvm/trace.h | 9 +++++++-- arch/x86/kvm/vmx.c | 2 +- 3 files changed, 9 insertions(+), 4 deletions(-) (limited to 'arch/x86/kvm/vmx.c') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 7c7f03b5f39..78a23086e16 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -2980,7 +2980,7 @@ static int handle_exit(struct kvm_vcpu *vcpu) struct kvm_run *kvm_run = vcpu->run; u32 exit_code = svm->vmcb->control.exit_code; - trace_kvm_exit(exit_code, vcpu); + trace_kvm_exit(exit_code, vcpu, KVM_ISA_SVM); if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR0_MASK)) vcpu->arch.cr0 = svm->vmcb->save.cr0; diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index a6544b8e7c0..10610229b24 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -178,21 +178,26 @@ TRACE_EVENT(kvm_apic, #define trace_kvm_apic_read(reg, val) trace_kvm_apic(0, reg, val) #define trace_kvm_apic_write(reg, val) trace_kvm_apic(1, reg, val) +#define KVM_ISA_VMX 1 +#define KVM_ISA_SVM 2 + /* * Tracepoint for kvm guest exit: */ TRACE_EVENT(kvm_exit, - TP_PROTO(unsigned int exit_reason, struct kvm_vcpu *vcpu), - TP_ARGS(exit_reason, vcpu), + TP_PROTO(unsigned int exit_reason, struct kvm_vcpu *vcpu, u32 isa), + TP_ARGS(exit_reason, vcpu, isa), TP_STRUCT__entry( __field( unsigned int, exit_reason ) __field( unsigned long, guest_rip ) + __field( u32, isa ) ), TP_fast_assign( __entry->exit_reason = exit_reason; __entry->guest_rip = kvm_rip_read(vcpu); + __entry->isa = isa; ), TP_printk("reason %s rip 0x%lx", diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 6bf807adbeb..24959105d7f 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3700,7 +3700,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) u32 exit_reason = vmx->exit_reason; u32 vectoring_info = vmx->idt_vectoring_info; - trace_kvm_exit(exit_reason, vcpu); + trace_kvm_exit(exit_reason, vcpu, KVM_ISA_VMX); /* If guest state is invalid, start emulating */ if (vmx->emulation_required && emulate_invalid_guest_state) -- cgit v1.2.3-70-g09d2 From 586f9607962cd982293759a4e95ff06e75be5225 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 18 Nov 2010 13:09:54 +0200 Subject: KVM: Add instruction-set-specific exit qualifications to kvm_exit trace The exit reason alone is insufficient to understand exactly why an exit occured; add ISA-specific trace parameters for additional information. Because fetching these parameters is expensive on vmx, and because these parameters are fetched even if tracing is disabled, we fetch the parameters via a callback instead of as traditional trace arguments. Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/svm.c | 10 ++++++++++ arch/x86/kvm/trace.h | 8 ++++++-- arch/x86/kvm/vmx.c | 8 ++++++++ 4 files changed, 25 insertions(+), 2 deletions(-) (limited to 'arch/x86/kvm/vmx.c') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index f1e8d5b99f5..3cc80c47800 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -594,6 +594,7 @@ struct kvm_x86_ops { void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset); + void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2); const struct trace_print_flags *exit_reasons_str; }; diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 78a23086e16..28274cf307b 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -2974,6 +2974,14 @@ void dump_vmcb(struct kvm_vcpu *vcpu) } +static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2) +{ + struct vmcb_control_area *control = &to_svm(vcpu)->vmcb->control; + + *info1 = control->exit_info_1; + *info2 = control->exit_info_2; +} + static int handle_exit(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -3684,7 +3692,9 @@ static struct kvm_x86_ops svm_x86_ops = { .get_tdp_level = get_npt_level, .get_mt_mask = svm_get_mt_mask, + .get_exit_info = svm_get_exit_info, .exit_reasons_str = svm_exit_reasons_str, + .get_lpage_level = svm_get_lpage_level, .cpuid_update = svm_cpuid_update, diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 10610229b24..1357d7cf4ec 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -192,18 +192,22 @@ TRACE_EVENT(kvm_exit, __field( unsigned int, exit_reason ) __field( unsigned long, guest_rip ) __field( u32, isa ) + __field( u64, info1 ) + __field( u64, info2 ) ), TP_fast_assign( __entry->exit_reason = exit_reason; __entry->guest_rip = kvm_rip_read(vcpu); __entry->isa = isa; + kvm_x86_ops->get_exit_info(vcpu, &__entry->info1, + &__entry->info2); ), - TP_printk("reason %s rip 0x%lx", + TP_printk("reason %s rip 0x%lx info %llx %llx", ftrace_print_symbols_seq(p, __entry->exit_reason, kvm_x86_ops->exit_reasons_str), - __entry->guest_rip) + __entry->guest_rip, __entry->info1, __entry->info2) ); /* diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 24959105d7f..ab05ff68bcd 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3690,6 +3690,12 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { static const int kvm_vmx_max_exit_handlers = ARRAY_SIZE(kvm_vmx_exit_handlers); +static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2) +{ + *info1 = vmcs_readl(EXIT_QUALIFICATION); + *info2 = vmcs_read32(VM_EXIT_INTR_INFO); +} + /* * The guest has exited. See if we can fix it or if we need userspace * assistance. @@ -4334,7 +4340,9 @@ static struct kvm_x86_ops vmx_x86_ops = { .get_tdp_level = get_ept_level, .get_mt_mask = vmx_get_mt_mask, + .get_exit_info = vmx_get_exit_info, .exit_reasons_str = vmx_exit_reasons_str, + .get_lpage_level = vmx_get_lpage_level, .cpuid_update = vmx_cpuid_update, -- cgit v1.2.3-70-g09d2 From a295673aba42895997a6c1be87f467a7cfc0f332 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 2 Dec 2010 17:55:23 +0200 Subject: KVM: VMX: Return 0 from a failed VMREAD If we execute VMREAD during reboot we'll just skip over it. Instead of returning garbage, return 0, which has a much smaller chance of confusing the code. Otherwise we risk a flood of debug printk()s which block the reboot process if a serial console or netconsole is enabled. Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kvm/vmx.c') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index ab05ff68bcd..f3693ca9398 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -565,10 +565,10 @@ static inline void ept_sync_individual_addr(u64 eptp, gpa_t gpa) static unsigned long vmcs_readl(unsigned long field) { - unsigned long value; + unsigned long value = 0; asm volatile (__ex(ASM_VMX_VMREAD_RDX_RAX) - : "=a"(value) : "d"(field) : "cc"); + : "+a"(value) : "d"(field) : "cc"); return value; } -- cgit v1.2.3-70-g09d2 From 443381a828910efa3d71ba4491d180f2d0bb4212 Mon Sep 17 00:00:00 2001 From: Anthony Liguori Date: Mon, 6 Dec 2010 10:53:38 -0600 Subject: KVM: VMX: add module parameter to avoid trapping HLT instructions (v5) In certain use-cases, we want to allocate guests fixed time slices where idle guest cycles leave the machine idling. There are many approaches to achieve this but the most direct is to simply avoid trapping the HLT instruction which lets the guest directly execute the instruction putting the processor to sleep. Introduce this as a module-level option for kvm-vmx.ko since if you do this for one guest, you probably want to do it for all. Signed-off-by: Anthony Liguori Signed-off-by: Avi Kivity --- arch/x86/include/asm/vmx.h | 6 ++++++ arch/x86/kvm/vmx.c | 25 +++++++++++++++++++++++-- 2 files changed, 29 insertions(+), 2 deletions(-) (limited to 'arch/x86/kvm/vmx.c') diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 42d959056f9..9642c2216f3 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -297,6 +297,12 @@ enum vmcs_field { #define GUEST_INTR_STATE_SMI 0x00000004 #define GUEST_INTR_STATE_NMI 0x00000008 +/* GUEST_ACTIVITY_STATE flags */ +#define GUEST_ACTIVITY_ACTIVE 0 +#define GUEST_ACTIVITY_HLT 1 +#define GUEST_ACTIVITY_SHUTDOWN 2 +#define GUEST_ACTIVITY_WAIT_SIPI 3 + /* * Exit Qualifications for MOV for Control Register Access */ diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index f3693ca9398..c1952603d58 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -69,6 +69,9 @@ module_param(emulate_invalid_guest_state, bool, S_IRUGO); static int __read_mostly vmm_exclusive = 1; module_param(vmm_exclusive, bool, S_IRUGO); +static int __read_mostly yield_on_hlt = 1; +module_param(yield_on_hlt, bool, S_IRUGO); + #define KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST \ (X86_CR0_WP | X86_CR0_NE | X86_CR0_NW | X86_CR0_CD) #define KVM_GUEST_CR0_MASK \ @@ -1009,6 +1012,17 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) vmx_set_interrupt_shadow(vcpu, 0); } +static void vmx_clear_hlt(struct kvm_vcpu *vcpu) +{ + /* Ensure that we clear the HLT state in the VMCS. We don't need to + * explicitly skip the instruction because if the HLT state is set, then + * the instruction is already executing and RIP has already been + * advanced. */ + if (!yield_on_hlt && + vmcs_read32(GUEST_ACTIVITY_STATE) == GUEST_ACTIVITY_HLT) + vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE); +} + static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, bool has_error_code, u32 error_code, bool reinject) @@ -1035,6 +1049,7 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, intr_info |= INTR_TYPE_HARD_EXCEPTION; vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info); + vmx_clear_hlt(vcpu); } static bool vmx_rdtscp_supported(void) @@ -1419,7 +1434,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) &_pin_based_exec_control) < 0) return -EIO; - min = CPU_BASED_HLT_EXITING | + min = #ifdef CONFIG_X86_64 CPU_BASED_CR8_LOAD_EXITING | CPU_BASED_CR8_STORE_EXITING | @@ -1432,6 +1447,10 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) CPU_BASED_MWAIT_EXITING | CPU_BASED_MONITOR_EXITING | CPU_BASED_INVLPG_EXITING; + + if (yield_on_hlt) + min |= CPU_BASED_HLT_EXITING; + opt = CPU_BASED_TPR_SHADOW | CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; @@ -2728,7 +2747,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) vmcs_writel(GUEST_IDTR_BASE, 0); vmcs_write32(GUEST_IDTR_LIMIT, 0xffff); - vmcs_write32(GUEST_ACTIVITY_STATE, 0); + vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE); vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0); vmcs_write32(GUEST_PENDING_DBG_EXCEPTIONS, 0); @@ -2821,6 +2840,7 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu) } else intr |= INTR_TYPE_EXT_INTR; vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr); + vmx_clear_hlt(vcpu); } static void vmx_inject_nmi(struct kvm_vcpu *vcpu) @@ -2848,6 +2868,7 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu) } vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR); + vmx_clear_hlt(vcpu); } static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) -- cgit v1.2.3-70-g09d2 From eea1cff9ab732ea56358ff5e1bd8b99db2e8402d Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Tue, 21 Dec 2010 11:12:00 +0100 Subject: KVM: x86: fix CR8 handling The handling of CR8 writes in KVM is currently somewhat cumbersome. This patch makes it look like the other CR register handlers and fixes a possible issue in VMX, where the RIP would be incremented despite an injected #GP. Signed-off-by: Andre Przywara Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/svm.c | 7 ++++--- arch/x86/kvm/vmx.c | 4 ++-- arch/x86/kvm/x86.c | 18 ++++++++---------- 4 files changed, 15 insertions(+), 16 deletions(-) (limited to 'arch/x86/kvm/vmx.c') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 4461429957a..cb5cad2f2d4 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -661,7 +661,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason, int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3); int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); -void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8); +int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8); int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val); int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val); unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 73461b1cfb0..3d4b88af50f 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -2676,16 +2676,17 @@ static int cr0_write_interception(struct vcpu_svm *svm) static int cr8_write_interception(struct vcpu_svm *svm) { struct kvm_run *kvm_run = svm->vcpu.run; + int r; u8 cr8_prev = kvm_get_cr8(&svm->vcpu); /* instruction emulation calls kvm_set_cr8() */ - emulate_instruction(&svm->vcpu, 0, 0, 0); + r = emulate_instruction(&svm->vcpu, 0, 0, 0); if (irqchip_in_kernel(svm->vcpu.kvm)) { clr_cr_intercept(svm, INTERCEPT_CR8_WRITE); - return 1; + return r == EMULATE_DONE; } if (cr8_prev <= kvm_get_cr8(&svm->vcpu)) - return 1; + return r == EMULATE_DONE; kvm_run->exit_reason = KVM_EXIT_SET_TPR; return 0; } diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index c1952603d58..8e87bae09a7 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3185,8 +3185,8 @@ static int handle_cr(struct kvm_vcpu *vcpu) case 8: { u8 cr8_prev = kvm_get_cr8(vcpu); u8 cr8 = kvm_register_read(vcpu, reg); - kvm_set_cr8(vcpu, cr8); - skip_emulated_instruction(vcpu); + err = kvm_set_cr8(vcpu, cr8); + complete_insn_gp(vcpu, err); if (irqchip_in_kernel(vcpu->kvm)) return 1; if (cr8_prev <= cr8) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f569da8ff83..2dbf68cd46e 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -662,7 +662,7 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) } EXPORT_SYMBOL_GPL(kvm_set_cr3); -int __kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) +int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) { if (cr8 & CR8_RESERVED_BITS) return 1; @@ -672,12 +672,6 @@ int __kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) vcpu->arch.cr8 = cr8; return 0; } - -void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) -{ - if (__kvm_set_cr8(vcpu, cr8)) - kvm_inject_gp(vcpu, 0); -} EXPORT_SYMBOL_GPL(kvm_set_cr8); unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu) @@ -4104,7 +4098,7 @@ static int emulator_set_cr(int cr, unsigned long val, struct kvm_vcpu *vcpu) res = kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val)); break; case 8: - res = __kvm_set_cr8(vcpu, val & 0xfUL); + res = kvm_set_cr8(vcpu, val); break; default: vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr); @@ -5381,8 +5375,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) } /* re-sync apic's tpr */ - if (!irqchip_in_kernel(vcpu->kvm)) - kvm_set_cr8(vcpu, kvm_run->cr8); + if (!irqchip_in_kernel(vcpu->kvm)) { + if (kvm_set_cr8(vcpu, kvm_run->cr8) != 0) { + r = -EINVAL; + goto out; + } + } if (vcpu->arch.pio.count || vcpu->mmio_needed) { if (vcpu->mmio_needed) { -- cgit v1.2.3-70-g09d2 From db8fcefaa704ccb40b6dcd24e3b75bad3ce7dde3 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Tue, 21 Dec 2010 11:12:01 +0100 Subject: KVM: move complete_insn_gp() into x86.c move the complete_insn_gp() helper function out of the VMX part into the generic x86 part to make it usable by SVM. Signed-off-by: Andre Przywara Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/kvm_host.h | 2 ++ arch/x86/kvm/vmx.c | 16 ++++------------ arch/x86/kvm/x86.c | 9 +++++++++ 3 files changed, 15 insertions(+), 12 deletions(-) (limited to 'arch/x86/kvm/vmx.c') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index cb5cad2f2d4..cd4a990e8a1 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -828,4 +828,6 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu); extern bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn); +void kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err); + #endif /* _ASM_X86_KVM_HOST_H */ diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 8e87bae09a7..fd8ffde7375 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3147,14 +3147,6 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall) hypercall[2] = 0xc1; } -static void complete_insn_gp(struct kvm_vcpu *vcpu, int err) -{ - if (err) - kvm_inject_gp(vcpu, 0); - else - skip_emulated_instruction(vcpu); -} - static int handle_cr(struct kvm_vcpu *vcpu) { unsigned long exit_qualification, val; @@ -3172,21 +3164,21 @@ static int handle_cr(struct kvm_vcpu *vcpu) switch (cr) { case 0: err = kvm_set_cr0(vcpu, val); - complete_insn_gp(vcpu, err); + kvm_complete_insn_gp(vcpu, err); return 1; case 3: err = kvm_set_cr3(vcpu, val); - complete_insn_gp(vcpu, err); + kvm_complete_insn_gp(vcpu, err); return 1; case 4: err = kvm_set_cr4(vcpu, val); - complete_insn_gp(vcpu, err); + kvm_complete_insn_gp(vcpu, err); return 1; case 8: { u8 cr8_prev = kvm_get_cr8(vcpu); u8 cr8 = kvm_register_read(vcpu, reg); err = kvm_set_cr8(vcpu, cr8); - complete_insn_gp(vcpu, err); + kvm_complete_insn_gp(vcpu, err); if (irqchip_in_kernel(vcpu->kvm)) return 1; if (cr8_prev <= cr8) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 2dbf68cd46e..1d54cb7f335 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -334,6 +334,15 @@ void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr) } EXPORT_SYMBOL_GPL(kvm_requeue_exception); +void kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err) +{ + if (err) + kvm_inject_gp(vcpu, 0); + else + kvm_x86_ops->skip_emulated_instruction(vcpu); +} +EXPORT_SYMBOL_GPL(kvm_complete_insn_gp); + void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault) { ++vcpu->stat.pf_guest; -- cgit v1.2.3-70-g09d2 From 51d8b66199e94284e7725a79eae4a38de4b80d54 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Tue, 21 Dec 2010 11:12:02 +0100 Subject: KVM: cleanup emulate_instruction emulate_instruction had many callers, but only one used all parameters. One parameter was unused, another one is now hidden by a wrapper function (required for a future addition anyway), so most callers use now a shorter parameter list. Signed-off-by: Andre Przywara Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/kvm_host.h | 11 +++++++++-- arch/x86/kvm/mmu.c | 2 +- arch/x86/kvm/svm.c | 14 +++++++------- arch/x86/kvm/vmx.c | 12 ++++++------ arch/x86/kvm/x86.c | 11 +++++------ 5 files changed, 28 insertions(+), 22 deletions(-) (limited to 'arch/x86/kvm/vmx.c') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index cd4a990e8a1..de00b6026b7 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -634,8 +634,15 @@ enum emulation_result { #define EMULTYPE_NO_DECODE (1 << 0) #define EMULTYPE_TRAP_UD (1 << 1) #define EMULTYPE_SKIP (1 << 2) -int emulate_instruction(struct kvm_vcpu *vcpu, - unsigned long cr2, u16 error_code, int emulation_type); +int x86_emulate_instruction(struct kvm_vcpu *vcpu, + unsigned long cr2, int emulation_type); + +static inline int emulate_instruction(struct kvm_vcpu *vcpu, + int emulation_type) +{ + return x86_emulate_instruction(vcpu, 0, emulation_type); +} + void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); void realmode_lidt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 475a1225f6e..01c5a104031 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -3348,7 +3348,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code) if (r) goto out; - er = emulate_instruction(vcpu, cr2, error_code, 0); + er = x86_emulate_instruction(vcpu, cr2, 0); switch (er) { case EMULATE_DONE: diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 3d4b88af50f..90d06582aac 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -475,7 +475,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) svm->next_rip = svm->vmcb->control.next_rip; if (!svm->next_rip) { - if (emulate_instruction(vcpu, 0, 0, EMULTYPE_SKIP) != + if (emulate_instruction(vcpu, EMULTYPE_SKIP) != EMULATE_DONE) printk(KERN_DEBUG "%s: NOP\n", __func__); return; @@ -1586,7 +1586,7 @@ static int ud_interception(struct vcpu_svm *svm) { int er; - er = emulate_instruction(&svm->vcpu, 0, 0, EMULTYPE_TRAP_UD); + er = emulate_instruction(&svm->vcpu, EMULTYPE_TRAP_UD); if (er != EMULATE_DONE) kvm_queue_exception(&svm->vcpu, UD_VECTOR); return 1; @@ -1703,7 +1703,7 @@ static int io_interception(struct vcpu_svm *svm) string = (io_info & SVM_IOIO_STR_MASK) != 0; in = (io_info & SVM_IOIO_TYPE_MASK) != 0; if (string || in) - return emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DONE; + return emulate_instruction(vcpu, 0) == EMULATE_DONE; port = io_info >> 16; size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; @@ -2648,12 +2648,12 @@ static int iret_interception(struct vcpu_svm *svm) static int invlpg_interception(struct vcpu_svm *svm) { - return emulate_instruction(&svm->vcpu, 0, 0, 0) == EMULATE_DONE; + return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE; } static int emulate_on_interception(struct vcpu_svm *svm) { - return emulate_instruction(&svm->vcpu, 0, 0, 0) == EMULATE_DONE; + return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE; } static int cr0_write_interception(struct vcpu_svm *svm) @@ -2661,7 +2661,7 @@ static int cr0_write_interception(struct vcpu_svm *svm) struct kvm_vcpu *vcpu = &svm->vcpu; int r; - r = emulate_instruction(&svm->vcpu, 0, 0, 0); + r = emulate_instruction(&svm->vcpu, 0); if (svm->nested.vmexit_rip) { kvm_register_write(vcpu, VCPU_REGS_RIP, svm->nested.vmexit_rip); @@ -2680,7 +2680,7 @@ static int cr8_write_interception(struct vcpu_svm *svm) u8 cr8_prev = kvm_get_cr8(&svm->vcpu); /* instruction emulation calls kvm_set_cr8() */ - r = emulate_instruction(&svm->vcpu, 0, 0, 0); + r = emulate_instruction(&svm->vcpu, 0); if (irqchip_in_kernel(svm->vcpu.kvm)) { clr_cr_intercept(svm, INTERCEPT_CR8_WRITE); return r == EMULATE_DONE; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index fd8ffde7375..f3c60fb8d95 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2939,7 +2939,7 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu, * Cause the #SS fault with 0 error code in VM86 mode. */ if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) - if (emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DONE) + if (emulate_instruction(vcpu, 0) == EMULATE_DONE) return 1; /* * Forward all other exceptions that are valid in real mode. @@ -3036,7 +3036,7 @@ static int handle_exception(struct kvm_vcpu *vcpu) } if (is_invalid_opcode(intr_info)) { - er = emulate_instruction(vcpu, 0, 0, EMULTYPE_TRAP_UD); + er = emulate_instruction(vcpu, EMULTYPE_TRAP_UD); if (er != EMULATE_DONE) kvm_queue_exception(vcpu, UD_VECTOR); return 1; @@ -3127,7 +3127,7 @@ static int handle_io(struct kvm_vcpu *vcpu) ++vcpu->stat.io_exits; if (string || in) - return emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DONE; + return emulate_instruction(vcpu, 0) == EMULATE_DONE; port = exit_qualification >> 16; size = (exit_qualification & 7) + 1; @@ -3372,7 +3372,7 @@ static int handle_vmx_insn(struct kvm_vcpu *vcpu) static int handle_invd(struct kvm_vcpu *vcpu) { - return emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DONE; + return emulate_instruction(vcpu, 0) == EMULATE_DONE; } static int handle_invlpg(struct kvm_vcpu *vcpu) @@ -3403,7 +3403,7 @@ static int handle_xsetbv(struct kvm_vcpu *vcpu) static int handle_apic_access(struct kvm_vcpu *vcpu) { - return emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DONE; + return emulate_instruction(vcpu, 0) == EMULATE_DONE; } static int handle_task_switch(struct kvm_vcpu *vcpu) @@ -3618,7 +3618,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) && (kvm_get_rflags(&vmx->vcpu) & X86_EFLAGS_IF)) return handle_interrupt_window(&vmx->vcpu); - err = emulate_instruction(vcpu, 0, 0, 0); + err = emulate_instruction(vcpu, 0); if (err == EMULATE_DO_MMIO) { ret = 0; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 1d54cb7f335..a6fcb76196b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4363,10 +4363,9 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva) return false; } -int emulate_instruction(struct kvm_vcpu *vcpu, - unsigned long cr2, - u16 error_code, - int emulation_type) +int x86_emulate_instruction(struct kvm_vcpu *vcpu, + unsigned long cr2, + int emulation_type) { int r; struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode; @@ -4474,7 +4473,7 @@ done: return r; } -EXPORT_SYMBOL_GPL(emulate_instruction); +EXPORT_SYMBOL_GPL(x86_emulate_instruction); int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port) { @@ -5398,7 +5397,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) vcpu->mmio_needed = 0; } vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); - r = emulate_instruction(vcpu, 0, 0, EMULTYPE_NO_DECODE); + r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE); srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); if (r != EMULATE_DONE) { r = 0; -- cgit v1.2.3-70-g09d2 From dc25e89e07d5ef31c476117d2c76b34dbb22196c Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Tue, 21 Dec 2010 11:12:07 +0100 Subject: KVM: SVM: copy instruction bytes from VMCB In case of a nested page fault or an intercepted #PF newer SVM implementations provide a copy of the faulting instruction bytes in the VMCB. Use these bytes to feed the instruction emulator and avoid the costly guest instruction fetch in this case. Signed-off-by: Andre Przywara Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/kvm_emulate.h | 2 +- arch/x86/include/asm/kvm_host.h | 9 +++++---- arch/x86/include/asm/svm.h | 4 +++- arch/x86/kvm/emulate.c | 7 +++++-- arch/x86/kvm/mmu.c | 5 +++-- arch/x86/kvm/svm.c | 4 +++- arch/x86/kvm/vmx.c | 4 ++-- arch/x86/kvm/x86.c | 6 ++++-- 8 files changed, 26 insertions(+), 15 deletions(-) (limited to 'arch/x86/kvm/vmx.c') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index bf70ecea397..8e37deb1eb3 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -265,7 +265,7 @@ struct x86_emulate_ctxt { #define X86EMUL_MODE_HOST X86EMUL_MODE_PROT64 #endif -int x86_decode_insn(struct x86_emulate_ctxt *ctxt); +int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len); #define EMULATION_FAILED -1 #define EMULATION_OK 0 #define EMULATION_RESTART 1 diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index de00b6026b7..6268f6ce643 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -634,13 +634,13 @@ enum emulation_result { #define EMULTYPE_NO_DECODE (1 << 0) #define EMULTYPE_TRAP_UD (1 << 1) #define EMULTYPE_SKIP (1 << 2) -int x86_emulate_instruction(struct kvm_vcpu *vcpu, - unsigned long cr2, int emulation_type); +int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2, + int emulation_type, void *insn, int insn_len); static inline int emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type) { - return x86_emulate_instruction(vcpu, 0, emulation_type); + return x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0); } void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); @@ -721,7 +721,8 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); int kvm_fix_hypercall(struct kvm_vcpu *vcpu); -int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code); +int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code, + void *insn, int insn_len); void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva); void kvm_enable_tdp(void); diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index f0ffb818408..f2b83bc7d78 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -83,7 +83,9 @@ struct __attribute__ ((__packed__)) vmcb_control_area { u32 clean; u32 reserved_5; u64 next_rip; - u8 reserved_6[816]; + u8 insn_len; + u8 insn_bytes[15]; + u8 reserved_6[800]; }; diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 36534ecaf59..caf966781d2 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2610,7 +2610,7 @@ done: } int -x86_decode_insn(struct x86_emulate_ctxt *ctxt) +x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) { struct x86_emulate_ops *ops = ctxt->ops; struct decode_cache *c = &ctxt->decode; @@ -2621,7 +2621,10 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt) struct operand memop = { .type = OP_NONE }; c->eip = ctxt->eip; - c->fetch.start = c->fetch.end = c->eip; + c->fetch.start = c->eip; + c->fetch.end = c->fetch.start + insn_len; + if (insn_len > 0) + memcpy(c->fetch.data, insn, insn_len); ctxt->cs_base = seg_base(ctxt, ops, VCPU_SREG_CS); switch (mode) { diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 01c5a104031..ea6063d9242 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -3330,7 +3330,8 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu) } } -int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code) +int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code, + void *insn, int insn_len) { int r; enum emulation_result er; @@ -3348,7 +3349,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code) if (r) goto out; - er = x86_emulate_instruction(vcpu, cr2, 0); + er = x86_emulate_instruction(vcpu, cr2, 0, insn, insn_len); switch (er) { case EMULATE_DONE: diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index a04c01e324b..af4b911a8be 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1527,7 +1527,9 @@ static int pf_interception(struct vcpu_svm *svm) trace_kvm_page_fault(fault_address, error_code); if (!npt_enabled && kvm_event_needs_reinjection(&svm->vcpu)) kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address); - r = kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code); + r = kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code, + svm->vmcb->control.insn_bytes, + svm->vmcb->control.insn_len); break; case KVM_PV_REASON_PAGE_NOT_PRESENT: svm->apf_reason = 0; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index f3c60fb8d95..736f83955ce 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3055,7 +3055,7 @@ static int handle_exception(struct kvm_vcpu *vcpu) if (kvm_event_needs_reinjection(vcpu)) kvm_mmu_unprotect_page_virt(vcpu, cr2); - return kvm_mmu_page_fault(vcpu, cr2, error_code); + return kvm_mmu_page_fault(vcpu, cr2, error_code, NULL, 0); } if (vmx->rmode.vm86_active && @@ -3502,7 +3502,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu) gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); trace_kvm_page_fault(gpa, exit_qualification); - return kvm_mmu_page_fault(vcpu, gpa, exit_qualification & 0x3); + return kvm_mmu_page_fault(vcpu, gpa, exit_qualification & 0x3, NULL, 0); } static u64 ept_rsvd_mask(u64 spte, int level) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a6fcb76196b..7ad9cda8ff3 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4365,7 +4365,9 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva) int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2, - int emulation_type) + int emulation_type, + void *insn, + int insn_len) { int r; struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode; @@ -4386,7 +4388,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, vcpu->arch.emulate_ctxt.have_exception = false; vcpu->arch.emulate_ctxt.perm_ok = false; - r = x86_decode_insn(&vcpu->arch.emulate_ctxt); + r = x86_decode_insn(&vcpu->arch.emulate_ctxt, insn, insn_len); if (r == X86EMUL_PROPAGATE_FAULT) goto done; -- cgit v1.2.3-70-g09d2 From 110312c84b5fbd4daf5de2417fa8ab5ec883858d Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 21 Dec 2010 12:54:20 +0200 Subject: KVM: VMX: Optimize atomic EFER load When NX is enabled on the host but not on the guest, we use the entry/exit msr load facility, which is slow. Optimize it to use entry/exit efer load, which is ~1200 cycles faster. Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/vmx.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'arch/x86/kvm/vmx.c') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 736f83955ce..a713c69bfce 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -191,6 +191,8 @@ static unsigned long *vmx_io_bitmap_b; static unsigned long *vmx_msr_bitmap_legacy; static unsigned long *vmx_msr_bitmap_longmode; +static bool cpu_has_load_ia32_efer; + static DECLARE_BITMAP(vmx_vpid_bitmap, VMX_NR_VPIDS); static DEFINE_SPINLOCK(vmx_vpid_lock); @@ -664,6 +666,12 @@ static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr) unsigned i; struct msr_autoload *m = &vmx->msr_autoload; + if (msr == MSR_EFER && cpu_has_load_ia32_efer) { + vmcs_clear_bits(VM_ENTRY_CONTROLS, VM_ENTRY_LOAD_IA32_EFER); + vmcs_clear_bits(VM_EXIT_CONTROLS, VM_EXIT_LOAD_IA32_EFER); + return; + } + for (i = 0; i < m->nr; ++i) if (m->guest[i].index == msr) break; @@ -683,6 +691,14 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, unsigned i; struct msr_autoload *m = &vmx->msr_autoload; + if (msr == MSR_EFER && cpu_has_load_ia32_efer) { + vmcs_write64(GUEST_IA32_EFER, guest_val); + vmcs_write64(HOST_IA32_EFER, host_val); + vmcs_set_bits(VM_ENTRY_CONTROLS, VM_ENTRY_LOAD_IA32_EFER); + vmcs_set_bits(VM_EXIT_CONTROLS, VM_EXIT_LOAD_IA32_EFER); + return; + } + for (i = 0; i < m->nr; ++i) if (m->guest[i].index == msr) break; @@ -1418,6 +1434,14 @@ static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt, return 0; } +static __init bool allow_1_setting(u32 msr, u32 ctl) +{ + u32 vmx_msr_low, vmx_msr_high; + + rdmsr(msr, vmx_msr_low, vmx_msr_high); + return vmx_msr_high & ctl; +} + static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) { u32 vmx_msr_low, vmx_msr_high; @@ -1532,6 +1556,12 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) vmcs_conf->vmexit_ctrl = _vmexit_control; vmcs_conf->vmentry_ctrl = _vmentry_control; + cpu_has_load_ia32_efer = + allow_1_setting(MSR_IA32_VMX_ENTRY_CTLS, + VM_ENTRY_LOAD_IA32_EFER) + && allow_1_setting(MSR_IA32_VMX_EXIT_CTLS, + VM_EXIT_LOAD_IA32_EFER); + return 0; } -- cgit v1.2.3-70-g09d2 From 16d8f72f70a85ce333788204a69318827130ca75 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 21 Dec 2010 16:51:50 +0200 Subject: KVM: VMX: Correct asm constraint in vmcs_load()/vmcs_clear() 'error' is byte sized, so use a byte register constraint. Acked-by: Randy Dunlap Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/vmx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kvm/vmx.c') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index a713c69bfce..67c08527351 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -477,7 +477,7 @@ static void vmcs_clear(struct vmcs *vmcs) u8 error; asm volatile (__ex(ASM_VMX_VMCLEAR_RAX) "; setna %0" - : "=g"(error) : "a"(&phys_addr), "m"(phys_addr) + : "=qm"(error) : "a"(&phys_addr), "m"(phys_addr) : "cc", "memory"); if (error) printk(KERN_ERR "kvm: vmclear fail: %p/%llx\n", @@ -490,7 +490,7 @@ static void vmcs_load(struct vmcs *vmcs) u8 error; asm volatile (__ex(ASM_VMX_VMPTRLD_RAX) "; setna %0" - : "=g"(error) : "a"(&phys_addr), "m"(phys_addr) + : "=qm"(error) : "a"(&phys_addr), "m"(phys_addr) : "cc", "memory"); if (error) printk(KERN_ERR "kvm: vmptrld %p/%llx fail\n", -- cgit v1.2.3-70-g09d2 From 9f8fe5043fd26627c2fa2e9a41896885e675000b Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 5 Dec 2010 17:30:00 +0200 Subject: KVM: Replace reads of vcpu->arch.cr3 by an accessor This allows us to keep cr3 in the VMCS, later on. Signed-off-by: Avi Kivity --- arch/x86/kvm/kvm_cache_regs.h | 5 +++++ arch/x86/kvm/mmu.c | 6 +++--- arch/x86/kvm/svm.c | 10 +++++----- arch/x86/kvm/vmx.c | 7 ++++--- arch/x86/kvm/x86.c | 19 ++++++++++--------- 5 files changed, 27 insertions(+), 20 deletions(-) (limited to 'arch/x86/kvm/vmx.c') diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h index 95ac3afa6e6..a6bf8db326f 100644 --- a/arch/x86/kvm/kvm_cache_regs.h +++ b/arch/x86/kvm/kvm_cache_regs.h @@ -73,6 +73,11 @@ static inline ulong kvm_read_cr4_bits(struct kvm_vcpu *vcpu, ulong mask) return vcpu->arch.cr4 & mask; } +static inline ulong kvm_read_cr3(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.cr3; +} + static inline ulong kvm_read_cr4(struct kvm_vcpu *vcpu) { return kvm_read_cr4_bits(vcpu, ~0UL); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index a2127f82e78..e558795fccd 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2727,13 +2727,13 @@ void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu) static void paging_new_cr3(struct kvm_vcpu *vcpu) { - pgprintk("%s: cr3 %lx\n", __func__, vcpu->arch.cr3); + pgprintk("%s: cr3 %lx\n", __func__, kvm_read_cr3(vcpu)); mmu_free_roots(vcpu); } static unsigned long get_cr3(struct kvm_vcpu *vcpu) { - return vcpu->arch.cr3; + return kvm_read_cr3(vcpu); } static void inject_page_fault(struct kvm_vcpu *vcpu, @@ -3637,7 +3637,7 @@ static int kvm_pv_mmu_write(struct kvm_vcpu *vcpu, static int kvm_pv_mmu_flush_tlb(struct kvm_vcpu *vcpu) { - (void)kvm_set_cr3(vcpu, vcpu->arch.cr3); + (void)kvm_set_cr3(vcpu, kvm_read_cr3(vcpu)); return 1; } diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index af4b911a8be..a7b04c0bd7a 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1174,7 +1174,7 @@ static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) switch (reg) { case VCPU_EXREG_PDPTR: BUG_ON(!npt_enabled); - load_pdptrs(vcpu, vcpu->arch.walk_mmu, vcpu->arch.cr3); + load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu)); break; default: BUG(); @@ -2116,7 +2116,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) nested_vmcb->save.idtr = vmcb->save.idtr; nested_vmcb->save.efer = svm->vcpu.arch.efer; nested_vmcb->save.cr0 = kvm_read_cr0(&svm->vcpu); - nested_vmcb->save.cr3 = svm->vcpu.arch.cr3; + nested_vmcb->save.cr3 = kvm_read_cr3(&svm->vcpu); nested_vmcb->save.cr2 = vmcb->save.cr2; nested_vmcb->save.cr4 = svm->vcpu.arch.cr4; nested_vmcb->save.rflags = vmcb->save.rflags; @@ -2311,7 +2311,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) if (npt_enabled) hsave->save.cr3 = vmcb->save.cr3; else - hsave->save.cr3 = svm->vcpu.arch.cr3; + hsave->save.cr3 = kvm_read_cr3(&svm->vcpu); copy_vmcb_control_area(hsave, vmcb); @@ -2715,7 +2715,7 @@ static int cr_interception(struct vcpu_svm *svm) val = svm->vcpu.arch.cr2; break; case 3: - val = svm->vcpu.arch.cr3; + val = kvm_read_cr3(&svm->vcpu); break; case 4: val = kvm_read_cr4(&svm->vcpu); @@ -3693,7 +3693,7 @@ static void set_tdp_cr3(struct kvm_vcpu *vcpu, unsigned long root) mark_dirty(svm->vmcb, VMCB_NPT); /* Also sync guest cr3 here in case we live migrate */ - svm->vmcb->save.cr3 = vcpu->arch.cr3; + svm->vmcb->save.cr3 = kvm_read_cr3(vcpu); mark_dirty(svm->vmcb, VMCB_CR); svm_flush_tlb(vcpu); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 67c08527351..141956ebf79 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1989,7 +1989,7 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) if (enable_ept) { eptp = construct_eptp(cr3); vmcs_write64(EPT_POINTER, eptp); - guest_cr3 = is_paging(vcpu) ? vcpu->arch.cr3 : + guest_cr3 = is_paging(vcpu) ? kvm_read_cr3(vcpu) : vcpu->kvm->arch.ept_identity_map_addr; ept_load_pdptrs(vcpu); } @@ -3227,8 +3227,9 @@ static int handle_cr(struct kvm_vcpu *vcpu) case 1: /*mov from cr*/ switch (cr) { case 3: - kvm_register_write(vcpu, reg, vcpu->arch.cr3); - trace_kvm_cr_read(cr, vcpu->arch.cr3); + val = kvm_read_cr3(vcpu); + kvm_register_write(vcpu, reg, val); + trace_kvm_cr_read(cr, val); skip_emulated_instruction(vcpu); return 1; case 8: diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 7ad9cda8ff3..6e50314d64f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -473,8 +473,8 @@ static bool pdptrs_changed(struct kvm_vcpu *vcpu) (unsigned long *)&vcpu->arch.regs_avail)) return true; - gfn = (vcpu->arch.cr3 & ~31u) >> PAGE_SHIFT; - offset = (vcpu->arch.cr3 & ~31u) & (PAGE_SIZE - 1); + gfn = (kvm_read_cr3(vcpu) & ~31u) >> PAGE_SHIFT; + offset = (kvm_read_cr3(vcpu) & ~31u) & (PAGE_SIZE - 1); r = kvm_read_nested_guest_page(vcpu, gfn, pdpte, offset, sizeof(pdpte), PFERR_USER_MASK | PFERR_WRITE_MASK); if (r < 0) @@ -519,7 +519,7 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) } else #endif if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.walk_mmu, - vcpu->arch.cr3)) + kvm_read_cr3(vcpu))) return 1; } @@ -611,7 +611,8 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) return 1; } else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE) && ((cr4 ^ old_cr4) & pdptr_bits) - && !load_pdptrs(vcpu, vcpu->arch.walk_mmu, vcpu->arch.cr3)) + && !load_pdptrs(vcpu, vcpu->arch.walk_mmu, + kvm_read_cr3(vcpu))) return 1; if (cr4 & X86_CR4_VMXE) @@ -631,7 +632,7 @@ EXPORT_SYMBOL_GPL(kvm_set_cr4); int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) { - if (cr3 == vcpu->arch.cr3 && !pdptrs_changed(vcpu)) { + if (cr3 == kvm_read_cr3(vcpu) && !pdptrs_changed(vcpu)) { kvm_mmu_sync_roots(vcpu); kvm_mmu_flush_tlb(vcpu); return 0; @@ -4073,7 +4074,7 @@ static unsigned long emulator_get_cr(int cr, struct kvm_vcpu *vcpu) value = vcpu->arch.cr2; break; case 3: - value = vcpu->arch.cr3; + value = kvm_read_cr3(vcpu); break; case 4: value = kvm_read_cr4(vcpu); @@ -5512,7 +5513,7 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, sregs->cr0 = kvm_read_cr0(vcpu); sregs->cr2 = vcpu->arch.cr2; - sregs->cr3 = vcpu->arch.cr3; + sregs->cr3 = kvm_read_cr3(vcpu); sregs->cr4 = kvm_read_cr4(vcpu); sregs->cr8 = kvm_get_cr8(vcpu); sregs->efer = vcpu->arch.efer; @@ -5580,7 +5581,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, kvm_x86_ops->set_gdt(vcpu, &dt); vcpu->arch.cr2 = sregs->cr2; - mmu_reset_needed |= vcpu->arch.cr3 != sregs->cr3; + mmu_reset_needed |= kvm_read_cr3(vcpu) != sregs->cr3; vcpu->arch.cr3 = sregs->cr3; kvm_set_cr8(vcpu, sregs->cr8); @@ -5598,7 +5599,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, if (sregs->cr4 & X86_CR4_OSXSAVE) update_cpuid(vcpu); if (!is_long_mode(vcpu) && is_pae(vcpu)) { - load_pdptrs(vcpu, vcpu->arch.walk_mmu, vcpu->arch.cr3); + load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu)); mmu_reset_needed = 1; } -- cgit v1.2.3-70-g09d2 From aff48baa34c033318ad322ecbf2e4bcd891b29ca Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 5 Dec 2010 18:56:11 +0200 Subject: KVM: Fetch guest cr3 from hardware on demand Instead of syncing the guest cr3 every exit, which is expensince on vmx with ept enabled, sync it only on demand. [sheng: fix incorrect cr3 seen by Windows XP] Signed-off-by: Sheng Yang Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 2 ++ arch/x86/kvm/kvm_cache_regs.h | 2 ++ arch/x86/kvm/svm.c | 5 +++++ arch/x86/kvm/vmx.c | 18 ++++++++++++------ arch/x86/kvm/x86.c | 2 ++ 5 files changed, 23 insertions(+), 6 deletions(-) (limited to 'arch/x86/kvm/vmx.c') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 6268f6ce643..95f026be8b5 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -117,6 +117,7 @@ enum kvm_reg { enum kvm_reg_ex { VCPU_EXREG_PDPTR = NR_VCPU_REGS, + VCPU_EXREG_CR3, }; enum { @@ -533,6 +534,7 @@ struct kvm_x86_ops { struct kvm_segment *var, int seg); void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l); void (*decache_cr0_guest_bits)(struct kvm_vcpu *vcpu); + void (*decache_cr3)(struct kvm_vcpu *vcpu); void (*decache_cr4_guest_bits)(struct kvm_vcpu *vcpu); void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0); void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h index a6bf8db326f..3377d53fcd3 100644 --- a/arch/x86/kvm/kvm_cache_regs.h +++ b/arch/x86/kvm/kvm_cache_regs.h @@ -75,6 +75,8 @@ static inline ulong kvm_read_cr4_bits(struct kvm_vcpu *vcpu, ulong mask) static inline ulong kvm_read_cr3(struct kvm_vcpu *vcpu) { + if (!test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail)) + kvm_x86_ops->decache_cr3(vcpu); return vcpu->arch.cr3; } diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index a7b04c0bd7a..25bd1bc5aad 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1327,6 +1327,10 @@ static void svm_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) { } +static void svm_decache_cr3(struct kvm_vcpu *vcpu) +{ +} + static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) { } @@ -3871,6 +3875,7 @@ static struct kvm_x86_ops svm_x86_ops = { .get_cpl = svm_get_cpl, .get_cs_db_l_bits = kvm_get_cs_db_l_bits, .decache_cr0_guest_bits = svm_decache_cr0_guest_bits, + .decache_cr3 = svm_decache_cr3, .decache_cr4_guest_bits = svm_decache_cr4_guest_bits, .set_cr0 = svm_set_cr0, .set_cr3 = svm_set_cr3, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 141956ebf79..1896cada805 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -180,6 +180,7 @@ static int init_rmode(struct kvm *kvm); static u64 construct_eptp(unsigned long root_hpa); static void kvm_cpu_vmxon(u64 addr); static void kvm_cpu_vmxoff(void); +static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3); static DEFINE_PER_CPU(struct vmcs *, vmxarea); static DEFINE_PER_CPU(struct vmcs *, current_vmcs); @@ -1866,6 +1867,13 @@ static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) vcpu->arch.cr0 |= vmcs_readl(GUEST_CR0) & cr0_guest_owned_bits; } +static void vmx_decache_cr3(struct kvm_vcpu *vcpu) +{ + if (enable_ept && is_paging(vcpu)) + vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); + __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); +} + static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) { ulong cr4_guest_owned_bits = vcpu->arch.cr4_guest_owned_bits; @@ -1909,6 +1917,7 @@ static void ept_update_paging_mode_cr0(unsigned long *hw_cr0, unsigned long cr0, struct kvm_vcpu *vcpu) { + vmx_decache_cr3(vcpu); if (!(cr0 & X86_CR0_PG)) { /* From paging/starting to nonpaging */ vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, @@ -3756,11 +3765,6 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) if (vmx->emulation_required && emulate_invalid_guest_state) return handle_invalid_guest_state(vcpu); - /* Access CR3 don't cause VMExit in paging mode, so we need - * to sync with guest real CR3. */ - if (enable_ept && is_paging(vcpu)) - vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); - if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) { vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY; vcpu->run->fail_entry.hardware_entry_failure_reason @@ -4077,7 +4081,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) ); vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP) - | (1 << VCPU_EXREG_PDPTR)); + | (1 << VCPU_EXREG_PDPTR) + | (1 << VCPU_EXREG_CR3)); vcpu->arch.regs_dirty = 0; vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); @@ -4344,6 +4349,7 @@ static struct kvm_x86_ops vmx_x86_ops = { .get_cpl = vmx_get_cpl, .get_cs_db_l_bits = vmx_get_cs_db_l_bits, .decache_cr0_guest_bits = vmx_decache_cr0_guest_bits, + .decache_cr3 = vmx_decache_cr3, .decache_cr4_guest_bits = vmx_decache_cr4_guest_bits, .set_cr0 = vmx_set_cr0, .set_cr3 = vmx_set_cr3, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6e50314d64f..fa708c9a743 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -667,6 +667,7 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) if (unlikely(!gfn_to_memslot(vcpu->kvm, cr3 >> PAGE_SHIFT))) return 1; vcpu->arch.cr3 = cr3; + __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); vcpu->arch.mmu.new_cr3(vcpu); return 0; } @@ -5583,6 +5584,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, vcpu->arch.cr2 = sregs->cr2; mmu_reset_needed |= kvm_read_cr3(vcpu) != sregs->cr3; vcpu->arch.cr3 = sregs->cr3; + __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); kvm_set_cr8(vcpu, sregs->cr8); -- cgit v1.2.3-70-g09d2 From 444e863d13373b958ec6b133dcecf140d6c6c2ab Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Mon, 27 Dec 2010 17:25:04 +0200 Subject: KVM: VMX: when entering real mode align segment base to 16 bytes VMX checks that base is equal segment shifted 4 bits left. Otherwise guest entry fails. Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch/x86/kvm/vmx.c') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 1896cada805..bf89ec2cfb8 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1736,9 +1736,13 @@ static void fix_rmode_seg(int seg, struct kvm_save_segment *save) save->limit = vmcs_read32(sf->limit); save->ar = vmcs_read32(sf->ar_bytes); vmcs_write16(sf->selector, save->base >> 4); - vmcs_write32(sf->base, save->base & 0xfffff); + vmcs_write32(sf->base, save->base & 0xffff0); vmcs_write32(sf->limit, 0xffff); vmcs_write32(sf->ar_bytes, 0xf3); + if (save->base & 0xf) + printk_once(KERN_WARNING "kvm: segment base is not paragraph" + " aligned when entering protected mode (seg=%d)", + seg); } static void enter_rmode(struct kvm_vcpu *vcpu) -- cgit v1.2.3-70-g09d2