From 851ba6922ac575b749f63dee0ae072808163ba6a Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 24 Aug 2009 11:10:17 +0300 Subject: KVM: Don't pass kvm_run arguments They're just copies of vcpu->run, which is readily accessible. Signed-off-by: Avi Kivity --- arch/x86/kvm/svm.c | 102 ++++++++++++++++++++++++++++------------------------- 1 file changed, 54 insertions(+), 48 deletions(-) (limited to 'arch/x86/kvm/svm.c') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index c17404add91..92048a626d4 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -286,7 +286,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) struct vcpu_svm *svm = to_svm(vcpu); if (!svm->next_rip) { - if (emulate_instruction(vcpu, vcpu->run, 0, 0, EMULTYPE_SKIP) != + if (emulate_instruction(vcpu, 0, 0, EMULTYPE_SKIP) != EMULATE_DONE) printk(KERN_DEBUG "%s: NOP\n", __func__); return; @@ -1180,7 +1180,7 @@ static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value, } } -static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +static int pf_interception(struct vcpu_svm *svm) { u64 fault_address; u32 error_code; @@ -1194,8 +1194,10 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code); } -static int db_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +static int db_interception(struct vcpu_svm *svm) { + struct kvm_run *kvm_run = svm->vcpu.run; + if (!(svm->vcpu.guest_debug & (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) && !svm->vcpu.arch.singlestep) { @@ -1223,25 +1225,27 @@ static int db_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) return 1; } -static int bp_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +static int bp_interception(struct vcpu_svm *svm) { + struct kvm_run *kvm_run = svm->vcpu.run; + kvm_run->exit_reason = KVM_EXIT_DEBUG; kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip; kvm_run->debug.arch.exception = BP_VECTOR; return 0; } -static int ud_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +static int ud_interception(struct vcpu_svm *svm) { int er; - er = emulate_instruction(&svm->vcpu, kvm_run, 0, 0, EMULTYPE_TRAP_UD); + er = emulate_instruction(&svm->vcpu, 0, 0, EMULTYPE_TRAP_UD); if (er != EMULATE_DONE) kvm_queue_exception(&svm->vcpu, UD_VECTOR); return 1; } -static int nm_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +static int nm_interception(struct vcpu_svm *svm) { svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR); if (!(svm->vcpu.arch.cr0 & X86_CR0_TS)) @@ -1251,7 +1255,7 @@ static int nm_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) return 1; } -static int mc_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +static int mc_interception(struct vcpu_svm *svm) { /* * On an #MC intercept the MCE handler is not called automatically in @@ -1264,8 +1268,10 @@ static int mc_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) return 1; } -static int shutdown_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +static int shutdown_interception(struct vcpu_svm *svm) { + struct kvm_run *kvm_run = svm->vcpu.run; + /* * VMCB is undefined after a SHUTDOWN intercept * so reinitialize it. @@ -1277,7 +1283,7 @@ static int shutdown_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) return 0; } -static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +static int io_interception(struct vcpu_svm *svm) { u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */ int size, in, string; @@ -1291,7 +1297,7 @@ static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) if (string) { if (emulate_instruction(&svm->vcpu, - kvm_run, 0, 0, 0) == EMULATE_DO_MMIO) + 0, 0, 0) == EMULATE_DO_MMIO) return 0; return 1; } @@ -1301,33 +1307,33 @@ static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; skip_emulated_instruction(&svm->vcpu); - return kvm_emulate_pio(&svm->vcpu, kvm_run, in, size, port); + return kvm_emulate_pio(&svm->vcpu, in, size, port); } -static int nmi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +static int nmi_interception(struct vcpu_svm *svm) { return 1; } -static int intr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +static int intr_interception(struct vcpu_svm *svm) { ++svm->vcpu.stat.irq_exits; return 1; } -static int nop_on_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +static int nop_on_interception(struct vcpu_svm *svm) { return 1; } -static int halt_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +static int halt_interception(struct vcpu_svm *svm) { svm->next_rip = kvm_rip_read(&svm->vcpu) + 1; skip_emulated_instruction(&svm->vcpu); return kvm_emulate_halt(&svm->vcpu); } -static int vmmcall_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +static int vmmcall_interception(struct vcpu_svm *svm) { svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; skip_emulated_instruction(&svm->vcpu); @@ -1837,7 +1843,7 @@ static void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb) to_vmcb->save.sysenter_eip = from_vmcb->save.sysenter_eip; } -static int vmload_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +static int vmload_interception(struct vcpu_svm *svm) { struct vmcb *nested_vmcb; @@ -1857,7 +1863,7 @@ static int vmload_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) return 1; } -static int vmsave_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +static int vmsave_interception(struct vcpu_svm *svm) { struct vmcb *nested_vmcb; @@ -1877,7 +1883,7 @@ static int vmsave_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) return 1; } -static int vmrun_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +static int vmrun_interception(struct vcpu_svm *svm) { nsvm_printk("VMrun\n"); @@ -1907,7 +1913,7 @@ failed: return 1; } -static int stgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +static int stgi_interception(struct vcpu_svm *svm) { if (nested_svm_check_permissions(svm)) return 1; @@ -1920,7 +1926,7 @@ static int stgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) return 1; } -static int clgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +static int clgi_interception(struct vcpu_svm *svm) { if (nested_svm_check_permissions(svm)) return 1; @@ -1937,7 +1943,7 @@ static int clgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) return 1; } -static int invlpga_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +static int invlpga_interception(struct vcpu_svm *svm) { struct kvm_vcpu *vcpu = &svm->vcpu; nsvm_printk("INVLPGA\n"); @@ -1950,15 +1956,13 @@ static int invlpga_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) return 1; } -static int invalid_op_interception(struct vcpu_svm *svm, - struct kvm_run *kvm_run) +static int invalid_op_interception(struct vcpu_svm *svm) { kvm_queue_exception(&svm->vcpu, UD_VECTOR); return 1; } -static int task_switch_interception(struct vcpu_svm *svm, - struct kvm_run *kvm_run) +static int task_switch_interception(struct vcpu_svm *svm) { u16 tss_selector; int reason; @@ -2008,14 +2012,14 @@ static int task_switch_interception(struct vcpu_svm *svm, return kvm_task_switch(&svm->vcpu, tss_selector, reason); } -static int cpuid_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +static int cpuid_interception(struct vcpu_svm *svm) { svm->next_rip = kvm_rip_read(&svm->vcpu) + 2; kvm_emulate_cpuid(&svm->vcpu); return 1; } -static int iret_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +static int iret_interception(struct vcpu_svm *svm) { ++svm->vcpu.stat.nmi_window_exits; svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET); @@ -2023,26 +2027,27 @@ static int iret_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) return 1; } -static int invlpg_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +static int invlpg_interception(struct vcpu_svm *svm) { - if (emulate_instruction(&svm->vcpu, kvm_run, 0, 0, 0) != EMULATE_DONE) + if (emulate_instruction(&svm->vcpu, 0, 0, 0) != EMULATE_DONE) pr_unimpl(&svm->vcpu, "%s: failed\n", __func__); return 1; } -static int emulate_on_interception(struct vcpu_svm *svm, - struct kvm_run *kvm_run) +static int emulate_on_interception(struct vcpu_svm *svm) { - if (emulate_instruction(&svm->vcpu, NULL, 0, 0, 0) != EMULATE_DONE) + if (emulate_instruction(&svm->vcpu, 0, 0, 0) != EMULATE_DONE) pr_unimpl(&svm->vcpu, "%s: failed\n", __func__); return 1; } -static int cr8_write_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +static int cr8_write_interception(struct vcpu_svm *svm) { + struct kvm_run *kvm_run = svm->vcpu.run; + u8 cr8_prev = kvm_get_cr8(&svm->vcpu); /* instruction emulation calls kvm_set_cr8() */ - emulate_instruction(&svm->vcpu, NULL, 0, 0, 0); + emulate_instruction(&svm->vcpu, 0, 0, 0); if (irqchip_in_kernel(svm->vcpu.kvm)) { svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK; return 1; @@ -2128,7 +2133,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) return 0; } -static int rdmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +static int rdmsr_interception(struct vcpu_svm *svm) { u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX]; u64 data; @@ -2221,7 +2226,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) return 0; } -static int wrmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +static int wrmsr_interception(struct vcpu_svm *svm) { u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX]; u64 data = (svm->vcpu.arch.regs[VCPU_REGS_RAX] & -1u) @@ -2237,17 +2242,18 @@ static int wrmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) return 1; } -static int msr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +static int msr_interception(struct vcpu_svm *svm) { if (svm->vmcb->control.exit_info_1) - return wrmsr_interception(svm, kvm_run); + return wrmsr_interception(svm); else - return rdmsr_interception(svm, kvm_run); + return rdmsr_interception(svm); } -static int interrupt_window_interception(struct vcpu_svm *svm, - struct kvm_run *kvm_run) +static int interrupt_window_interception(struct vcpu_svm *svm) { + struct kvm_run *kvm_run = svm->vcpu.run; + svm_clear_vintr(svm); svm->vmcb->control.int_ctl &= ~V_IRQ_MASK; /* @@ -2265,8 +2271,7 @@ static int interrupt_window_interception(struct vcpu_svm *svm, return 1; } -static int (*svm_exit_handlers[])(struct vcpu_svm *svm, - struct kvm_run *kvm_run) = { +static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { [SVM_EXIT_READ_CR0] = emulate_on_interception, [SVM_EXIT_READ_CR3] = emulate_on_interception, [SVM_EXIT_READ_CR4] = emulate_on_interception, @@ -2321,9 +2326,10 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm, [SVM_EXIT_NPF] = pf_interception, }; -static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) +static int handle_exit(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); + struct kvm_run *kvm_run = vcpu->run; u32 exit_code = svm->vmcb->control.exit_code; trace_kvm_exit(exit_code, svm->vmcb->save.rip); @@ -2383,7 +2389,7 @@ static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) return 0; } - return svm_exit_handlers[exit_code](svm, kvm_run); + return svm_exit_handlers[exit_code](svm); } static void reload_tss(struct kvm_vcpu *vcpu) @@ -2588,7 +2594,7 @@ static void svm_complete_interrupts(struct vcpu_svm *svm) #define R "e" #endif -static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +static void svm_vcpu_run(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); u16 fs_selector; -- cgit v1.2.3-70-g09d2 From e8b3433a5c062e94e34cadb6144c10689a497bc3 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Tue, 8 Sep 2009 14:47:38 -0300 Subject: KVM: SVM: remove needless mmap_sem acquision from nested_svm_map nested_svm_map unnecessarily takes mmap_sem around gfn_to_page, since gfn_to_page / get_user_pages are responsible for it. Signed-off-by: Marcelo Tosatti Acked-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/x86/kvm/svm.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/x86/kvm/svm.c') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 92048a626d4..f54c4f9d286 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1396,10 +1396,7 @@ static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, enum km_type idx) { struct page *page; - down_read(¤t->mm->mmap_sem); page = gfn_to_page(svm->vcpu.kvm, gpa >> PAGE_SHIFT); - up_read(¤t->mm->mmap_sem); - if (is_error_page(page)) goto error; -- cgit v1.2.3-70-g09d2 From 10474ae8945ce08622fd1f3464e55bd817bf2376 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Tue, 15 Sep 2009 11:37:46 +0200 Subject: KVM: Activate Virtualization On Demand X86 CPUs need to have some magic happening to enable the virtualization extensions on them. This magic can result in unpleasant results for users, like blocking other VMMs from working (vmx) or using invalid TLB entries (svm). Currently KVM activates virtualization when the respective kernel module is loaded. This blocks us from autoloading KVM modules without breaking other VMMs. To circumvent this problem at least a bit, this patch introduces on demand activation of virtualization. This means, that instead virtualization is enabled on creation of the first virtual machine and disabled on destruction of the last one. So using this, KVM can be easily autoloaded, while keeping other hypervisors usable. Signed-off-by: Alexander Graf Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/ia64/kvm/kvm-ia64.c | 8 ++-- arch/powerpc/kvm/powerpc.c | 3 +- arch/s390/kvm/kvm-s390.c | 3 +- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/svm.c | 13 ++++-- arch/x86/kvm/vmx.c | 11 +++-- arch/x86/kvm/x86.c | 4 +- include/linux/kvm_host.h | 2 +- virt/kvm/kvm_main.c | 90 +++++++++++++++++++++++++++++++++++------ 9 files changed, 108 insertions(+), 28 deletions(-) (limited to 'arch/x86/kvm/svm.c') diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index f6471c88266..5fdeec5fddc 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -124,7 +124,7 @@ long ia64_pal_vp_create(u64 *vpd, u64 *host_iva, u64 *opt_handler) static DEFINE_SPINLOCK(vp_lock); -void kvm_arch_hardware_enable(void *garbage) +int kvm_arch_hardware_enable(void *garbage) { long status; long tmp_base; @@ -137,7 +137,7 @@ void kvm_arch_hardware_enable(void *garbage) slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT); local_irq_restore(saved_psr); if (slot < 0) - return; + return -EINVAL; spin_lock(&vp_lock); status = ia64_pal_vp_init_env(kvm_vsa_base ? @@ -145,7 +145,7 @@ void kvm_arch_hardware_enable(void *garbage) __pa(kvm_vm_buffer), KVM_VM_BUFFER_BASE, &tmp_base); if (status != 0) { printk(KERN_WARNING"kvm: Failed to Enable VT Support!!!!\n"); - return ; + return -EINVAL; } if (!kvm_vsa_base) { @@ -154,6 +154,8 @@ void kvm_arch_hardware_enable(void *garbage) } spin_unlock(&vp_lock); ia64_ptr_entry(0x3, slot); + + return 0; } void kvm_arch_hardware_disable(void *garbage) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 95af62217b6..5902bbc2411 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -78,8 +78,9 @@ int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu) return r; } -void kvm_arch_hardware_enable(void *garbage) +int kvm_arch_hardware_enable(void *garbage) { + return 0; } void kvm_arch_hardware_disable(void *garbage) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 00e2ce8e91f..544505893c9 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -74,9 +74,10 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { static unsigned long long *facilities; /* Section: not file related */ -void kvm_arch_hardware_enable(void *garbage) +int kvm_arch_hardware_enable(void *garbage) { /* every s390 is virtualization enabled ;-) */ + return 0; } void kvm_arch_hardware_disable(void *garbage) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index a46e2dd9aca..295c7c4d9c9 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -459,7 +459,7 @@ struct descriptor_table { struct kvm_x86_ops { int (*cpu_has_kvm_support)(void); /* __init */ int (*disabled_by_bios)(void); /* __init */ - void (*hardware_enable)(void *dummy); /* __init */ + int (*hardware_enable)(void *dummy); void (*hardware_disable)(void *dummy); void (*check_processor_compatibility)(void *rtn); int (*hardware_setup)(void); /* __init */ diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index f54c4f9d286..59fe4d54da1 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -316,7 +316,7 @@ static void svm_hardware_disable(void *garbage) cpu_svm_disable(); } -static void svm_hardware_enable(void *garbage) +static int svm_hardware_enable(void *garbage) { struct svm_cpu_data *svm_data; @@ -325,16 +325,20 @@ static void svm_hardware_enable(void *garbage) struct desc_struct *gdt; int me = raw_smp_processor_id(); + rdmsrl(MSR_EFER, efer); + if (efer & EFER_SVME) + return -EBUSY; + if (!has_svm()) { printk(KERN_ERR "svm_cpu_init: err EOPNOTSUPP on %d\n", me); - return; + return -EINVAL; } svm_data = per_cpu(svm_data, me); if (!svm_data) { printk(KERN_ERR "svm_cpu_init: svm_data is NULL on %d\n", me); - return; + return -EINVAL; } svm_data->asid_generation = 1; @@ -345,11 +349,12 @@ static void svm_hardware_enable(void *garbage) gdt = (struct desc_struct *)gdt_descr.base; svm_data->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS); - rdmsrl(MSR_EFER, efer); wrmsrl(MSR_EFER, efer | EFER_SVME); wrmsrl(MSR_VM_HSAVE_PA, page_to_pfn(svm_data->save_area) << PAGE_SHIFT); + + return 0; } static void svm_cpu_uninit(int cpu) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 73cb5dd960c..a187570e483 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1138,12 +1138,15 @@ static __init int vmx_disabled_by_bios(void) /* locked but not enabled */ } -static void hardware_enable(void *garbage) +static int hardware_enable(void *garbage) { int cpu = raw_smp_processor_id(); u64 phys_addr = __pa(per_cpu(vmxarea, cpu)); u64 old; + if (read_cr4() & X86_CR4_VMXE) + return -EBUSY; + INIT_LIST_HEAD(&per_cpu(vcpus_on_cpu, cpu)); rdmsrl(MSR_IA32_FEATURE_CONTROL, old); if ((old & (FEATURE_CONTROL_LOCKED | @@ -1158,6 +1161,10 @@ static void hardware_enable(void *garbage) asm volatile (ASM_VMX_VMXON_RAX : : "a"(&phys_addr), "m"(phys_addr) : "memory", "cc"); + + ept_sync_global(); + + return 0; } static void vmclear_local_vcpus(void) @@ -4040,8 +4047,6 @@ static int __init vmx_init(void) if (bypass_guest_pf) kvm_mmu_set_nonpresent_ptes(~0xffeull, 0ull); - ept_sync_global(); - return 0; out3: diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 829e3063e2a..3d83de8bcbf 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4691,9 +4691,9 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu) return kvm_x86_ops->vcpu_reset(vcpu); } -void kvm_arch_hardware_enable(void *garbage) +int kvm_arch_hardware_enable(void *garbage) { - kvm_x86_ops->hardware_enable(garbage); + return kvm_x86_ops->hardware_enable(garbage); } void kvm_arch_hardware_disable(void *garbage) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index c0a1cc35f08..b985a29d817 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -345,7 +345,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu); int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu); -void kvm_arch_hardware_enable(void *garbage); +int kvm_arch_hardware_enable(void *garbage); void kvm_arch_hardware_disable(void *garbage); int kvm_arch_hardware_setup(void); void kvm_arch_hardware_unsetup(void); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 38e4d2c34ac..70c8cbea0a9 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -69,6 +69,8 @@ DEFINE_SPINLOCK(kvm_lock); LIST_HEAD(vm_list); static cpumask_var_t cpus_hardware_enabled; +static int kvm_usage_count = 0; +static atomic_t hardware_enable_failed; struct kmem_cache *kvm_vcpu_cache; EXPORT_SYMBOL_GPL(kvm_vcpu_cache); @@ -79,6 +81,8 @@ struct dentry *kvm_debugfs_dir; static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl, unsigned long arg); +static int hardware_enable_all(void); +static void hardware_disable_all(void); static bool kvm_rebooting; @@ -339,6 +343,7 @@ static const struct mmu_notifier_ops kvm_mmu_notifier_ops = { static struct kvm *kvm_create_vm(void) { + int r = 0; struct kvm *kvm = kvm_arch_create_vm(); #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET struct page *page; @@ -346,6 +351,11 @@ static struct kvm *kvm_create_vm(void) if (IS_ERR(kvm)) goto out; + + r = hardware_enable_all(); + if (r) + goto out_err_nodisable; + #ifdef CONFIG_HAVE_KVM_IRQCHIP INIT_HLIST_HEAD(&kvm->mask_notifier_list); INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list); @@ -354,8 +364,8 @@ static struct kvm *kvm_create_vm(void) #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET page = alloc_page(GFP_KERNEL | __GFP_ZERO); if (!page) { - kfree(kvm); - return ERR_PTR(-ENOMEM); + r = -ENOMEM; + goto out_err; } kvm->coalesced_mmio_ring = (struct kvm_coalesced_mmio_ring *)page_address(page); @@ -363,15 +373,13 @@ static struct kvm *kvm_create_vm(void) #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) { - int err; kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops; - err = mmu_notifier_register(&kvm->mmu_notifier, current->mm); - if (err) { + r = mmu_notifier_register(&kvm->mmu_notifier, current->mm); + if (r) { #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET put_page(page); #endif - kfree(kvm); - return ERR_PTR(err); + goto out_err; } } #endif @@ -395,6 +403,12 @@ static struct kvm *kvm_create_vm(void) #endif out: return kvm; + +out_err: + hardware_disable_all(); +out_err_nodisable: + kfree(kvm); + return ERR_PTR(r); } /* @@ -453,6 +467,7 @@ static void kvm_destroy_vm(struct kvm *kvm) kvm_arch_flush_shadow(kvm); #endif kvm_arch_destroy_vm(kvm); + hardware_disable_all(); mmdrop(mm); } @@ -1644,11 +1659,21 @@ static struct miscdevice kvm_dev = { static void hardware_enable(void *junk) { int cpu = raw_smp_processor_id(); + int r; if (cpumask_test_cpu(cpu, cpus_hardware_enabled)) return; + cpumask_set_cpu(cpu, cpus_hardware_enabled); - kvm_arch_hardware_enable(NULL); + + r = kvm_arch_hardware_enable(NULL); + + if (r) { + cpumask_clear_cpu(cpu, cpus_hardware_enabled); + atomic_inc(&hardware_enable_failed); + printk(KERN_INFO "kvm: enabling virtualization on " + "CPU%d failed\n", cpu); + } } static void hardware_disable(void *junk) @@ -1661,11 +1686,52 @@ static void hardware_disable(void *junk) kvm_arch_hardware_disable(NULL); } +static void hardware_disable_all_nolock(void) +{ + BUG_ON(!kvm_usage_count); + + kvm_usage_count--; + if (!kvm_usage_count) + on_each_cpu(hardware_disable, NULL, 1); +} + +static void hardware_disable_all(void) +{ + spin_lock(&kvm_lock); + hardware_disable_all_nolock(); + spin_unlock(&kvm_lock); +} + +static int hardware_enable_all(void) +{ + int r = 0; + + spin_lock(&kvm_lock); + + kvm_usage_count++; + if (kvm_usage_count == 1) { + atomic_set(&hardware_enable_failed, 0); + on_each_cpu(hardware_enable, NULL, 1); + + if (atomic_read(&hardware_enable_failed)) { + hardware_disable_all_nolock(); + r = -EBUSY; + } + } + + spin_unlock(&kvm_lock); + + return r; +} + static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val, void *v) { int cpu = (long)v; + if (!kvm_usage_count) + return NOTIFY_OK; + val &= ~CPU_TASKS_FROZEN; switch (val) { case CPU_DYING: @@ -1868,13 +1934,15 @@ static void kvm_exit_debug(void) static int kvm_suspend(struct sys_device *dev, pm_message_t state) { - hardware_disable(NULL); + if (kvm_usage_count) + hardware_disable(NULL); return 0; } static int kvm_resume(struct sys_device *dev) { - hardware_enable(NULL); + if (kvm_usage_count) + hardware_enable(NULL); return 0; } @@ -1949,7 +2017,6 @@ int kvm_init(void *opaque, unsigned int vcpu_size, goto out_free_1; } - on_each_cpu(hardware_enable, NULL, 1); r = register_cpu_notifier(&kvm_cpu_notifier); if (r) goto out_free_2; @@ -1999,7 +2066,6 @@ out_free_3: unregister_reboot_notifier(&kvm_reboot_notifier); unregister_cpu_notifier(&kvm_cpu_notifier); out_free_2: - on_each_cpu(hardware_disable, NULL, 1); out_free_1: kvm_arch_hardware_unsetup(); out_free_0a: -- cgit v1.2.3-70-g09d2 From 7fcdb5103d10d2eb75876637a2efa9679cce14d3 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 16 Sep 2009 15:24:15 +0200 Subject: KVM: SVM: reorganize svm_interrupt_allowed This patch reorganizes the logic in svm_interrupt_allowed to make it better to read. This is important because the logic is a lot more complicated with Nested SVM. Signed-off-by: Joerg Roedel Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/svm.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'arch/x86/kvm/svm.c') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 59fe4d54da1..3f3fe815c21 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -2472,10 +2472,18 @@ static int svm_interrupt_allowed(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); struct vmcb *vmcb = svm->vmcb; - return (vmcb->save.rflags & X86_EFLAGS_IF) && - !(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) && - gif_set(svm) && - !(is_nested(svm) && (svm->vcpu.arch.hflags & HF_VINTR_MASK)); + int ret; + + if (!gif_set(svm) || + (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)) + return 0; + + ret = !!(vmcb->save.rflags & X86_EFLAGS_IF); + + if (is_nested(svm)) + return ret && !(svm->vcpu.arch.hflags & HF_VINTR_MASK); + + return ret; } static void enable_irq_window(struct kvm_vcpu *vcpu) -- cgit v1.2.3-70-g09d2 From 33527ad7e12a5cb50b39165945464600ab2f7632 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 16 Sep 2009 15:24:16 +0200 Subject: KVM: SVM: don't copy exit_int_info on nested vmrun The exit_int_info field is only written by the hardware and never read. So it does not need to be copied on a vmrun emulation. Signed-off-by: Joerg Roedel Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/svm.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86/kvm/svm.c') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 3f3fe815c21..41c996ab87e 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1797,8 +1797,6 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) svm->nested.intercept = nested_vmcb->control.intercept; force_new_asid(&svm->vcpu); - svm->vmcb->control.exit_int_info = nested_vmcb->control.exit_int_info; - svm->vmcb->control.exit_int_info_err = nested_vmcb->control.exit_int_info_err; svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK; if (nested_vmcb->control.int_ctl & V_IRQ_MASK) { nsvm_printk("nSVM Injecting Interrupt: 0x%x\n", -- cgit v1.2.3-70-g09d2 From e935d48e1b49451490218e1181d9834176200955 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 16 Sep 2009 15:24:19 +0200 Subject: KVM: SVM: Remove remaining occurences of rdtscll This patch replaces them with native_read_tsc() which can also be used in expressions and saves a variable on the stack in this case. Signed-off-by: Joerg Roedel Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/svm.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'arch/x86/kvm/svm.c') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 41c996ab87e..9a4dacab6d8 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -763,14 +763,13 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) int i; if (unlikely(cpu != vcpu->cpu)) { - u64 tsc_this, delta; + u64 delta; /* * Make sure that the guest sees a monotonically * increasing TSC. */ - rdtscll(tsc_this); - delta = vcpu->arch.host_tsc - tsc_this; + delta = vcpu->arch.host_tsc - native_read_tsc(); svm->vmcb->control.tsc_offset += delta; if (is_nested(svm)) svm->nested.hsave->control.tsc_offset += delta; @@ -792,7 +791,7 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu) for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]); - rdtscll(vcpu->arch.host_tsc); + vcpu->arch.host_tsc = native_read_tsc(); } static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu) -- cgit v1.2.3-70-g09d2 From e6732a5af9dfcc87078706a1598df0efe5010f73 Mon Sep 17 00:00:00 2001 From: Zachary Amsden Date: Tue, 29 Sep 2009 11:38:36 -1000 Subject: KVM: Fix printk name error in svm.c Signed-off-by: Zachary Amsden Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/svm.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/x86/kvm/svm.c') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 9a4dacab6d8..d1036ce8917 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -330,13 +330,14 @@ static int svm_hardware_enable(void *garbage) return -EBUSY; if (!has_svm()) { - printk(KERN_ERR "svm_cpu_init: err EOPNOTSUPP on %d\n", me); + printk(KERN_ERR "svm_hardware_enable: err EOPNOTSUPP on %d\n", + me); return -EINVAL; } svm_data = per_cpu(svm_data, me); if (!svm_data) { - printk(KERN_ERR "svm_cpu_init: svm_data is NULL on %d\n", + printk(KERN_ERR "svm_hardware_enable: svm_data is NULL on %d\n", me); return -EINVAL; } -- cgit v1.2.3-70-g09d2 From 3230bb4707278dba25e24cd0a11ea7b2337678ee Mon Sep 17 00:00:00 2001 From: Zachary Amsden Date: Tue, 29 Sep 2009 11:38:37 -1000 Subject: KVM: Fix hotplug of CPUs Both VMX and SVM require per-cpu memory allocation, which is done at module init time, for only online cpus. Backend was not allocating enough structure for all possible CPUs, so new CPUs coming online could not be hardware enabled. Signed-off-by: Zachary Amsden Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/svm.c | 4 ++-- arch/x86/kvm/vmx.c | 6 ++++-- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'arch/x86/kvm/svm.c') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index d1036ce8917..02a4269be64 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -482,7 +482,7 @@ static __init int svm_hardware_setup(void) kvm_enable_efer_bits(EFER_SVME); } - for_each_online_cpu(cpu) { + for_each_possible_cpu(cpu) { r = svm_cpu_init(cpu); if (r) goto err; @@ -516,7 +516,7 @@ static __exit void svm_hardware_unsetup(void) { int cpu; - for_each_online_cpu(cpu) + for_each_possible_cpu(cpu) svm_cpu_uninit(cpu); __free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), IOPM_ALLOC_ORDER); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index a187570e483..97f4265cda3 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1350,15 +1350,17 @@ static void free_kvm_area(void) { int cpu; - for_each_online_cpu(cpu) + for_each_possible_cpu(cpu) { free_vmcs(per_cpu(vmxarea, cpu)); + per_cpu(vmxarea, cpu) = NULL; + } } static __init int alloc_kvm_area(void) { int cpu; - for_each_online_cpu(cpu) { + for_each_possible_cpu(cpu) { struct vmcs *vmcs; vmcs = alloc_vmcs_cpu(cpu); -- cgit v1.2.3-70-g09d2 From 355be0b9300579e02275d7d19374806a974ce622 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Sat, 3 Oct 2009 00:31:21 +0200 Subject: KVM: x86: Refactor guest debug IOCTL handling Much of so far vendor-specific code for setting up guest debug can actually be handled by the generic code. This also fixes a minor deficit in the SVM part /wrt processing KVM_GUESTDBG_ENABLE. Signed-off-by: Jan Kiszka Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 4 ++-- arch/x86/kvm/svm.c | 14 ++------------ arch/x86/kvm/vmx.c | 18 +----------------- arch/x86/kvm/x86.c | 28 +++++++++++++++++++++------- 4 files changed, 26 insertions(+), 38 deletions(-) (limited to 'arch/x86/kvm/svm.c') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 295c7c4d9c9..e7f87083260 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -475,8 +475,8 @@ struct kvm_x86_ops { void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu); void (*vcpu_put)(struct kvm_vcpu *vcpu); - int (*set_guest_debug)(struct kvm_vcpu *vcpu, - struct kvm_guest_debug *dbg); + void (*set_guest_debug)(struct kvm_vcpu *vcpu, + struct kvm_guest_debug *dbg); int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata); int (*set_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 02a4269be64..279a2ae21b4 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1065,26 +1065,16 @@ static void update_db_intercept(struct kvm_vcpu *vcpu) vcpu->guest_debug = 0; } -static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) +static void svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) { - int old_debug = vcpu->guest_debug; struct vcpu_svm *svm = to_svm(vcpu); - vcpu->guest_debug = dbg->control; - - update_db_intercept(vcpu); - if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) svm->vmcb->save.dr7 = dbg->arch.debugreg[7]; else svm->vmcb->save.dr7 = vcpu->arch.dr7; - if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) - svm->vmcb->save.rflags |= X86_EFLAGS_TF | X86_EFLAGS_RF; - else if (old_debug & KVM_GUESTDBG_SINGLESTEP) - svm->vmcb->save.rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF); - - return 0; + update_db_intercept(vcpu); } static void load_host_msrs(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 97f4265cda3..70020e505c2 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1096,30 +1096,14 @@ static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) } } -static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) +static void set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) { - int old_debug = vcpu->guest_debug; - unsigned long flags; - - vcpu->guest_debug = dbg->control; - if (!(vcpu->guest_debug & KVM_GUESTDBG_ENABLE)) - vcpu->guest_debug = 0; - if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) vmcs_writel(GUEST_DR7, dbg->arch.debugreg[7]); else vmcs_writel(GUEST_DR7, vcpu->arch.dr7); - flags = vmcs_readl(GUEST_RFLAGS); - if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) - flags |= X86_EFLAGS_TF | X86_EFLAGS_RF; - else if (old_debug & KVM_GUESTDBG_SINGLESTEP) - flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF); - vmcs_writel(GUEST_RFLAGS, flags); - update_exception_bitmap(vcpu); - - return 0; } static __init int cpu_has_kvm_support(void) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 5f44d565cc9..a06f88e66c8 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4472,12 +4472,19 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) { - int i, r; + unsigned long rflags; + int old_debug; + int i; vcpu_load(vcpu); - if ((dbg->control & (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP)) == - (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP)) { + old_debug = vcpu->guest_debug; + + vcpu->guest_debug = dbg->control; + if (!(vcpu->guest_debug & KVM_GUESTDBG_ENABLE)) + vcpu->guest_debug = 0; + + if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) { for (i = 0; i < KVM_NR_DB_REGS; ++i) vcpu->arch.eff_db[i] = dbg->arch.debugreg[i]; vcpu->arch.switch_db_regs = @@ -4488,16 +4495,23 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, vcpu->arch.switch_db_regs = (vcpu->arch.dr7 & DR7_BP_EN_MASK); } - r = kvm_x86_ops->set_guest_debug(vcpu, dbg); + rflags = kvm_x86_ops->get_rflags(vcpu); + if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) + rflags |= X86_EFLAGS_TF | X86_EFLAGS_RF; + else if (old_debug & KVM_GUESTDBG_SINGLESTEP) + rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF); + kvm_x86_ops->set_rflags(vcpu, rflags); - if (dbg->control & KVM_GUESTDBG_INJECT_DB) + kvm_x86_ops->set_guest_debug(vcpu, dbg); + + if (vcpu->guest_debug & KVM_GUESTDBG_INJECT_DB) kvm_queue_exception(vcpu, DB_VECTOR); - else if (dbg->control & KVM_GUESTDBG_INJECT_BP) + else if (vcpu->guest_debug & KVM_GUESTDBG_INJECT_BP) kvm_queue_exception(vcpu, BP_VECTOR); vcpu_put(vcpu); - return r; + return 0; } /* -- cgit v1.2.3-70-g09d2 From 8d23c4662427507f432c96ac4fa3b76f0a8360cd Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Fri, 9 Oct 2009 16:08:25 +0200 Subject: KVM: SVM: Notify nested hypervisor of lost event injections If event_inj is valid on a #vmexit the host CPU would write the contents to exit_int_info, so the hypervisor knows that the event wasn't injected. We don't do this in nested SVM by now which is a bug and fixed by this patch. Signed-off-by: Alexander Graf Signed-off-by: Joerg Roedel Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/svm.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'arch/x86/kvm/svm.c') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 279a2ae21b4..e37285446cb 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1615,6 +1615,22 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) nested_vmcb->control.exit_info_2 = vmcb->control.exit_info_2; nested_vmcb->control.exit_int_info = vmcb->control.exit_int_info; nested_vmcb->control.exit_int_info_err = vmcb->control.exit_int_info_err; + + /* + * If we emulate a VMRUN/#VMEXIT in the same host #vmexit cycle we have + * to make sure that we do not lose injected events. So check event_inj + * here and copy it to exit_int_info if it is valid. + * Exit_int_info and event_inj can't be both valid because the case + * below only happens on a VMRUN instruction intercept which has + * no valid exit_int_info set. + */ + if (vmcb->control.event_inj & SVM_EVTINJ_VALID) { + struct vmcb_control_area *nc = &nested_vmcb->control; + + nc->exit_int_info = vmcb->control.event_inj; + nc->exit_int_info_err = vmcb->control.event_inj_err; + } + nested_vmcb->control.tlb_ctl = 0; nested_vmcb->control.event_inj = 0; nested_vmcb->control.event_inj_err = 0; -- cgit v1.2.3-70-g09d2 From cd3ff653ae0b45bac7a19208e9c75034fcacc85f Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 9 Oct 2009 16:08:26 +0200 Subject: KVM: SVM: Move INTR vmexit out of atomic code The nested SVM code emulates a #vmexit caused by a request to open the irq window right in the request function. This is a bug because the request function runs with preemption and interrupts disabled but the #vmexit emulation might sleep. This can cause a schedule()-while-atomic bug and is fixed with this patch. Signed-off-by: Joerg Roedel Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/svm.c | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) (limited to 'arch/x86/kvm/svm.c') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index e37285446cb..884bffc70c7 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -85,6 +85,9 @@ struct nested_state { /* gpa pointers to the real vectors */ u64 vmcb_msrpm; + /* A VMEXIT is required but not yet emulated */ + bool exit_required; + /* cache for intercepts of the guest */ u16 intercept_cr_read; u16 intercept_cr_write; @@ -1379,7 +1382,14 @@ static inline int nested_svm_intr(struct vcpu_svm *svm) svm->vmcb->control.exit_code = SVM_EXIT_INTR; - if (nested_svm_exit_handled(svm)) { + if (svm->nested.intercept & 1ULL) { + /* + * The #vmexit can't be emulated here directly because this + * code path runs with irqs and preemtion disabled. A + * #vmexit emulation might sleep. Only signal request for + * the #vmexit here. + */ + svm->nested.exit_required = true; nsvm_printk("VMexit -> INTR\n"); return 1; } @@ -2340,6 +2350,13 @@ static int handle_exit(struct kvm_vcpu *vcpu) trace_kvm_exit(exit_code, svm->vmcb->save.rip); + if (unlikely(svm->nested.exit_required)) { + nested_svm_vmexit(svm); + svm->nested.exit_required = false; + + return 1; + } + if (is_nested(svm)) { int vmexit; @@ -2615,6 +2632,13 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) u16 gs_selector; u16 ldt_selector; + /* + * A vmexit emulation is required before the vcpu can be executed + * again. + */ + if (unlikely(svm->nested.exit_required)) + return; + svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX]; svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP]; svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP]; -- cgit v1.2.3-70-g09d2 From 0ac406de8f3780c8e0801d5719e1ec531d4a6ec4 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 9 Oct 2009 16:08:27 +0200 Subject: KVM: SVM: Add tracepoint for nested vmrun This patch adds a dedicated kvm tracepoint for a nested vmrun. Signed-off-by: Joerg Roedel Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/svm.c | 6 ++++++ arch/x86/kvm/trace.h | 33 +++++++++++++++++++++++++++++++++ arch/x86/kvm/x86.c | 1 + 3 files changed, 40 insertions(+) (limited to 'arch/x86/kvm/svm.c') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 884bffc70c7..907af3f3a7a 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1726,6 +1726,12 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) /* nested_vmcb is our indicator if nested SVM is activated */ svm->nested.vmcb = svm->vmcb->save.rax; + trace_kvm_nested_vmrun(svm->vmcb->save.rip - 3, svm->nested.vmcb, + nested_vmcb->save.rip, + nested_vmcb->control.int_ctl, + nested_vmcb->control.event_inj, + nested_vmcb->control.nested_ctl); + /* Clear internal status */ kvm_clear_exception_queue(&svm->vcpu); kvm_clear_interrupt_queue(&svm->vcpu); diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 0d480e77eac..b5798e12182 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -349,6 +349,39 @@ TRACE_EVENT(kvm_apic_accept_irq, __entry->coalesced ? " (coalesced)" : "") ); +/* + * Tracepoint for nested VMRUN + */ +TRACE_EVENT(kvm_nested_vmrun, + TP_PROTO(__u64 rip, __u64 vmcb, __u64 nested_rip, __u32 int_ctl, + __u32 event_inj, bool npt), + TP_ARGS(rip, vmcb, nested_rip, int_ctl, event_inj, npt), + + TP_STRUCT__entry( + __field( __u64, rip ) + __field( __u64, vmcb ) + __field( __u64, nested_rip ) + __field( __u32, int_ctl ) + __field( __u32, event_inj ) + __field( bool, npt ) + ), + + TP_fast_assign( + __entry->rip = rip; + __entry->vmcb = vmcb; + __entry->nested_rip = nested_rip; + __entry->int_ctl = int_ctl; + __entry->event_inj = event_inj; + __entry->npt = npt; + ), + + TP_printk("rip: 0x%016llx vmcb: 0x%016llx nrip: 0x%016llx int_ctl: 0x%08x " + "event_inj: 0x%08x npt: %s\n", + __entry->rip, __entry->vmcb, __entry->nested_rip, + __entry->int_ctl, __entry->event_inj, + __entry->npt ? "on" : "off") +); + #endif /* _TRACE_KVM_H */ /* This part must be outside protection */ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 4de5bc0a8e8..3ab2f9042dd 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4984,3 +4984,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_cr); +EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmrun); -- cgit v1.2.3-70-g09d2 From d8cabddf7e8fbdced2dd668c98d7762c7ef75245 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 9 Oct 2009 16:08:28 +0200 Subject: KVM: SVM: Add tracepoint for nested #vmexit This patch adds a tracepoint for every #vmexit we get from a nested guest. Signed-off-by: Joerg Roedel Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/svm.c | 6 ++++++ arch/x86/kvm/trace.h | 36 ++++++++++++++++++++++++++++++++++++ arch/x86/kvm/x86.c | 1 + 3 files changed, 43 insertions(+) (limited to 'arch/x86/kvm/svm.c') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 907af3f3a7a..edf6e8b2b84 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -2366,6 +2366,12 @@ static int handle_exit(struct kvm_vcpu *vcpu) if (is_nested(svm)) { int vmexit; + trace_kvm_nested_vmexit(svm->vmcb->save.rip, exit_code, + svm->vmcb->control.exit_info_1, + svm->vmcb->control.exit_info_2, + svm->vmcb->control.exit_int_info, + svm->vmcb->control.exit_int_info_err); + nsvm_printk("nested handle_exit: 0x%x | 0x%lx | 0x%lx | 0x%lx\n", exit_code, svm->vmcb->control.exit_info_1, svm->vmcb->control.exit_info_2, svm->vmcb->save.rip); diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index b5798e12182..a7eb6299a26 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -382,6 +382,42 @@ TRACE_EVENT(kvm_nested_vmrun, __entry->npt ? "on" : "off") ); +/* + * Tracepoint for #VMEXIT while nested + */ +TRACE_EVENT(kvm_nested_vmexit, + TP_PROTO(__u64 rip, __u32 exit_code, + __u64 exit_info1, __u64 exit_info2, + __u32 exit_int_info, __u32 exit_int_info_err), + TP_ARGS(rip, exit_code, exit_info1, exit_info2, + exit_int_info, exit_int_info_err), + + TP_STRUCT__entry( + __field( __u64, rip ) + __field( __u32, exit_code ) + __field( __u64, exit_info1 ) + __field( __u64, exit_info2 ) + __field( __u32, exit_int_info ) + __field( __u32, exit_int_info_err ) + ), + + TP_fast_assign( + __entry->rip = rip; + __entry->exit_code = exit_code; + __entry->exit_info1 = exit_info1; + __entry->exit_info2 = exit_info2; + __entry->exit_int_info = exit_int_info; + __entry->exit_int_info_err = exit_int_info_err; + ), + TP_printk("rip: 0x%016llx reason: %s ext_inf1: 0x%016llx " + "ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x\n", + __entry->rip, + ftrace_print_symbols_seq(p, __entry->exit_code, + kvm_x86_ops->exit_reasons_str), + __entry->exit_info1, __entry->exit_info2, + __entry->exit_int_info, __entry->exit_int_info_err) +); + #endif /* _TRACE_KVM_H */ /* This part must be outside protection */ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3ab2f9042dd..192d58efc6d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4985,3 +4985,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_cr); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmrun); +EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit); -- cgit v1.2.3-70-g09d2 From 17897f366847a9ef8a13e3671a0eb1c15422abed Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 9 Oct 2009 16:08:29 +0200 Subject: KVM: SVM: Add tracepoint for injected #vmexit This patch adds a tracepoint for a nested #vmexit that gets re-injected to the guest. Signed-off-by: Joerg Roedel Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/svm.c | 6 ++++++ arch/x86/kvm/trace.h | 33 +++++++++++++++++++++++++++++++++ arch/x86/kvm/x86.c | 1 + 3 files changed, 40 insertions(+) (limited to 'arch/x86/kvm/svm.c') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index edf6e8b2b84..369eeb86e87 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1592,6 +1592,12 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) struct vmcb *hsave = svm->nested.hsave; struct vmcb *vmcb = svm->vmcb; + trace_kvm_nested_vmexit_inject(vmcb->control.exit_code, + vmcb->control.exit_info_1, + vmcb->control.exit_info_2, + vmcb->control.exit_int_info, + vmcb->control.exit_int_info_err); + nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, KM_USER0); if (!nested_vmcb) return 1; diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index a7eb6299a26..4d6bb5ee39b 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -418,6 +418,39 @@ TRACE_EVENT(kvm_nested_vmexit, __entry->exit_int_info, __entry->exit_int_info_err) ); +/* + * Tracepoint for #VMEXIT reinjected to the guest + */ +TRACE_EVENT(kvm_nested_vmexit_inject, + TP_PROTO(__u32 exit_code, + __u64 exit_info1, __u64 exit_info2, + __u32 exit_int_info, __u32 exit_int_info_err), + TP_ARGS(exit_code, exit_info1, exit_info2, + exit_int_info, exit_int_info_err), + + TP_STRUCT__entry( + __field( __u32, exit_code ) + __field( __u64, exit_info1 ) + __field( __u64, exit_info2 ) + __field( __u32, exit_int_info ) + __field( __u32, exit_int_info_err ) + ), + + TP_fast_assign( + __entry->exit_code = exit_code; + __entry->exit_info1 = exit_info1; + __entry->exit_info2 = exit_info2; + __entry->exit_int_info = exit_int_info; + __entry->exit_int_info_err = exit_int_info_err; + ), + + TP_printk("reason: %s ext_inf1: 0x%016llx " + "ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x\n", + ftrace_print_symbols_seq(p, __entry->exit_code, + kvm_x86_ops->exit_reasons_str), + __entry->exit_info1, __entry->exit_info2, + __entry->exit_int_info, __entry->exit_int_info_err) +); #endif /* _TRACE_KVM_H */ /* This part must be outside protection */ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 192d58efc6d..a522d9ba81b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4986,3 +4986,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_cr); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmrun); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit); +EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject); -- cgit v1.2.3-70-g09d2 From 236649de3360916ef85f95c82723af17a25b9179 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 9 Oct 2009 16:08:30 +0200 Subject: KVM: SVM: Add tracepoint for #vmexit because intr pending This patch adds a special tracepoint for the event that a nested #vmexit is injected because kvm wants to inject an interrupt into the guest. Signed-off-by: Joerg Roedel Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/svm.c | 2 +- arch/x86/kvm/trace.h | 18 ++++++++++++++++++ arch/x86/kvm/x86.c | 1 + 3 files changed, 20 insertions(+), 1 deletion(-) (limited to 'arch/x86/kvm/svm.c') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 369eeb86e87..78a391c60a7 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1390,7 +1390,7 @@ static inline int nested_svm_intr(struct vcpu_svm *svm) * the #vmexit here. */ svm->nested.exit_required = true; - nsvm_printk("VMexit -> INTR\n"); + trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip); return 1; } diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 4d6bb5ee39b..3cc8f444be1 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -451,6 +451,24 @@ TRACE_EVENT(kvm_nested_vmexit_inject, __entry->exit_info1, __entry->exit_info2, __entry->exit_int_info, __entry->exit_int_info_err) ); + +/* + * Tracepoint for nested #vmexit because of interrupt pending + */ +TRACE_EVENT(kvm_nested_intr_vmexit, + TP_PROTO(__u64 rip), + TP_ARGS(rip), + + TP_STRUCT__entry( + __field( __u64, rip ) + ), + + TP_fast_assign( + __entry->rip = rip + ), + + TP_printk("rip: 0x%016llx\n", __entry->rip) +); #endif /* _TRACE_KVM_H */ /* This part must be outside protection */ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a522d9ba81b..2cf4146b425 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4987,3 +4987,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_cr); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmrun); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject); +EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit); -- cgit v1.2.3-70-g09d2 From ec1ff79084fccdae0dca9b04b89dcdf3235bbfa1 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 9 Oct 2009 16:08:31 +0200 Subject: KVM: SVM: Add tracepoint for invlpga instruction This patch adds a tracepoint for the event that the guest executed the INVLPGA instruction. Signed-off-by: Joerg Roedel Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/svm.c | 3 +++ arch/x86/kvm/trace.h | 23 +++++++++++++++++++++++ arch/x86/kvm/x86.c | 1 + 3 files changed, 27 insertions(+) (limited to 'arch/x86/kvm/svm.c') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 78a391c60a7..ba18fb7d365 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1976,6 +1976,9 @@ static int invlpga_interception(struct vcpu_svm *svm) struct kvm_vcpu *vcpu = &svm->vcpu; nsvm_printk("INVLPGA\n"); + trace_kvm_invlpga(svm->vmcb->save.rip, vcpu->arch.regs[VCPU_REGS_RCX], + vcpu->arch.regs[VCPU_REGS_RAX]); + /* Let's treat INVLPGA the same as INVLPG (can be optimized!) */ kvm_mmu_invlpg(vcpu, vcpu->arch.regs[VCPU_REGS_RAX]); diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 3cc8f444be1..7e1f08e959b 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -469,6 +469,29 @@ TRACE_EVENT(kvm_nested_intr_vmexit, TP_printk("rip: 0x%016llx\n", __entry->rip) ); + +/* + * Tracepoint for nested #vmexit because of interrupt pending + */ +TRACE_EVENT(kvm_invlpga, + TP_PROTO(__u64 rip, int asid, u64 address), + TP_ARGS(rip, asid, address), + + TP_STRUCT__entry( + __field( __u64, rip ) + __field( int, asid ) + __field( __u64, address ) + ), + + TP_fast_assign( + __entry->rip = rip; + __entry->asid = asid; + __entry->address = address; + ), + + TP_printk("rip: 0x%016llx asid: %d address: 0x%016llx\n", + __entry->rip, __entry->asid, __entry->address) +); #endif /* _TRACE_KVM_H */ /* This part must be outside protection */ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 2cf4146b425..86596fc7941 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4988,3 +4988,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmrun); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit); +EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga); -- cgit v1.2.3-70-g09d2 From 532a46b98963f110e9425a251e127d6537915dde Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 9 Oct 2009 16:08:32 +0200 Subject: KVM: SVM: Add tracepoint for skinit instruction This patch adds a tracepoint for the event that the guest executed the SKINIT instruction. This information is important because SKINIT is an SVM extenstion not yet implemented by nested SVM and we may need this information for debugging hypervisors that do not yet run on nested SVM. Signed-off-by: Joerg Roedel Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/svm.c | 10 +++++++++- arch/x86/kvm/trace.h | 22 ++++++++++++++++++++++ arch/x86/kvm/x86.c | 1 + 3 files changed, 32 insertions(+), 1 deletion(-) (limited to 'arch/x86/kvm/svm.c') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index ba18fb7d365..8b9f6fbba48 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1987,6 +1987,14 @@ static int invlpga_interception(struct vcpu_svm *svm) return 1; } +static int skinit_interception(struct vcpu_svm *svm) +{ + trace_kvm_skinit(svm->vmcb->save.rip, svm->vcpu.arch.regs[VCPU_REGS_RAX]); + + kvm_queue_exception(&svm->vcpu, UD_VECTOR); + return 1; +} + static int invalid_op_interception(struct vcpu_svm *svm) { kvm_queue_exception(&svm->vcpu, UD_VECTOR); @@ -2350,7 +2358,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { [SVM_EXIT_VMSAVE] = vmsave_interception, [SVM_EXIT_STGI] = stgi_interception, [SVM_EXIT_CLGI] = clgi_interception, - [SVM_EXIT_SKINIT] = invalid_op_interception, + [SVM_EXIT_SKINIT] = skinit_interception, [SVM_EXIT_WBINVD] = emulate_on_interception, [SVM_EXIT_MONITOR] = invalid_op_interception, [SVM_EXIT_MWAIT] = invalid_op_interception, diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 7e1f08e959b..816e0449db0 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -492,6 +492,28 @@ TRACE_EVENT(kvm_invlpga, TP_printk("rip: 0x%016llx asid: %d address: 0x%016llx\n", __entry->rip, __entry->asid, __entry->address) ); + +/* + * Tracepoint for nested #vmexit because of interrupt pending + */ +TRACE_EVENT(kvm_skinit, + TP_PROTO(__u64 rip, __u32 slb), + TP_ARGS(rip, slb), + + TP_STRUCT__entry( + __field( __u64, rip ) + __field( __u32, slb ) + ), + + TP_fast_assign( + __entry->rip = rip; + __entry->slb = slb; + ), + + TP_printk("rip: 0x%016llx slb: 0x%08x\n", + __entry->rip, __entry->slb) +); + #endif /* _TRACE_KVM_H */ /* This part must be outside protection */ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 86596fc7941..098e7f88630 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4989,3 +4989,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga); +EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit); -- cgit v1.2.3-70-g09d2 From d36f19e9ecd22dc035ef4cc6361b564be650f8e7 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 9 Oct 2009 16:08:33 +0200 Subject: KVM: SVM: Remove nsvm_printk debugging code With all important informations now delivered through tracepoints we can savely remove the nsvm_printk debugging code for nested svm. Signed-off-by: Joerg Roedel Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/svm.c | 34 ---------------------------------- 1 file changed, 34 deletions(-) (limited to 'arch/x86/kvm/svm.c') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 8b9f6fbba48..69610c5d6de 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -53,15 +53,6 @@ MODULE_LICENSE("GPL"); #define DEBUGCTL_RESERVED_BITS (~(0x3fULL)) -/* Turn on to get debugging output*/ -/* #define NESTED_DEBUG */ - -#ifdef NESTED_DEBUG -#define nsvm_printk(fmt, args...) printk(KERN_INFO fmt, ## args) -#else -#define nsvm_printk(fmt, args...) do {} while(0) -#endif - static const u32 host_save_user_msrs[] = { #ifdef CONFIG_X86_64 MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE, @@ -1540,14 +1531,12 @@ static int nested_svm_exit_handled(struct vcpu_svm *svm) } default: { u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR); - nsvm_printk("exit code: 0x%x\n", exit_code); if (svm->nested.intercept & exit_bits) vmexit = NESTED_EXIT_DONE; } } if (vmexit == NESTED_EXIT_DONE) { - nsvm_printk("#VMEXIT reason=%04x\n", exit_code); nested_svm_vmexit(svm); } @@ -1658,10 +1647,6 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) /* Restore the original control entries */ copy_vmcb_control_area(vmcb, hsave); - /* Kill any pending exceptions */ - if (svm->vcpu.arch.exception.pending == true) - nsvm_printk("WARNING: Pending Exception\n"); - kvm_clear_exception_queue(&svm->vcpu); kvm_clear_interrupt_queue(&svm->vcpu); @@ -1826,25 +1811,14 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) force_new_asid(&svm->vcpu); svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK; - if (nested_vmcb->control.int_ctl & V_IRQ_MASK) { - nsvm_printk("nSVM Injecting Interrupt: 0x%x\n", - nested_vmcb->control.int_ctl); - } if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK) svm->vcpu.arch.hflags |= HF_VINTR_MASK; else svm->vcpu.arch.hflags &= ~HF_VINTR_MASK; - nsvm_printk("nSVM exit_int_info: 0x%x | int_state: 0x%x\n", - nested_vmcb->control.exit_int_info, - nested_vmcb->control.int_state); - svm->vmcb->control.int_vector = nested_vmcb->control.int_vector; svm->vmcb->control.int_state = nested_vmcb->control.int_state; svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset; - if (nested_vmcb->control.event_inj & SVM_EVTINJ_VALID) - nsvm_printk("Injecting Event: 0x%x\n", - nested_vmcb->control.event_inj); svm->vmcb->control.event_inj = nested_vmcb->control.event_inj; svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err; @@ -1913,8 +1887,6 @@ static int vmsave_interception(struct vcpu_svm *svm) static int vmrun_interception(struct vcpu_svm *svm) { - nsvm_printk("VMrun\n"); - if (nested_svm_check_permissions(svm)) return 1; @@ -1974,7 +1946,6 @@ static int clgi_interception(struct vcpu_svm *svm) static int invlpga_interception(struct vcpu_svm *svm) { struct kvm_vcpu *vcpu = &svm->vcpu; - nsvm_printk("INVLPGA\n"); trace_kvm_invlpga(svm->vmcb->save.rip, vcpu->arch.regs[VCPU_REGS_RCX], vcpu->arch.regs[VCPU_REGS_RAX]); @@ -2389,10 +2360,6 @@ static int handle_exit(struct kvm_vcpu *vcpu) svm->vmcb->control.exit_int_info, svm->vmcb->control.exit_int_info_err); - nsvm_printk("nested handle_exit: 0x%x | 0x%lx | 0x%lx | 0x%lx\n", - exit_code, svm->vmcb->control.exit_info_1, - svm->vmcb->control.exit_info_2, svm->vmcb->save.rip); - vmexit = nested_svm_exit_special(svm); if (vmexit == NESTED_EXIT_CONTINUE) @@ -2539,7 +2506,6 @@ static int svm_interrupt_allowed(struct kvm_vcpu *vcpu) static void enable_irq_window(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); - nsvm_printk("Trying to open IRQ window\n"); nested_svm_intr(svm); -- cgit v1.2.3-70-g09d2 From 565d0998ecac8373b9a9ecd5991abe74318cd235 Mon Sep 17 00:00:00 2001 From: Mark Langsdorf Date: Tue, 6 Oct 2009 14:25:02 -0500 Subject: KVM: SVM: Support Pause Filter in AMD processors New AMD processors (Family 0x10 models 8+) support the Pause Filter Feature. This feature creates a new field in the VMCB called Pause Filter Count. If Pause Filter Count is greater than 0 and intercepting PAUSEs is enabled, the processor will increment an internal counter when a PAUSE instruction occurs instead of intercepting. When the internal counter reaches the Pause Filter Count value, a PAUSE intercept will occur. This feature can be used to detect contended spinlocks, especially when the lock holding VCPU is not scheduled. Rescheduling another VCPU prevents the VCPU seeking the lock from wasting its quantum by spinning idly. Experimental results show that most spinlocks are held for less than 1000 PAUSE cycles or more than a few thousand. Default the Pause Filter Counter to 3000 to detect the contended spinlocks. Processor support for this feature is indicated by a CPUID bit. On a 24 core system running 4 guests each with 16 VCPUs, this patch improved overall performance of each guest's 32 job kernbench by approximately 3-5% when combined with a scheduler algorithm thati caused the VCPU to sleep for a brief period. Further performance improvement may be possible with a more sophisticated yield algorithm. Signed-off-by: Mark Langsdorf Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/svm.h | 3 ++- arch/x86/kvm/svm.c | 13 +++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) (limited to 'arch/x86/kvm/svm.c') diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index 85574b7c1bc..1fecb7e6113 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -57,7 +57,8 @@ struct __attribute__ ((__packed__)) vmcb_control_area { u16 intercept_dr_write; u32 intercept_exceptions; u64 intercept; - u8 reserved_1[44]; + u8 reserved_1[42]; + u16 pause_filter_count; u64 iopm_base_pa; u64 msrpm_base_pa; u64 tsc_offset; diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 69610c5d6de..170b2d9c690 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -46,6 +46,7 @@ MODULE_LICENSE("GPL"); #define SVM_FEATURE_NPT (1 << 0) #define SVM_FEATURE_LBRV (1 << 1) #define SVM_FEATURE_SVML (1 << 2) +#define SVM_FEATURE_PAUSE_FILTER (1 << 10) #define NESTED_EXIT_HOST 0 /* Exit handled on host level */ #define NESTED_EXIT_DONE 1 /* Exit caused nested vmexit */ @@ -654,6 +655,11 @@ static void init_vmcb(struct vcpu_svm *svm) svm->nested.vmcb = 0; svm->vcpu.arch.hflags = 0; + if (svm_has(SVM_FEATURE_PAUSE_FILTER)) { + control->pause_filter_count = 3000; + control->intercept |= (1ULL << INTERCEPT_PAUSE); + } + enable_gif(svm); } @@ -2281,6 +2287,12 @@ static int interrupt_window_interception(struct vcpu_svm *svm) return 1; } +static int pause_interception(struct vcpu_svm *svm) +{ + kvm_vcpu_on_spin(&(svm->vcpu)); + return 1; +} + static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { [SVM_EXIT_READ_CR0] = emulate_on_interception, [SVM_EXIT_READ_CR3] = emulate_on_interception, @@ -2316,6 +2328,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { [SVM_EXIT_CPUID] = cpuid_interception, [SVM_EXIT_IRET] = iret_interception, [SVM_EXIT_INVD] = emulate_on_interception, + [SVM_EXIT_PAUSE] = pause_interception, [SVM_EXIT_HLT] = halt_interception, [SVM_EXIT_INVLPG] = invlpg_interception, [SVM_EXIT_INVLPGA] = invlpga_interception, -- cgit v1.2.3-70-g09d2 From 6be7d3062b59af891be7e40c6802350de5f78cef Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Sun, 18 Oct 2009 13:24:54 +0200 Subject: KVM: SVM: Cleanup NMI singlestep Push the NMI-related singlestep variable into vcpu_svm. It's dealing with an AMD-specific deficit, nothing generic for x86. Acked-by: Gleb Natapov Signed-off-by: Jan Kiszka arch/x86/include/asm/kvm_host.h | 1 - arch/x86/kvm/svm.c | 12 +++++++----- 2 files changed, 7 insertions(+), 6 deletions(-) Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/kvm_host.h | 1 - arch/x86/kvm/svm.c | 12 +++++++----- 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'arch/x86/kvm/svm.c') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 2536fbd85b3..4d994ad5051 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -354,7 +354,6 @@ struct kvm_vcpu_arch { unsigned int time_offset; struct page *time_page; - bool singlestep; /* guest is single stepped by KVM */ bool nmi_pending; bool nmi_injected; diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 170b2d9c690..ffa6ad216b7 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -107,6 +107,8 @@ struct vcpu_svm { u32 *msrpm; struct nested_state nested; + + bool nmi_singlestep; }; /* enable NPT for AMD64 and X86 with PAE */ @@ -1050,7 +1052,7 @@ static void update_db_intercept(struct kvm_vcpu *vcpu) svm->vmcb->control.intercept_exceptions &= ~((1 << DB_VECTOR) | (1 << BP_VECTOR)); - if (vcpu->arch.singlestep) + if (svm->nmi_singlestep) svm->vmcb->control.intercept_exceptions |= (1 << DB_VECTOR); if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) { @@ -1195,13 +1197,13 @@ static int db_interception(struct vcpu_svm *svm) if (!(svm->vcpu.guest_debug & (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) && - !svm->vcpu.arch.singlestep) { + !svm->nmi_singlestep) { kvm_queue_exception(&svm->vcpu, DB_VECTOR); return 1; } - if (svm->vcpu.arch.singlestep) { - svm->vcpu.arch.singlestep = false; + if (svm->nmi_singlestep) { + svm->nmi_singlestep = false; if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP)) svm->vmcb->save.rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF); @@ -2543,7 +2545,7 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu) /* Something prevents NMI from been injected. Single step over possible problem (IRET or exception injection or interrupt shadow) */ - vcpu->arch.singlestep = true; + svm->nmi_singlestep = true; svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF); update_db_intercept(vcpu); } -- cgit v1.2.3-70-g09d2 From 18fa000ae453767b59ab97477925895a3f0c46ea Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Sat, 24 Oct 2009 02:49:59 -0200 Subject: KVM: SVM: Reset cr0 properly on vcpu reset svm_vcpu_reset() was not properly resetting the contents of the guest-visible cr0 register, causing the following issue: https://bugzilla.redhat.com/show_bug.cgi?id=525699 Without resetting cr0 properly, the vcpu was running the SIPI bootstrap routine with paging enabled, making the vcpu get a pagefault exception while trying to run it. Instead of setting vmcb->save.cr0 directly, the new code just resets kvm->arch.cr0 and calls kvm_set_cr0(). The bits that were set/cleared on vmcb->save.cr0 (PG, WP, !CD, !NW) will be set properly by svm_set_cr0(). kvm_set_cr0() is used instead of calling svm_set_cr0() directly to make sure kvm_mmu_reset_context() is called to reset the mmu to nonpaging mode. Signed-off-by: Eduardo Habkost Signed-off-by: Avi Kivity --- arch/x86/kvm/svm.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'arch/x86/kvm/svm.c') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index ffa6ad216b7..c9ef6c0e1e9 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -628,11 +628,12 @@ static void init_vmcb(struct vcpu_svm *svm) save->rip = 0x0000fff0; svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip; - /* - * cr0 val on cpu init should be 0x60000010, we enable cpu - * cache by default. the orderly way is to enable cache in bios. + /* This is the guest-visible cr0 value. + * svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0. */ - save->cr0 = 0x00000010 | X86_CR0_PG | X86_CR0_WP; + svm->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET; + kvm_set_cr0(&svm->vcpu, svm->vcpu.arch.cr0); + save->cr4 = X86_CR4_PAE; /* rdx = ?? */ -- cgit v1.2.3-70-g09d2 From 3ce672d48400e0112fec7a3cb6bb2120493c6e11 Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Sat, 24 Oct 2009 02:50:00 -0200 Subject: KVM: SVM: init_vmcb(): remove redundant save->cr0 initialization The svm_set_cr0() call will initialize save->cr0 properly even when npt is enabled, clearing the NW and CD bits as expected, so we don't need to initialize it manually for npt_enabled anymore. Signed-off-by: Eduardo Habkost Signed-off-by: Avi Kivity --- arch/x86/kvm/svm.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86/kvm/svm.c') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index c9ef6c0e1e9..34b700f9e49 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -648,8 +648,6 @@ static void init_vmcb(struct vcpu_svm *svm) control->intercept_cr_write &= ~(INTERCEPT_CR0_MASK| INTERCEPT_CR3_MASK); save->g_pat = 0x0007040600070406ULL; - /* enable caching because the QEMU Bios doesn't enable it */ - save->cr0 = X86_CR0_ET; save->cr3 = 0; save->cr4 = 0; } -- cgit v1.2.3-70-g09d2 From 3cfc3092f40bc37c57ba556cfd8de4218f2135ab Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Thu, 12 Nov 2009 01:04:25 +0100 Subject: KVM: x86: Add KVM_GET/SET_VCPU_EVENTS This new IOCTL exports all yet user-invisible states related to exceptions, interrupts, and NMIs. Together with appropriate user space changes, this fixes sporadic problems of vmsave/restore, live migration and system reset. [avi: future-proof abi by adding a flags field] Signed-off-by: Jan Kiszka Signed-off-by: Avi Kivity --- Documentation/kvm/api.txt | 49 ++++++++++++++++++++++++++ arch/x86/include/asm/kvm.h | 28 +++++++++++++++ arch/x86/include/asm/kvm_host.h | 2 ++ arch/x86/kvm/svm.c | 22 ++++++++++++ arch/x86/kvm/vmx.c | 30 ++++++++++++++++ arch/x86/kvm/x86.c | 77 +++++++++++++++++++++++++++++++++++++++++ include/linux/kvm.h | 6 ++++ 7 files changed, 214 insertions(+) (limited to 'arch/x86/kvm/svm.c') diff --git a/Documentation/kvm/api.txt b/Documentation/kvm/api.txt index 36594ba5772..e1a11416102 100644 --- a/Documentation/kvm/api.txt +++ b/Documentation/kvm/api.txt @@ -653,6 +653,55 @@ struct kvm_clock_data { __u32 pad[9]; }; +4.29 KVM_GET_VCPU_EVENTS + +Capability: KVM_CAP_VCPU_EVENTS +Architectures: x86 +Type: vm ioctl +Parameters: struct kvm_vcpu_event (out) +Returns: 0 on success, -1 on error + +Gets currently pending exceptions, interrupts, and NMIs as well as related +states of the vcpu. + +struct kvm_vcpu_events { + struct { + __u8 injected; + __u8 nr; + __u8 has_error_code; + __u8 pad; + __u32 error_code; + } exception; + struct { + __u8 injected; + __u8 nr; + __u8 soft; + __u8 pad; + } interrupt; + struct { + __u8 injected; + __u8 pending; + __u8 masked; + __u8 pad; + } nmi; + __u32 sipi_vector; + __u32 flags; /* must be zero */ +}; + +4.30 KVM_SET_VCPU_EVENTS + +Capability: KVM_CAP_VCPU_EVENTS +Architectures: x86 +Type: vm ioctl +Parameters: struct kvm_vcpu_event (in) +Returns: 0 on success, -1 on error + +Set pending exceptions, interrupts, and NMIs as well as related states of the +vcpu. + +See KVM_GET_VCPU_EVENTS for the data structure. + + 5. The kvm_run structure Application code obtains a pointer to the kvm_run structure by diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h index ef9b4b73cce..950df434763 100644 --- a/arch/x86/include/asm/kvm.h +++ b/arch/x86/include/asm/kvm.h @@ -20,6 +20,7 @@ #define __KVM_HAVE_MCE #define __KVM_HAVE_PIT_STATE2 #define __KVM_HAVE_XEN_HVM +#define __KVM_HAVE_VCPU_EVENTS /* Architectural interrupt line count. */ #define KVM_NR_INTERRUPTS 256 @@ -252,4 +253,31 @@ struct kvm_reinject_control { __u8 pit_reinject; __u8 reserved[31]; }; + +/* for KVM_GET/SET_VCPU_EVENTS */ +struct kvm_vcpu_events { + struct { + __u8 injected; + __u8 nr; + __u8 has_error_code; + __u8 pad; + __u32 error_code; + } exception; + struct { + __u8 injected; + __u8 nr; + __u8 soft; + __u8 pad; + } interrupt; + struct { + __u8 injected; + __u8 pending; + __u8 masked; + __u8 pad; + } nmi; + __u32 sipi_vector; + __u32 flags; + __u32 reserved[10]; +}; + #endif /* _ASM_X86_KVM_H */ diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 26a74b7bb6b..06e085614da 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -523,6 +523,8 @@ struct kvm_x86_ops { bool has_error_code, u32 error_code); int (*interrupt_allowed)(struct kvm_vcpu *vcpu); int (*nmi_allowed)(struct kvm_vcpu *vcpu); + bool (*get_nmi_mask)(struct kvm_vcpu *vcpu); + void (*set_nmi_mask)(struct kvm_vcpu *vcpu, bool masked); void (*enable_nmi_window)(struct kvm_vcpu *vcpu); void (*enable_irq_window)(struct kvm_vcpu *vcpu); void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 34b700f9e49..3de0b37ec03 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -2499,6 +2499,26 @@ static int svm_nmi_allowed(struct kvm_vcpu *vcpu) !(svm->vcpu.arch.hflags & HF_NMI_MASK); } +static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu) +{ + struct vcpu_svm *svm = to_svm(vcpu); + + return !!(svm->vcpu.arch.hflags & HF_NMI_MASK); +} + +static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) +{ + struct vcpu_svm *svm = to_svm(vcpu); + + if (masked) { + svm->vcpu.arch.hflags |= HF_NMI_MASK; + svm->vmcb->control.intercept |= (1UL << INTERCEPT_IRET); + } else { + svm->vcpu.arch.hflags &= ~HF_NMI_MASK; + svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET); + } +} + static int svm_interrupt_allowed(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -2946,6 +2966,8 @@ static struct kvm_x86_ops svm_x86_ops = { .queue_exception = svm_queue_exception, .interrupt_allowed = svm_interrupt_allowed, .nmi_allowed = svm_nmi_allowed, + .get_nmi_mask = svm_get_nmi_mask, + .set_nmi_mask = svm_set_nmi_mask, .enable_nmi_window = enable_nmi_window, .enable_irq_window = enable_irq_window, .update_cr8_intercept = update_cr8_intercept, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 22fcd27a0b5..778f059ae42 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2639,6 +2639,34 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) GUEST_INTR_STATE_NMI)); } +static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu) +{ + if (!cpu_has_virtual_nmis()) + return to_vmx(vcpu)->soft_vnmi_blocked; + else + return !!(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & + GUEST_INTR_STATE_NMI); +} + +static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + + if (!cpu_has_virtual_nmis()) { + if (vmx->soft_vnmi_blocked != masked) { + vmx->soft_vnmi_blocked = masked; + vmx->vnmi_blocked_time = 0; + } + } else { + if (masked) + vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, + GUEST_INTR_STATE_NMI); + else + vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO, + GUEST_INTR_STATE_NMI); + } +} + static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu) { return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) && @@ -3985,6 +4013,8 @@ static struct kvm_x86_ops vmx_x86_ops = { .queue_exception = vmx_queue_exception, .interrupt_allowed = vmx_interrupt_allowed, .nmi_allowed = vmx_nmi_allowed, + .get_nmi_mask = vmx_get_nmi_mask, + .set_nmi_mask = vmx_set_nmi_mask, .enable_nmi_window = enable_nmi_window, .enable_irq_window = enable_irq_window, .update_cr8_intercept = update_cr8_intercept, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ba8958dca3c..35eea30821d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1342,6 +1342,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_SET_IDENTITY_MAP_ADDR: case KVM_CAP_XEN_HVM: case KVM_CAP_ADJUST_CLOCK: + case KVM_CAP_VCPU_EVENTS: r = 1; break; case KVM_CAP_COALESCED_MMIO: @@ -1883,6 +1884,61 @@ static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu, return 0; } +static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, + struct kvm_vcpu_events *events) +{ + vcpu_load(vcpu); + + events->exception.injected = vcpu->arch.exception.pending; + events->exception.nr = vcpu->arch.exception.nr; + events->exception.has_error_code = vcpu->arch.exception.has_error_code; + events->exception.error_code = vcpu->arch.exception.error_code; + + events->interrupt.injected = vcpu->arch.interrupt.pending; + events->interrupt.nr = vcpu->arch.interrupt.nr; + events->interrupt.soft = vcpu->arch.interrupt.soft; + + events->nmi.injected = vcpu->arch.nmi_injected; + events->nmi.pending = vcpu->arch.nmi_pending; + events->nmi.masked = kvm_x86_ops->get_nmi_mask(vcpu); + + events->sipi_vector = vcpu->arch.sipi_vector; + + events->flags = 0; + + vcpu_put(vcpu); +} + +static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, + struct kvm_vcpu_events *events) +{ + if (events->flags) + return -EINVAL; + + vcpu_load(vcpu); + + vcpu->arch.exception.pending = events->exception.injected; + vcpu->arch.exception.nr = events->exception.nr; + vcpu->arch.exception.has_error_code = events->exception.has_error_code; + vcpu->arch.exception.error_code = events->exception.error_code; + + vcpu->arch.interrupt.pending = events->interrupt.injected; + vcpu->arch.interrupt.nr = events->interrupt.nr; + vcpu->arch.interrupt.soft = events->interrupt.soft; + if (vcpu->arch.interrupt.pending && irqchip_in_kernel(vcpu->kvm)) + kvm_pic_clear_isr_ack(vcpu->kvm); + + vcpu->arch.nmi_injected = events->nmi.injected; + vcpu->arch.nmi_pending = events->nmi.pending; + kvm_x86_ops->set_nmi_mask(vcpu, events->nmi.masked); + + vcpu->arch.sipi_vector = events->sipi_vector; + + vcpu_put(vcpu); + + return 0; +} + long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -2040,6 +2096,27 @@ long kvm_arch_vcpu_ioctl(struct file *filp, r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce); break; } + case KVM_GET_VCPU_EVENTS: { + struct kvm_vcpu_events events; + + kvm_vcpu_ioctl_x86_get_vcpu_events(vcpu, &events); + + r = -EFAULT; + if (copy_to_user(argp, &events, sizeof(struct kvm_vcpu_events))) + break; + r = 0; + break; + } + case KVM_SET_VCPU_EVENTS: { + struct kvm_vcpu_events events; + + r = -EFAULT; + if (copy_from_user(&events, argp, sizeof(struct kvm_vcpu_events))) + break; + + r = kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu, &events); + break; + } default: r = -EINVAL; } diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 976f4d18185..92045a92d71 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -489,6 +489,9 @@ struct kvm_ioeventfd { #endif #define KVM_CAP_ADJUST_CLOCK 39 #define KVM_CAP_INTERNAL_ERROR_DATA 40 +#ifdef __KVM_HAVE_VCPU_EVENTS +#define KVM_CAP_VCPU_EVENTS 41 +#endif #ifdef KVM_CAP_IRQ_ROUTING @@ -672,6 +675,9 @@ struct kvm_clock_data { /* IA64 stack access */ #define KVM_IA64_VCPU_GET_STACK _IOR(KVMIO, 0x9a, void *) #define KVM_IA64_VCPU_SET_STACK _IOW(KVMIO, 0x9b, void *) +/* Available with KVM_CAP_VCPU_EVENTS */ +#define KVM_GET_VCPU_EVENTS _IOR(KVMIO, 0x9f, struct kvm_vcpu_events) +#define KVM_SET_VCPU_EVENTS _IOW(KVMIO, 0xa0, struct kvm_vcpu_events) #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) -- cgit v1.2.3-70-g09d2