From f6e78475894d6534d7d62714a95e2265f53d2a92 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 2 Aug 2010 15:30:20 +0300 Subject: KVM: Use kvm_get_rflags() and kvm_set_rflags() instead of the raw versions Some rflags bits are owned by the host, not guest, so we need to use kvm_get_rflags() to strip those bits away or kvm_set_rflags() to add them back. Signed-off-by: Avi Kivity --- arch/x86/kvm/svm.c | 14 +++++++------- arch/x86/kvm/vmx.c | 2 +- arch/x86/kvm/x86.c | 8 ++++---- 3 files changed, 12 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 6bb15d583e4..2a193222c98 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -975,7 +975,7 @@ static void init_vmcb(struct vcpu_svm *svm) svm_set_efer(&svm->vcpu, 0); save->dr6 = 0xffff0ff0; save->dr7 = 0x400; - save->rflags = 2; + kvm_set_rflags(&svm->vcpu, 2); save->rip = 0x0000fff0; svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip; @@ -2127,7 +2127,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) nested_vmcb->save.cr3 = kvm_read_cr3(&svm->vcpu); nested_vmcb->save.cr2 = vmcb->save.cr2; nested_vmcb->save.cr4 = svm->vcpu.arch.cr4; - nested_vmcb->save.rflags = vmcb->save.rflags; + nested_vmcb->save.rflags = kvm_get_rflags(&svm->vcpu); nested_vmcb->save.rip = vmcb->save.rip; nested_vmcb->save.rsp = vmcb->save.rsp; nested_vmcb->save.rax = vmcb->save.rax; @@ -2184,7 +2184,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) svm->vmcb->save.ds = hsave->save.ds; svm->vmcb->save.gdtr = hsave->save.gdtr; svm->vmcb->save.idtr = hsave->save.idtr; - svm->vmcb->save.rflags = hsave->save.rflags; + kvm_set_rflags(&svm->vcpu, hsave->save.rflags); svm_set_efer(&svm->vcpu, hsave->save.efer); svm_set_cr0(&svm->vcpu, hsave->save.cr0 | X86_CR0_PE); svm_set_cr4(&svm->vcpu, hsave->save.cr4); @@ -2312,7 +2312,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) hsave->save.efer = svm->vcpu.arch.efer; hsave->save.cr0 = kvm_read_cr0(&svm->vcpu); hsave->save.cr4 = svm->vcpu.arch.cr4; - hsave->save.rflags = vmcb->save.rflags; + hsave->save.rflags = kvm_get_rflags(&svm->vcpu); hsave->save.rip = kvm_rip_read(&svm->vcpu); hsave->save.rsp = vmcb->save.rsp; hsave->save.rax = vmcb->save.rax; @@ -2323,7 +2323,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) copy_vmcb_control_area(hsave, vmcb); - if (svm->vmcb->save.rflags & X86_EFLAGS_IF) + if (kvm_get_rflags(&svm->vcpu) & X86_EFLAGS_IF) svm->vcpu.arch.hflags |= HF_HIF_MASK; else svm->vcpu.arch.hflags &= ~HF_HIF_MASK; @@ -2341,7 +2341,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) svm->vmcb->save.ds = nested_vmcb->save.ds; svm->vmcb->save.gdtr = nested_vmcb->save.gdtr; svm->vmcb->save.idtr = nested_vmcb->save.idtr; - svm->vmcb->save.rflags = nested_vmcb->save.rflags; + kvm_set_rflags(&svm->vcpu, nested_vmcb->save.rflags); svm_set_efer(&svm->vcpu, nested_vmcb->save.efer); svm_set_cr0(&svm->vcpu, nested_vmcb->save.cr0); svm_set_cr4(&svm->vcpu, nested_vmcb->save.cr4); @@ -3384,7 +3384,7 @@ static int svm_interrupt_allowed(struct kvm_vcpu *vcpu) (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)) return 0; - ret = !!(vmcb->save.rflags & X86_EFLAGS_IF); + ret = !!(kvm_get_rflags(vcpu) & X86_EFLAGS_IF); if (is_guest_mode(vcpu)) return ret && !(svm->vcpu.arch.hflags & HF_VINTR_MASK); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 5b4cdcbd154..d09833e45f6 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2113,7 +2113,7 @@ static int vmx_get_cpl(struct kvm_vcpu *vcpu) if (!is_protmode(vcpu)) return 0; - if 
(vmx_get_rflags(vcpu) & X86_EFLAGS_VM) /* if virtual 8086 */ + if (kvm_get_rflags(vcpu) & X86_EFLAGS_VM) /* if virtual 8086 */ return 3; return vmcs_read16(GUEST_CS_SELECTOR) & 3; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 934b4c6b0bf..3a557eefd2f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4310,7 +4310,7 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu) kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); vcpu->arch.emulate_ctxt.vcpu = vcpu; - vcpu->arch.emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu); + vcpu->arch.emulate_ctxt.eflags = kvm_get_rflags(vcpu); vcpu->arch.emulate_ctxt.eip = kvm_rip_read(vcpu); vcpu->arch.emulate_ctxt.mode = (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL : @@ -4340,7 +4340,7 @@ int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq) vcpu->arch.emulate_ctxt.eip = c->eip; memcpy(vcpu->arch.regs, c->regs, sizeof c->regs); kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip); - kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); + kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); if (irq == NMI_VECTOR) vcpu->arch.nmi_pending = false; @@ -4473,7 +4473,7 @@ restart: r = EMULATE_DONE; toggle_interruptibility(vcpu, vcpu->arch.emulate_ctxt.interruptibility); - kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); + kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); kvm_make_request(KVM_REQ_EVENT, vcpu); memcpy(vcpu->arch.regs, c->regs, sizeof c->regs); kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip); @@ -5592,7 +5592,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason, memcpy(vcpu->arch.regs, c->regs, sizeof c->regs); kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip); - kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); + kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); kvm_make_request(KVM_REQ_EVENT, vcpu); return EMULATE_DONE; } -- cgit v1.2.3-70-g09d2 From 6de12732c42c7070af42e3d6e42ecee2838fc920 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 7 Mar 2011 12:51:22 +0200 Subject: KVM: VMX: Optimize vmx_get_rflags() If called several times within the same exit, return cached results. 
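The pattern is the usual lazy read: a per-exit availability bit guards a cached copy, so the VMREAD is paid at most once per exit. As a rough sketch of the idea (illustrative only, using a simplified cache structure rather than the regs_avail bitmap the patch actually uses):

	/* Sketch: cache-on-first-use; 'valid' is cleared on every vmexit. */
	struct cached_reg {
		unsigned long value;
		bool valid;
	};

	static unsigned long read_guest_rflags(struct cached_reg *c)
	{
		if (!c->valid) {
			c->value = vmcs_readl(GUEST_RFLAGS); /* the expensive VMREAD */
			c->valid = true;
		}
		return c->value; /* later calls in the same exit are free */
	}

Writes go through the same cache: vmx_set_rflags() stores the new value and marks it available, so a read following a write never touches the VMCS.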
Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/vmx.c | 20 ++++++++++++++------ 2 files changed, 15 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index c8af0991fdf..5af42646495 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -118,6 +118,7 @@ enum kvm_reg { enum kvm_reg_ex { VCPU_EXREG_PDPTR = NR_VCPU_REGS, VCPU_EXREG_CR3, + VCPU_EXREG_RFLAGS, }; enum { diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index d09833e45f6..4d117072acf 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -130,6 +130,7 @@ struct vcpu_vmx { u8 fail; u32 exit_intr_info; u32 idt_vectoring_info; + ulong rflags; struct shared_msr_entry *guest_msrs; int nmsrs; int save_nmsrs; @@ -970,17 +971,23 @@ static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu) { unsigned long rflags, save_rflags; - rflags = vmcs_readl(GUEST_RFLAGS); - if (to_vmx(vcpu)->rmode.vm86_active) { - rflags &= RMODE_GUEST_OWNED_EFLAGS_BITS; - save_rflags = to_vmx(vcpu)->rmode.save_rflags; - rflags |= save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS; + if (!test_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail)) { + __set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail); + rflags = vmcs_readl(GUEST_RFLAGS); + if (to_vmx(vcpu)->rmode.vm86_active) { + rflags &= RMODE_GUEST_OWNED_EFLAGS_BITS; + save_rflags = to_vmx(vcpu)->rmode.save_rflags; + rflags |= save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS; + } + to_vmx(vcpu)->rflags = rflags; } - return rflags; + return to_vmx(vcpu)->rflags; } static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) { + __set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail); + to_vmx(vcpu)->rflags = rflags; if (to_vmx(vcpu)->rmode.vm86_active) { to_vmx(vcpu)->rmode.save_rflags = rflags; rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM; @@ -4124,6 +4131,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) ); vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP) + | (1 << VCPU_EXREG_RFLAGS) | (1 << VCPU_EXREG_PDPTR) | (1 << VCPU_EXREG_CR3)); vcpu->arch.regs_dirty = 0; -- cgit v1.2.3-70-g09d2 From f4c63e5d5a356b652a3d984edbefca289176c40f Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 7 Mar 2011 14:54:28 +0200 Subject: KVM: VMX: Optimize vmx_get_cpl() In long mode, vm86 mode is disallowed, so we need not check for it. Reading rflags.vm may require a VMREAD, so it is expensive. Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 4d117072acf..e8a35ee0073 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2120,7 +2120,8 @@ static int vmx_get_cpl(struct kvm_vcpu *vcpu) if (!is_protmode(vcpu)) return 0; - if (kvm_get_rflags(vcpu) & X86_EFLAGS_VM) /* if virtual 8086 */ + if (!is_long_mode(vcpu) + && (kvm_get_rflags(vcpu) & X86_EFLAGS_VM)) /* if virtual 8086 */ return 3; return vmcs_read16(GUEST_CS_SELECTOR) & 3; -- cgit v1.2.3-70-g09d2 From 69c730289011df706a1c9890d6e6c5ee822623c7 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 7 Mar 2011 15:26:44 +0200 Subject: KVM: VMX: Cache cpl We may read the cpl quite often in the same vmexit (instruction privilege check, memory access checks for instruction and operands), so we gain a bit if we cache the value. 
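To make the repetition concrete: emulating one privileged or I/O instruction can query the CPL several times, and each query may cost a VMREAD of GUEST_CS_SELECTOR. A simplified illustration of the call pattern inside the emulator (not verbatim kernel code):

	/* Each ops->cpl() call could hit the VMCS before CPL caching. */
	if ((c->d & Priv) && ops->cpl(ctxt->vcpu))	/* privilege check */
		return emulate_gp(ctxt, 0);
	if (!emulator_io_permited(ctxt, ops, port, bytes)) /* consults CPL/IOPL */
		return emulate_gp(ctxt, 0);
	/* ...memory operand checks may consult the CPL yet again... */

With the cache, only the first query per exit reads the VMCS.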
Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/vmx.c | 17 ++++++++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 5af42646495..35f81b11026 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -119,6 +119,7 @@ enum kvm_reg_ex { VCPU_EXREG_PDPTR = NR_VCPU_REGS, VCPU_EXREG_CR3, VCPU_EXREG_RFLAGS, + VCPU_EXREG_CPL, }; enum { diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index e8a35ee0073..8f9e77edc01 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -128,6 +128,7 @@ struct vcpu_vmx { unsigned long host_rsp; int launched; u8 fail; + u8 cpl; u32 exit_intr_info; u32 idt_vectoring_info; ulong rflags; @@ -987,6 +988,7 @@ static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu) static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) { __set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail); + __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail); to_vmx(vcpu)->rflags = rflags; if (to_vmx(vcpu)->rmode.vm86_active) { to_vmx(vcpu)->rmode.save_rflags = rflags; @@ -2005,6 +2007,7 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) vmcs_writel(CR0_READ_SHADOW, cr0); vmcs_writel(GUEST_CR0, hw_cr0); vcpu->arch.cr0 = cr0; + __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail); } static u64 construct_eptp(unsigned long root_hpa) @@ -2115,7 +2118,7 @@ static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg) return vmcs_readl(sf->base); } -static int vmx_get_cpl(struct kvm_vcpu *vcpu) +static int __vmx_get_cpl(struct kvm_vcpu *vcpu) { if (!is_protmode(vcpu)) return 0; @@ -2127,6 +2130,16 @@ static int vmx_get_cpl(struct kvm_vcpu *vcpu) return vmcs_read16(GUEST_CS_SELECTOR) & 3; } +static int vmx_get_cpl(struct kvm_vcpu *vcpu) +{ + if (!test_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail)) { + __set_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail); + to_vmx(vcpu)->cpl = __vmx_get_cpl(vcpu); + } + return to_vmx(vcpu)->cpl; +} + + static u32 vmx_segment_access_rights(struct kvm_segment *var) { u32 ar; @@ -2192,6 +2205,7 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, ar |= 0x1; /* Accessed */ vmcs_write32(sf->ar_bytes, ar); + __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail); } static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) @@ -4133,6 +4147,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP) | (1 << VCPU_EXREG_RFLAGS) + | (1 << VCPU_EXREG_CPL) | (1 << VCPU_EXREG_PDPTR) | (1 << VCPU_EXREG_CR3)); vcpu->arch.regs_dirty = 0; -- cgit v1.2.3-70-g09d2 From 9d58b93192065f4b2ba6b880e9b0dab0bc11d0ba Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 7 Mar 2011 16:52:07 +0200 Subject: KVM: VMX: Avoid vmx_recover_nmi_blocking() when unneeded When we haven't injected an interrupt, we don't need to recover the nmi blocking state (since the guest can't set it by itself). This allows us to avoid a VMREAD later on. 
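The hint is deliberately one-sided: nmi_known_unmasked may only be trusted when it is true, and any path that can re-enable NMI blocking must clear it first. Schematically (an illustrative distillation of the changes below, not new kernel code):

	/* Fast path: skip the VMREAD when the hint is authoritative. */
	static bool nmi_masked(struct vcpu_vmx *vmx)
	{
		if (vmx->nmi_known_unmasked)
			return false; /* provably unmasked, no VMREAD needed */
		return vmcs_read32(GUEST_INTERRUPTIBILITY_INFO)
			& GUEST_INTR_STATE_NMI;
	}

	/* Anything that may set NMI blocking -- injecting an NMI, a vectoring
	 * task switch, an explicit mask request -- drops the hint first. */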
Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 8f9e77edc01..53bf6ae493e 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -129,6 +129,7 @@ struct vcpu_vmx { int launched; u8 fail; u8 cpl; + bool nmi_known_unmasked; u32 exit_intr_info; u32 idt_vectoring_info; ulong rflags; @@ -2959,6 +2960,7 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu) } ++vcpu->stat.nmi_injections; + vmx->nmi_known_unmasked = false; if (vmx->rmode.vm86_active) { if (kvm_inject_realmode_interrupt(vcpu, NMI_VECTOR) != EMULATE_DONE) kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); @@ -2983,6 +2985,8 @@ static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu) { if (!cpu_has_virtual_nmis()) return to_vmx(vcpu)->soft_vnmi_blocked; + if (to_vmx(vcpu)->nmi_known_unmasked) + return false; return vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI; } @@ -2996,6 +3000,7 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) vmx->vnmi_blocked_time = 0; } } else { + vmx->nmi_known_unmasked = !masked; if (masked) vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI); @@ -3527,9 +3532,11 @@ static int handle_task_switch(struct kvm_vcpu *vcpu) switch (type) { case INTR_TYPE_NMI_INTR: vcpu->arch.nmi_injected = false; - if (cpu_has_virtual_nmis()) + if (cpu_has_virtual_nmis()) { vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI); + vmx->nmi_known_unmasked = false; + } break; case INTR_TYPE_EXT_INTR: case INTR_TYPE_SOFT_INTR: @@ -3916,6 +3923,8 @@ static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx) idtv_info_valid = vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK; if (cpu_has_virtual_nmis()) { + if (vmx->nmi_known_unmasked) + return; unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0; vector = exit_intr_info & INTR_INFO_VECTOR_MASK; /* @@ -3932,6 +3941,10 @@ static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx) vector != DF_VECTOR && !idtv_info_valid) vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI); + else + vmx->nmi_known_unmasked = + !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) + & GUEST_INTR_STATE_NMI); } else if (unlikely(vmx->soft_vnmi_blocked)) vmx->vnmi_blocked_time += ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time)); @@ -3970,6 +3983,7 @@ static void __vmx_complete_interrupts(struct vcpu_vmx *vmx, */ vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI); + vmx->nmi_known_unmasked = true; break; case INTR_TYPE_SOFT_EXCEPTION: vmx->vcpu.arch.event_exit_inst_len = -- cgit v1.2.3-70-g09d2 From f9902069c41254ad116e089e64ea21d3a000cc41 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 7 Mar 2011 17:20:29 +0200 Subject: KVM: VMX: Qualify check for host NMI Check for the exit reason first; this allows us, later, to avoid a VMREAD for VM_EXIT_INTR_INFO_FIELD. 
Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 53bf6ae493e..89130ba3b69 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3905,7 +3905,8 @@ static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx) kvm_machine_check(); /* We need to handle NMIs before interrupts are enabled */ - if ((exit_intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR && + if (vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI && + (exit_intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR && (exit_intr_info & INTR_INFO_VALID_MASK)) { kvm_before_handle_nmi(&vmx->vcpu); asm("int $2"); -- cgit v1.2.3-70-g09d2 From 00eba012d53e63f620455f7013917e4bf59424f2 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 7 Mar 2011 17:24:54 +0200 Subject: KVM: VMX: Refactor vmx_complete_atomic_exit() Move the exit reason checks to the front of the function, for early exit in the common case. Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 89130ba3b69..51aa827e3bb 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3896,17 +3896,20 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx) { - u32 exit_intr_info = vmx->exit_intr_info; + u32 exit_intr_info; + + if (!(vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY + || vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI)) + return; + + exit_intr_info = vmx->exit_intr_info; /* Handle machine checks before interrupts are enabled */ - if ((vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY) - || (vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI - && is_machine_check(exit_intr_info))) + if (is_machine_check(exit_intr_info)) kvm_machine_check(); /* We need to handle NMIs before interrupts are enabled */ - if (vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI && - (exit_intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR && + if ((exit_intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR && (exit_intr_info & INTR_INFO_VALID_MASK)) { kvm_before_handle_nmi(&vmx->vcpu); asm("int $2"); -- cgit v1.2.3-70-g09d2 From c5ca8e572c4d8cb8dec1cf5b3fc9c7066f6b2c29 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 7 Mar 2011 17:37:37 +0200 Subject: KVM: VMX: Don't VMREAD VM_EXIT_INTR_INFO unconditionally Only read it if we're going to use it later. 
Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 51aa827e3bb..f95f48b2600 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3902,6 +3902,7 @@ static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx) || vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI)) return; + vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); exit_intr_info = vmx->exit_intr_info; /* Handle machine checks before interrupts are enabled */ @@ -3919,7 +3920,7 @@ static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx) static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx) { - u32 exit_intr_info = vmx->exit_intr_info; + u32 exit_intr_info; bool unblock_nmi; u8 vector; bool idtv_info_valid; @@ -3929,6 +3930,11 @@ static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx) if (cpu_has_virtual_nmis()) { if (vmx->nmi_known_unmasked) return; + /* + * Can't use vmx->exit_intr_info since we're not sure what + * the exit reason is. + */ + exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0; vector = exit_intr_info & INTR_INFO_VECTOR_MASK; /* @@ -4176,7 +4182,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) vmx->launched = 1; vmx->exit_reason = vmcs_read32(VM_EXIT_REASON); - vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); vmx_complete_atomic_exit(vmx); vmx_recover_nmi_blocking(vmx); -- cgit v1.2.3-70-g09d2 From 887864758580c80710947c38a4692032163777df Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 7 Mar 2011 17:39:45 +0200 Subject: KVM: VMX: Use cached VM_EXIT_INTR_INFO in handle_exception vmx_complete_atomic_exit() cached it for us, so we can use it here. Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index f95f48b2600..1bdb49de6a2 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3118,7 +3118,7 @@ static int handle_exception(struct kvm_vcpu *vcpu) enum emulation_result er; vect_info = vmx->idt_vectoring_info; - intr_info = vmcs_read32(VM_EXIT_INTR_INFO); + intr_info = vmx->exit_intr_info; if (is_machine_check(intr_info)) return handle_machine_check(vcpu); -- cgit v1.2.3-70-g09d2 From 89a9fb78b5bd8bece353449079726556ecab41df Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Thu, 24 Mar 2011 09:45:10 +0100 Subject: KVM: SVM: Remove unused svm_features We use boot_cpu_has now. Signed-off-by: Jan Kiszka Signed-off-by: Avi Kivity --- arch/x86/kvm/svm.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 2a193222c98..cb43e98eff6 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -376,7 +376,6 @@ struct svm_cpu_data { }; static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data); -static uint32_t svm_features; struct svm_init_data { int cpu; @@ -802,8 +801,6 @@ static __init int svm_hardware_setup(void) goto err; } - svm_features = cpuid_edx(SVM_CPUID_FUNC); - if (!boot_cpu_has(X86_FEATURE_NPT)) npt_enabled = false; -- cgit v1.2.3-70-g09d2 From 654f06fc651b01782015185e5b049197255463a3 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 23 Mar 2011 15:02:47 +0200 Subject: KVM: VMX: simplify NMI mask management Use vmx_set_nmi_mask() instead of open-coding management of the hardware bit and the software hint (nmi_known_unmasked). 
There's a slight change of behaviour when running without hardware virtual NMI support - we now clear the NMI mask if NMI delivery faulted in that case as well. This improves emulation accuracy. Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 1bdb49de6a2..2b99ae72481 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3532,11 +3532,7 @@ static int handle_task_switch(struct kvm_vcpu *vcpu) switch (type) { case INTR_TYPE_NMI_INTR: vcpu->arch.nmi_injected = false; - if (cpu_has_virtual_nmis()) { - vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, - GUEST_INTR_STATE_NMI); - vmx->nmi_known_unmasked = false; - } + vmx_set_nmi_mask(vcpu, true); break; case INTR_TYPE_EXT_INTR: case INTR_TYPE_SOFT_INTR: @@ -3991,9 +3987,7 @@ static void __vmx_complete_interrupts(struct vcpu_vmx *vmx, * Clear bit "block by NMI" before VM entry if a NMI * delivery faulted. */ - vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO, - GUEST_INTR_STATE_NMI); - vmx->nmi_known_unmasked = true; + vmx_set_nmi_mask(&vmx->vcpu, false); break; case INTR_TYPE_SOFT_EXCEPTION: vmx->vcpu.arch.event_exit_inst_len = -- cgit v1.2.3-70-g09d2 From 3291892450e670c4f170e271cd0c4b63d5a8e41a Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Wed, 23 Mar 2011 13:40:42 -0300 Subject: KVM: expose async pf through our standard mechanism As Avi recently mentioned, the new standard mechanism for exposing features is KVM_GET_SUPPORTED_CPUID, not spamming CAPs. For some reason async pf missed that. So expose async_pf here. Signed-off-by: Glauber Costa CC: Gleb Natapov CC: Avi Kivity Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3a557eefd2f..a38fb9bb342 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2418,6 +2418,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, entry->eax = (1 << KVM_FEATURE_CLOCKSOURCE) | (1 << KVM_FEATURE_NOP_IO_DELAY) | (1 << KVM_FEATURE_CLOCKSOURCE2) | + (1 << KVM_FEATURE_ASYNC_PF) | (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT); entry->ebx = 0; entry->ecx = 0; -- cgit v1.2.3-70-g09d2 From c761e5868e6737abe0464636ebd7fcbb6814c626 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Fri, 1 Apr 2011 11:25:03 -0300 Subject: Revert "KVM: Fix race between nmi injection and enabling nmi window" This reverts commit f86368493ec038218e8663cc1b6e5393cd8e008a. Simpler fix to follow. 
Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/x86.c | 4 +--- include/linux/kvm_host.h | 1 - 2 files changed, 1 insertion(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a38fb9bb342..b9402d5fa0e 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -361,8 +361,8 @@ void kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault) void kvm_inject_nmi(struct kvm_vcpu *vcpu) { - kvm_make_request(KVM_REQ_NMI, vcpu); kvm_make_request(KVM_REQ_EVENT, vcpu); + vcpu->arch.nmi_pending = 1; } EXPORT_SYMBOL_GPL(kvm_inject_nmi); @@ -5208,8 +5208,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) r = 1; goto out; } - if (kvm_check_request(KVM_REQ_NMI, vcpu)) - vcpu->arch.nmi_pending = true; } r = kvm_mmu_reload(vcpu); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 57d7092d771..7ca831e5518 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -43,7 +43,6 @@ #define KVM_REQ_DEACTIVATE_FPU 10 #define KVM_REQ_EVENT 11 #define KVM_REQ_APF_HALT 12 -#define KVM_REQ_NMI 13 #define KVM_USERSPACE_IRQ_SOURCE_ID 0 -- cgit v1.2.3-70-g09d2 From 1499e54af03ae51a937c59035bc86002deae0572 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Fri, 1 Apr 2011 11:26:29 -0300 Subject: KVM: x86: better fix for race between nmi injection and enabling nmi window Fix race between nmi injection and enabling nmi window in a simpler way. Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/x86.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index b9402d5fa0e..692c70d6fd0 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5171,6 +5171,7 @@ static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu) static int vcpu_enter_guest(struct kvm_vcpu *vcpu) { int r; + bool nmi_pending; bool req_int_win = !irqchip_in_kernel(vcpu->kvm) && vcpu->run->request_interrupt_window; @@ -5214,11 +5215,19 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) if (unlikely(r)) goto out; + /* + * An NMI can be injected between local nmi_pending read and + * vcpu->arch.nmi_pending read inside inject_pending_event(). + * But in that case, KVM_REQ_EVENT will be set, which makes + * the race described above benign. + */ + nmi_pending = ACCESS_ONCE(vcpu->arch.nmi_pending); + if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { inject_pending_event(vcpu); /* enable NMI/IRQ window open exits if needed */ - if (vcpu->arch.nmi_pending) + if (nmi_pending) kvm_x86_ops->enable_nmi_window(vcpu); else if (kvm_cpu_has_interrupt(vcpu) || req_int_win) kvm_x86_ops->enable_irq_window(vcpu); -- cgit v1.2.3-70-g09d2 From 70252a1053636c35776d6bc843dd3b260d9d6de1 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 19 Jan 2010 12:51:22 +0200 Subject: KVM: extend in-kernel mmio to handle >8 byte transactions Needed for coalesced mmio using sse. 
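The approach is to walk the transaction in bus-sized (8-byte) chunks and report back how many bytes the in-kernel devices handled, letting the caller forward any unhandled tail to userspace. Distilled to its core (a sketch of the write side; the patch below applies the same shape to reads and also consults the local APIC first):

	static int mmio_write_chunked(struct kvm_vcpu *vcpu, gpa_t addr,
				      int len, const void *v)
	{
		int handled = 0;

		while (len) {
			int n = min(len, 8); /* the bus is 64 bits wide */

			if (kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, n, v))
				break; /* not handled in-kernel: stop here */
			handled += n;
			addr += n;
			len -= n;
			v += n; /* void * arithmetic, a gcc extension */
		}
		return handled; /* callers compare against the requested len */
	}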
Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 58 +++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 46 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 692c70d6fd0..3b234c18b63 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3596,20 +3596,43 @@ static void kvm_init_msr_list(void) static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len, const void *v) { - if (vcpu->arch.apic && - !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, len, v)) - return 0; + int handled = 0; + int n; + + do { + n = min(len, 8); + if (!(vcpu->arch.apic && + !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, n, v)) + && kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, n, v)) + break; + handled += n; + addr += n; + len -= n; + v += n; + } while (len); - return kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, len, v); + return handled; } static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v) { - if (vcpu->arch.apic && - !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, len, v)) - return 0; + int handled = 0; + int n; + + do { + n = min(len, 8); + if (!(vcpu->arch.apic && + !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, n, v)) + && kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, n, v)) + break; + trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, *(u64 *)v); + handled += n; + addr += n; + len -= n; + v += n; + } while (len); - return kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, len, v); + return handled; } static void kvm_set_segment(struct kvm_vcpu *vcpu, @@ -3769,6 +3792,7 @@ static int emulator_read_emulated(unsigned long addr, struct kvm_vcpu *vcpu) { gpa_t gpa; + int handled; if (vcpu->mmio_read_completed) { memcpy(val, vcpu->mmio_data, bytes); @@ -3795,10 +3819,14 @@ mmio: /* * Is this MMIO handled locally? */ - if (!vcpu_mmio_read(vcpu, gpa, bytes, val)) { - trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes, gpa, *(u64 *)val); + handled = vcpu_mmio_read(vcpu, gpa, bytes, val); + + if (handled == bytes) return X86EMUL_CONTINUE; - } + + gpa += handled; + bytes -= handled; + val += handled; trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0); @@ -3830,6 +3858,7 @@ static int emulator_write_emulated_onepage(unsigned long addr, struct kvm_vcpu *vcpu) { gpa_t gpa; + int handled; gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, exception); @@ -3848,9 +3877,14 @@ mmio: /* * Is this MMIO handled locally? */ - if (!vcpu_mmio_write(vcpu, gpa, bytes, val)) + handled = vcpu_mmio_write(vcpu, gpa, bytes, val); + if (handled == bytes) return X86EMUL_CONTINUE; + gpa += handled; + bytes -= handled; + val += handled; + vcpu->mmio_needed = 1; vcpu->run->exit_reason = KVM_EXIT_MMIO; vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa; -- cgit v1.2.3-70-g09d2 From 5287f194bf0d7062d6d99b725366202556f03e28 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 19 Jan 2010 14:20:10 +0200 Subject: KVM: Split mmio completion into a function Make room for sse mmio completions. 
Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 39 +++++++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3b234c18b63..bb6b9d3f5e9 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5441,6 +5441,27 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) return r; } +static int complete_mmio(struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + int r; + + if (!(vcpu->arch.pio.count || vcpu->mmio_needed)) + return 1; + + if (vcpu->mmio_needed) { + memcpy(vcpu->mmio_data, run->mmio.data, 8); + vcpu->mmio_read_completed = 1; + vcpu->mmio_needed = 0; + } + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); + r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE); + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); + if (r != EMULATE_DONE) + return 0; + return 1; +} + int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { int r; @@ -5467,20 +5488,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) } } - if (vcpu->arch.pio.count || vcpu->mmio_needed) { - if (vcpu->mmio_needed) { - memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8); - vcpu->mmio_read_completed = 1; - vcpu->mmio_needed = 0; - } - vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); - r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE); - srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); - if (r != EMULATE_DONE) { - r = 0; - goto out; - } - } + r = complete_mmio(vcpu); + if (r <= 0) + goto out; + if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL) kvm_register_write(vcpu, VCPU_REGS_RAX, kvm_run->hypercall.ret); -- cgit v1.2.3-70-g09d2 From cef4dea07f6720b36cc93e18a2e68be4bdb71a92 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 20 Jan 2010 12:01:20 +0200 Subject: KVM: 16-byte mmio support Since sse instructions can issue 16-byte mmios, we need to support them. We can't increase the kvm_run mmio buffer size to 16 bytes without breaking compatibility, so instead we break the large mmios into two smaller 8-byte ones. Since the bus is 64-bit we aren't breaking any atomicity guarantees. 
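From userspace the change is invisible: kvm_run->mmio.len never exceeds 8, so a guest movdqu to device memory simply surfaces as two consecutive KVM_EXIT_MMIO exits, at gpa and gpa + 8. A sketch of the usual VMM dispatch loop, unchanged by this patch (device_mmio_write/read are hypothetical handlers):

	/* 'run' is the mmap()ed struct kvm_run of the vcpu. */
	while (ioctl(vcpu_fd, KVM_RUN, 0) == 0) {
		switch (run->exit_reason) {
		case KVM_EXIT_MMIO:
			if (run->mmio.is_write)
				device_mmio_write(run->mmio.phys_addr,
						  run->mmio.data, run->mmio.len);
			else
				device_mmio_read(run->mmio.phys_addr,
						 run->mmio.data, run->mmio.len);
			break;
		/* ... other exit reasons ... */
		}
	}

The second half of the access is driven by complete_mmio() below, which advances mmio_index by 8 and re-enters userspace until mmio_size bytes have been transferred.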
Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/x86.c | 34 +++++++++++++++++++++++++--------- include/linux/kvm_host.h | 7 ++++++- 3 files changed, 32 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 35f81b11026..e820c6339b8 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -30,6 +30,7 @@ #define KVM_MEMORY_SLOTS 32 /* memory slots that does not exposed to userspace */ #define KVM_PRIVATE_MEM_SLOTS 4 +#define KVM_MMIO_SIZE 16 #define KVM_PIO_PAGE_OFFSET 1 #define KVM_COALESCED_MMIO_PAGE_OFFSET 2 diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index bb6b9d3f5e9..11d692c7018 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3833,8 +3833,10 @@ mmio: vcpu->mmio_needed = 1; vcpu->run->exit_reason = KVM_EXIT_MMIO; vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa; - vcpu->run->mmio.len = vcpu->mmio_size = bytes; + vcpu->mmio_size = bytes; + vcpu->run->mmio.len = min(vcpu->mmio_size, 8); vcpu->run->mmio.is_write = vcpu->mmio_is_write = 0; + vcpu->mmio_index = 0; return X86EMUL_IO_NEEDED; } @@ -3886,11 +3888,14 @@ mmio: val += handled; vcpu->mmio_needed = 1; + memcpy(vcpu->mmio_data, val, bytes); vcpu->run->exit_reason = KVM_EXIT_MMIO; vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa; - vcpu->run->mmio.len = vcpu->mmio_size = bytes; + vcpu->mmio_size = bytes; + vcpu->run->mmio.len = min(vcpu->mmio_size, 8); vcpu->run->mmio.is_write = vcpu->mmio_is_write = 1; - memcpy(vcpu->run->mmio.data, val, bytes); + memcpy(vcpu->run->mmio.data, vcpu->mmio_data, 8); + vcpu->mmio_index = 0; return X86EMUL_CONTINUE; } @@ -4498,11 +4503,9 @@ restart: if (!vcpu->arch.pio.in) vcpu->arch.pio.count = 0; r = EMULATE_DO_MMIO; - } else if (vcpu->mmio_needed) { - if (vcpu->mmio_is_write) - vcpu->mmio_needed = 0; + } else if (vcpu->mmio_needed) r = EMULATE_DO_MMIO; - } else if (r == EMULATION_RESTART) + else if (r == EMULATION_RESTART) goto restart; else r = EMULATE_DONE; @@ -5450,9 +5453,22 @@ static int complete_mmio(struct kvm_vcpu *vcpu) return 1; if (vcpu->mmio_needed) { - memcpy(vcpu->mmio_data, run->mmio.data, 8); - vcpu->mmio_read_completed = 1; vcpu->mmio_needed = 0; + if (!vcpu->mmio_is_write) + memcpy(vcpu->mmio_data, run->mmio.data, 8); + vcpu->mmio_index += 8; + if (vcpu->mmio_index < vcpu->mmio_size) { + run->exit_reason = KVM_EXIT_MMIO; + run->mmio.phys_addr = vcpu->mmio_phys_addr + vcpu->mmio_index; + memcpy(run->mmio.data, vcpu->mmio_data + vcpu->mmio_index, 8); + run->mmio.len = min(vcpu->mmio_size - vcpu->mmio_index, 8); + run->mmio.is_write = vcpu->mmio_is_write; + vcpu->mmio_needed = 1; + return 0; + } + if (vcpu->mmio_is_write) + return 1; + vcpu->mmio_read_completed = 1; } vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 7ca831e5518..d1f50756706 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -27,6 +27,10 @@ #include +#ifndef KVM_MMIO_SIZE +#define KVM_MMIO_SIZE 8 +#endif + /* * vcpu->requests bit members */ @@ -132,7 +136,8 @@ struct kvm_vcpu { int mmio_read_completed; int mmio_is_write; int mmio_size; - unsigned char mmio_data[8]; + int mmio_index; + unsigned char mmio_data[KVM_MMIO_SIZE]; gpa_t mmio_phys_addr; #endif -- cgit v1.2.3-70-g09d2 From 1d6b114f20d06ac0749686e4d7b7c7913d9116db Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 20 Jan 2010 16:00:35 +0200 Subject: 
KVM: x86 emulator: do not munge rep prefix Currently we store a rep prefix as 1 or 2 depending on whether it is a REPE or REPNE. Since sse instructions depend on the prefix value, store it as the original opcode to simplify things further on. Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_emulate.h | 4 ++-- arch/x86/kvm/emulate.c | 4 +--- 2 files changed, 3 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 0f521356432..c00aed12755 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -249,8 +249,8 @@ struct x86_emulate_ctxt { }; /* Repeat String Operation Prefix */ -#define REPE_PREFIX 1 -#define REPNE_PREFIX 2 +#define REPE_PREFIX 0xf3 +#define REPNE_PREFIX 0xf2 /* Execution mode, passed to the emulator. */ #define X86EMUL_MODE_REAL 0 /* Real mode. */ diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 0ad47b819a8..075bb6fc73a 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2692,10 +2692,8 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) c->lock_prefix = 1; break; case 0xf2: /* REPNE/REPNZ */ - c->rep_prefix = REPNE_PREFIX; - break; case 0xf3: /* REP/REPE/REPZ */ - c->rep_prefix = REPE_PREFIX; + c->rep_prefix = c->b; break; default: goto done_prefixes; -- cgit v1.2.3-70-g09d2 From 5037f6f324cdcc6c9071dc774aba992f96c7e5ff Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 28 Mar 2011 16:53:59 +0200 Subject: KVM: x86 emulator: define callbacks for using the guest fpu within the emulator Needed for emulating fpu instructions. Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_emulate.h | 2 ++ arch/x86/kvm/x86.c | 18 ++++++++++++++++++ 2 files changed, 20 insertions(+) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index c00aed12755..4c0e6822611 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -158,6 +158,8 @@ struct x86_emulate_ops { int (*set_dr)(int dr, unsigned long value, struct kvm_vcpu *vcpu); int (*set_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata); + void (*get_fpu)(struct x86_emulate_ctxt *ctxt); /* disables preempt */ + void (*put_fpu)(struct x86_emulate_ctxt *ctxt); /* reenables preempt */ }; /* Type, address-of, and value of an instruction's operand. */ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 11d692c7018..5af66515337 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4281,6 +4281,22 @@ static void emulator_set_segment_selector(u16 sel, int seg, kvm_set_segment(vcpu, &kvm_seg, seg); } +static void emulator_get_fpu(struct x86_emulate_ctxt *ctxt) +{ + preempt_disable(); + kvm_load_guest_fpu(ctxt->vcpu); + /* + * CR0.TS may reference the host fpu state, not the guest fpu state, + * so it may be clear at this point. 
+ */ + clts(); +} + +static void emulator_put_fpu(struct x86_emulate_ctxt *ctxt) +{ + preempt_enable(); +} + static struct x86_emulate_ops emulate_ops = { .read_std = kvm_read_guest_virt_system, .write_std = kvm_write_guest_virt_system, @@ -4304,6 +4320,8 @@ static struct x86_emulate_ops emulate_ops = { .set_dr = emulator_set_dr, .set_msr = kvm_set_msr, .get_msr = kvm_get_msr, + .get_fpu = emulator_get_fpu, + .put_fpu = emulator_put_fpu, }; static void cache_all_regs(struct kvm_vcpu *vcpu) -- cgit v1.2.3-70-g09d2 From 0d7cdee83ad1582eecbf3b4a220e82dcb5ad561c Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 29 Mar 2011 11:34:38 +0200 Subject: KVM: x86 emulator: Specialize decoding for insns with 66/f2/f3 prefixes Most SIMD instructions use the 66/f2/f3 prefixes to distinguish between different variants of the same instruction. Usually the encoding is quite regular, but in some cases (including non-SIMD instructions) the prefixes generate very different instructions. Examples include XCHG/PAUSE, MOVQ/MOVDQA/MOVDQU, and MOVBE/CRC32. Allow the emulator to handle these special cases by splitting such opcodes into groups, with different decode flags and execution functions for different prefixes. Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 075bb6fc73a..fcce7aeacc8 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -75,6 +75,7 @@ #define Stack (1<<13) /* Stack instruction (push/pop) */ #define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */ #define GroupDual (1<<15) /* Alternate decoding of mod == 3 */ +#define Prefix (1<<16) /* Instruction varies with 66/f2/f3 prefix */ /* Misc flags */ #define VendorSpecific (1<<22) /* Vendor specific instruction */ #define NoAccess (1<<23) /* Don't access memory (lea/invlpg/verr etc) */ @@ -106,6 +107,7 @@ struct opcode { int (*execute)(struct x86_emulate_ctxt *ctxt); struct opcode *group; struct group_dual *gdual; + struct gprefix *gprefix; } u; }; @@ -114,6 +116,13 @@ struct group_dual { struct opcode mod3[8]; }; +struct gprefix { + struct opcode pfx_no; + struct opcode pfx_66; + struct opcode pfx_f2; + struct opcode pfx_f3; +}; + /* EFLAGS bit definitions. */ #define EFLG_ID (1<<21) #define EFLG_VIP (1<<20) @@ -2625,7 +2634,8 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) struct decode_cache *c = &ctxt->decode; int rc = X86EMUL_CONTINUE; int mode = ctxt->mode; - int def_op_bytes, def_ad_bytes, dual, goffset; + int def_op_bytes, def_ad_bytes, dual, goffset, simd_prefix; + bool op_prefix = false; struct opcode opcode, *g_mod012, *g_mod3; struct operand memop = { .type = OP_NONE }; @@ -2662,6 +2672,7 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) for (;;) { switch (c->b = insn_fetch(u8, 1, c->eip)) { case 0x66: /* operand-size override */ + op_prefix = true; /* switch between 2/4 bytes */ c->op_bytes = def_op_bytes ^ 6; break; @@ -2742,6 +2753,19 @@ done_prefixes: c->d |= opcode.flags; } + if (c->d & Prefix) { + if (c->rep_prefix && op_prefix) + return X86EMUL_UNHANDLEABLE; + simd_prefix = op_prefix ? 
0x66 : c->rep_prefix; + switch (simd_prefix) { + case 0x00: opcode = opcode.u.gprefix->pfx_no; break; + case 0x66: opcode = opcode.u.gprefix->pfx_66; break; + case 0xf2: opcode = opcode.u.gprefix->pfx_f2; break; + case 0xf3: opcode = opcode.u.gprefix->pfx_f3; break; + } + c->d |= opcode.flags; + } + c->execute = opcode.u.execute; /* Unrecognised? */ -- cgit v1.2.3-70-g09d2 From 1253791df91b064c039282feea094e5943294924 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 29 Mar 2011 11:41:27 +0200 Subject: KVM: x86 emulator: SSE support Add support for marking an instruction as SSE, switching registers used to the SSE register file. Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_emulate.h | 6 ++- arch/x86/kvm/emulate.c | 102 +++++++++++++++++++++++++++++++++++-- 2 files changed, 104 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 4c0e6822611..48693f0d384 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -162,9 +162,11 @@ struct x86_emulate_ops { void (*put_fpu)(struct x86_emulate_ctxt *ctxt); /* reenables preempt */ }; +typedef u32 __attribute__((vector_size(16))) sse128_t; + /* Type, address-of, and value of an instruction's operand. */ struct operand { - enum { OP_REG, OP_MEM, OP_IMM, OP_NONE } type; + enum { OP_REG, OP_MEM, OP_IMM, OP_XMM, OP_NONE } type; unsigned int bytes; union { unsigned long orig_val; @@ -176,11 +178,13 @@ struct operand { ulong ea; unsigned seg; } mem; + unsigned xmm; } addr; union { unsigned long val; u64 val64; char valptr[sizeof(unsigned long) + 2]; + sse128_t vec_val; }; }; diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index fcce7aeacc8..4e11102f560 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -76,6 +76,7 @@ #define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */ #define GroupDual (1<<15) /* Alternate decoding of mod == 3 */ #define Prefix (1<<16) /* Instruction varies with 66/f2/f3 prefix */ +#define Sse (1<<17) /* SSE Vector instruction */ /* Misc flags */ #define VendorSpecific (1<<22) /* Vendor specific instruction */ #define NoAccess (1<<23) /* Don't access memory (lea/invlpg/verr etc) */ @@ -505,6 +506,11 @@ static int emulate_de(struct x86_emulate_ctxt *ctxt) return emulate_exception(ctxt, DE_VECTOR, 0, false); } +static int emulate_nm(struct x86_emulate_ctxt *ctxt) +{ + return emulate_exception(ctxt, NM_VECTOR, 0, false); +} + static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops, unsigned long eip, u8 *dest) @@ -632,7 +638,63 @@ static void fetch_register_operand(struct operand *op) } } -static void decode_register_operand(struct operand *op, +static void read_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, int reg) +{ + ctxt->ops->get_fpu(ctxt); + switch (reg) { + case 0: asm("movdqu %%xmm0, %0" : "=m"(*data)); break; + case 1: asm("movdqu %%xmm1, %0" : "=m"(*data)); break; + case 2: asm("movdqu %%xmm2, %0" : "=m"(*data)); break; + case 3: asm("movdqu %%xmm3, %0" : "=m"(*data)); break; + case 4: asm("movdqu %%xmm4, %0" : "=m"(*data)); break; + case 5: asm("movdqu %%xmm5, %0" : "=m"(*data)); break; + case 6: asm("movdqu %%xmm6, %0" : "=m"(*data)); break; + case 7: asm("movdqu %%xmm7, %0" : "=m"(*data)); break; +#ifdef CONFIG_X86_64 + case 8: asm("movdqu %%xmm8, %0" : "=m"(*data)); break; + case 9: asm("movdqu %%xmm9, %0" : "=m"(*data)); break; + case 10: asm("movdqu %%xmm10, %0" : "=m"(*data)); break; + case 11: asm("movdqu 
%%xmm11, %0" : "=m"(*data)); break; + case 12: asm("movdqu %%xmm12, %0" : "=m"(*data)); break; + case 13: asm("movdqu %%xmm13, %0" : "=m"(*data)); break; + case 14: asm("movdqu %%xmm14, %0" : "=m"(*data)); break; + case 15: asm("movdqu %%xmm15, %0" : "=m"(*data)); break; +#endif + default: BUG(); + } + ctxt->ops->put_fpu(ctxt); +} + +static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, + int reg) +{ + ctxt->ops->get_fpu(ctxt); + switch (reg) { + case 0: asm("movdqu %0, %%xmm0" : : "m"(*data)); break; + case 1: asm("movdqu %0, %%xmm1" : : "m"(*data)); break; + case 2: asm("movdqu %0, %%xmm2" : : "m"(*data)); break; + case 3: asm("movdqu %0, %%xmm3" : : "m"(*data)); break; + case 4: asm("movdqu %0, %%xmm4" : : "m"(*data)); break; + case 5: asm("movdqu %0, %%xmm5" : : "m"(*data)); break; + case 6: asm("movdqu %0, %%xmm6" : : "m"(*data)); break; + case 7: asm("movdqu %0, %%xmm7" : : "m"(*data)); break; +#ifdef CONFIG_X86_64 + case 8: asm("movdqu %0, %%xmm8" : : "m"(*data)); break; + case 9: asm("movdqu %0, %%xmm9" : : "m"(*data)); break; + case 10: asm("movdqu %0, %%xmm10" : : "m"(*data)); break; + case 11: asm("movdqu %0, %%xmm11" : : "m"(*data)); break; + case 12: asm("movdqu %0, %%xmm12" : : "m"(*data)); break; + case 13: asm("movdqu %0, %%xmm13" : : "m"(*data)); break; + case 14: asm("movdqu %0, %%xmm14" : : "m"(*data)); break; + case 15: asm("movdqu %0, %%xmm15" : : "m"(*data)); break; +#endif + default: BUG(); + } + ctxt->ops->put_fpu(ctxt); +} + +static void decode_register_operand(struct x86_emulate_ctxt *ctxt, + struct operand *op, struct decode_cache *c, int inhibit_bytereg) { @@ -641,6 +703,15 @@ static void decode_register_operand(struct operand *op, if (!(c->d & ModRM)) reg = (c->b & 7) | ((c->rex_prefix & 1) << 3); + + if (c->d & Sse) { + op->type = OP_XMM; + op->bytes = 16; + op->addr.xmm = reg; + read_sse_reg(ctxt, &op->vec_val, reg); + return; + } + op->type = OP_REG; if ((c->d & ByteOp) && !inhibit_bytereg) { op->addr.reg = decode_register(reg, c->regs, highbyte_regs); @@ -680,6 +751,13 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, op->bytes = (c->d & ByteOp) ? 1 : c->op_bytes; op->addr.reg = decode_register(c->modrm_rm, c->regs, c->d & ByteOp); + if (c->d & Sse) { + op->type = OP_XMM; + op->bytes = 16; + op->addr.xmm = c->modrm_rm; + read_sse_reg(ctxt, &op->vec_val, c->modrm_rm); + return rc; + } fetch_register_operand(op); return rc; } @@ -1107,6 +1185,9 @@ static inline int writeback(struct x86_emulate_ctxt *ctxt, if (rc != X86EMUL_CONTINUE) return rc; break; + case OP_XMM: + write_sse_reg(ctxt, &c->dst.vec_val, c->dst.addr.xmm); + break; case OP_NONE: /* no writeback */ break; @@ -2785,6 +2866,9 @@ done_prefixes: c->op_bytes = 4; } + if (c->d & Sse) + c->op_bytes = 16; + /* ModRM and SIB bytes. */ if (c->d & ModRM) { rc = decode_modrm(ctxt, ops, &memop); @@ -2814,7 +2898,7 @@ done_prefixes: case SrcNone: break; case SrcReg: - decode_register_operand(&c->src, c, 0); + decode_register_operand(ctxt, &c->src, c, 0); break; case SrcMem16: memop.bytes = 2; @@ -2905,7 +2989,7 @@ done_prefixes: /* Decode and fetch the destination operand: register or memory. 
*/ switch (c->d & DstMask) { case DstReg: - decode_register_operand(&c->dst, c, + decode_register_operand(ctxt, &c->dst, c, c->twobyte && (c->b == 0xb6 || c->b == 0xb7)); break; case DstImmUByte: @@ -3001,6 +3085,18 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt) goto done; } + if ((c->d & Sse) + && ((ops->get_cr(0, ctxt->vcpu) & X86_CR0_EM) + || !(ops->get_cr(4, ctxt->vcpu) & X86_CR4_OSFXSR))) { + rc = emulate_ud(ctxt); + goto done; + } + + if ((c->d & Sse) && (ops->get_cr(0, ctxt->vcpu) & X86_CR0_TS)) { + rc = emulate_nm(ctxt); + goto done; + } + /* Privileged instruction can be executed only in CPL=0 */ if ((c->d & Priv) && ops->cpl(ctxt->vcpu)) { rc = emulate_gp(ctxt, 0); -- cgit v1.2.3-70-g09d2 From aa97bb4891b1f1b35e7abef8d1e2bbd3dda07159 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 20 Jan 2010 18:09:23 +0200 Subject: KVM: x86 emulator: implement movdqu instruction (f3 0f 6f, f3 0f 7f) Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 4e11102f560..2b6c24e572d 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2415,11 +2415,19 @@ static int em_mov(struct x86_emulate_ctxt *ctxt) return X86EMUL_CONTINUE; } +static int em_movdqu(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + memcpy(&c->dst.vec_val, &c->src.vec_val, c->op_bytes); + return X86EMUL_CONTINUE; +} + #define D(_y) { .flags = (_y) } #define N D(0) #define G(_f, _g) { .flags = ((_f) | Group), .u.group = (_g) } #define GD(_f, _g) { .flags = ((_f) | Group | GroupDual), .u.gdual = (_g) } #define I(_f, _e) { .flags = (_f), .u.execute = (_e) } +#define GP(_f, _g) { .flags = ((_f) | Prefix), .u.gprefix = (_g) } #define D2bv(_f) D((_f) | ByteOp), D(_f) #define I2bv(_f, _e) I((_f) | ByteOp, _e), I(_f, _e) @@ -2484,6 +2492,10 @@ static struct opcode group11[] = { I(DstMem | SrcImm | ModRM | Mov, em_mov), X7(D(Undefined)), }; +static struct gprefix pfx_0f_6f_0f_7f = { + N, N, N, I(Sse, em_movdqu), +}; + static struct opcode opcode_table[256] = { /* 0x00 - 0x07 */ D6ALU(Lock), @@ -2608,9 +2620,15 @@ static struct opcode twobyte_table[256] = { /* 0x50 - 0x5F */ N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, /* 0x60 - 0x6F */ - N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, + N, N, N, N, + N, N, N, N, + N, N, N, N, + N, N, N, GP(SrcMem | DstReg | ModRM | Mov, &pfx_0f_6f_0f_7f), /* 0x70 - 0x7F */ - N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, + N, N, N, N, + N, N, N, N, + N, N, N, N, + N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_6f_0f_7f), /* 0x80 - 0x8F */ X16(D(SrcImm)), /* 0x90 - 0x9F */ @@ -2654,6 +2672,7 @@ static struct opcode twobyte_table[256] = { #undef G #undef GD #undef I +#undef GP #undef D2bv #undef I2bv -- cgit v1.2.3-70-g09d2 From c4f035c60dad45ff8813550dc82540dbbc263df2 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 4 Apr 2011 12:39:22 +0200 Subject: KVM: x86 emulator: add framework for instruction intercepts When running in guest mode, certain instructions can be intercepted by hardware. This also holds for nested guests running on emulated virtualization hardware, in particular instructions emulated by kvm itself. This patch adds a framework for intercepting instructions. If an instruction is marked for interception, and if we're running in guest mode, a callback is called to check whether an intercept is needed or not. 
The callback is called at three points in time: immediately after beginning execution, after checking privilege exceptions, and after checking memory exceptions. This suits the different interception points defined for different instructions and for the various virtualization instruction sets. In addition, a new X86EMUL_INTERCEPTED is defined, which any callback or memory access may return, allowing the more complicated intercepts to be implemented in existing callbacks. Signed-off-by: Avi Kivity Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_emulate.h | 20 ++++++++++++++++++++ arch/x86/kvm/emulate.c | 26 ++++++++++++++++++++++++++ arch/x86/kvm/x86.c | 9 +++++++++ 3 files changed, 55 insertions(+) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 48693f0d384..2cfea49d470 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -14,6 +14,8 @@ #include struct x86_emulate_ctxt; +enum x86_intercept; +enum x86_intercept_stage; struct x86_exception { u8 vector; @@ -62,6 +64,7 @@ struct x86_exception { #define X86EMUL_RETRY_INSTR 3 /* retry the instruction for some reason */ #define X86EMUL_CMPXCHG_FAILED 4 /* cmpxchg did not see expected value */ #define X86EMUL_IO_NEEDED 5 /* IO is needed to complete emulation */ +#define X86EMUL_INTERCEPTED 6 /* Intercepted by nested VMCB/VMCS */ struct x86_emulate_ops { /* @@ -160,6 +163,9 @@ struct x86_emulate_ops { int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata); void (*get_fpu)(struct x86_emulate_ctxt *ctxt); /* disables preempt */ void (*put_fpu)(struct x86_emulate_ctxt *ctxt); /* reenables preempt */ + int (*intercept)(struct x86_emulate_ctxt *ctxt, + enum x86_intercept intercept, + enum x86_intercept_stage stage); }; typedef u32 __attribute__((vector_size(16))) sse128_t; @@ -203,6 +209,7 @@ struct read_cache { struct decode_cache { u8 twobyte; u8 b; + u8 intercept; u8 lock_prefix; u8 rep_prefix; u8 op_bytes; @@ -244,6 +251,7 @@ struct x86_emulate_ctxt { /* interruptibility state, as a result of execution of STI or MOV SS */ int interruptibility; + bool guest_mode; /* guest running a nested guest */ bool perm_ok; /* do not check permissions if true */ bool only_vendor_specific_insn; @@ -265,6 +273,18 @@ struct x86_emulate_ctxt { #define X86EMUL_MODE_PROT32 4 /* 32-bit protected mode. */ #define X86EMUL_MODE_PROT64 8 /* 64-bit (long) mode. */ +enum x86_intercept_stage { + X86_ICPT_PRE_EXCEPT, + X86_ICPT_POST_EXCEPT, + X86_ICPT_POST_MEMACCESS, +}; + +enum x86_intercept { + x86_intercept_none, + + nr_x86_intercepts +}; + /* Host execution mode. 
*/ #if defined(CONFIG_X86_32) #define X86EMUL_MODE_HOST X86EMUL_MODE_PROT32 diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 2b6c24e572d..a81486790ba 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -104,6 +104,7 @@ struct opcode { u32 flags; + u8 intercept; union { int (*execute)(struct x86_emulate_ctxt *ctxt); struct opcode *group; @@ -2423,10 +2424,13 @@ static int em_movdqu(struct x86_emulate_ctxt *ctxt) } #define D(_y) { .flags = (_y) } +#define DI(_y, _i) { .flags = (_y), .intercept = x86_intercept_##_i } #define N D(0) #define G(_f, _g) { .flags = ((_f) | Group), .u.group = (_g) } #define GD(_f, _g) { .flags = ((_f) | Group | GroupDual), .u.gdual = (_g) } #define I(_f, _e) { .flags = (_f), .u.execute = (_e) } +#define II(_f, _e, _i) \ + { .flags = (_f), .u.execute = (_e), .intercept = x86_intercept_##_i } #define GP(_f, _g) { .flags = ((_f) | Prefix), .u.gprefix = (_g) } #define D2bv(_f) D((_f) | ByteOp), D(_f) @@ -2867,6 +2871,7 @@ done_prefixes: } c->execute = opcode.u.execute; + c->intercept = opcode.intercept; /* Unrecognised? */ if (c->d == 0 || (c->d & Undefined)) @@ -3116,12 +3121,26 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt) goto done; } + if (unlikely(ctxt->guest_mode) && c->intercept) { + rc = ops->intercept(ctxt, c->intercept, + X86_ICPT_PRE_EXCEPT); + if (rc != X86EMUL_CONTINUE) + goto done; + } + /* Privileged instruction can be executed only in CPL=0 */ if ((c->d & Priv) && ops->cpl(ctxt->vcpu)) { rc = emulate_gp(ctxt, 0); goto done; } + if (unlikely(ctxt->guest_mode) && c->intercept) { + rc = ops->intercept(ctxt, c->intercept, + X86_ICPT_POST_EXCEPT); + if (rc != X86EMUL_CONTINUE) + goto done; + } + if (c->rep_prefix && (c->d & String)) { /* All REP prefixes have the same first termination condition */ if (address_mask(c, c->regs[VCPU_REGS_RCX]) == 0) { @@ -3160,6 +3179,13 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt) special_insn: + if (unlikely(ctxt->guest_mode) && c->intercept) { + rc = ops->intercept(ctxt, c->intercept, + X86_ICPT_POST_MEMACCESS); + if (rc != X86EMUL_CONTINUE) + goto done; + } + if (c->execute) { rc = c->execute(ctxt); if (rc != X86EMUL_CONTINUE) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 5af66515337..36786bbb4c0 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4297,6 +4297,13 @@ static void emulator_put_fpu(struct x86_emulate_ctxt *ctxt) preempt_enable(); } +static int emulator_intercept(struct x86_emulate_ctxt *ctxt, + enum x86_intercept intercept, + enum x86_intercept_stage stage) +{ + return X86EMUL_CONTINUE; +} + static struct x86_emulate_ops emulate_ops = { .read_std = kvm_read_guest_virt_system, .write_std = kvm_write_guest_virt_system, @@ -4322,6 +4329,7 @@ static struct x86_emulate_ops emulate_ops = { .get_msr = kvm_get_msr, .get_fpu = emulator_get_fpu, .put_fpu = emulator_put_fpu, + .intercept = emulator_intercept, }; static void cache_all_regs(struct kvm_vcpu *vcpu) @@ -4376,6 +4384,7 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu) ? X86EMUL_MODE_VM86 : cs_l ? X86EMUL_MODE_PROT64 : cs_db ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; + vcpu->arch.emulate_ctxt.guest_mode = is_guest_mode(vcpu); memset(c, 0, sizeof(struct decode_cache)); memcpy(c->regs, vcpu->arch.regs, sizeof c->regs); } -- cgit v1.2.3-70-g09d2 From 3c6e276f22cf29188035535127c4c35aeeafcabc Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 4 Apr 2011 12:39:23 +0200 Subject: KVM: x86 emulator: add SVM intercepts Add intercept codes for instructions defined by SVM as interceptable. 
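These enum values give a nested-virtualization backend a stable key to test against the nested guest's intercept configuration. A hypothetical backend callback might look like the sketch below (illustrative only -- the real SVM implementation arrives in a later patch, and nested_vmcb_intercepted() is an invented helper):

	static int svm_check_intercept_sketch(struct x86_emulate_ctxt *ctxt,
					      enum x86_intercept icpt,
					      enum x86_intercept_stage stage)
	{
		/* Assume, for the sketch, that this backend checks after
		 * the emulator's privilege checks. */
		if (stage != X86_ICPT_POST_EXCEPT)
			return X86EMUL_CONTINUE;

		if (nested_vmcb_intercepted(ctxt, icpt)) /* hypothetical */
			return X86EMUL_INTERCEPTED; /* skip write-back, exit to L1 */

		return X86EMUL_CONTINUE;
	}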
Signed-off-by: Avi Kivity Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_emulate.h | 35 +++++++++++++++++++++++++++++++++++ arch/x86/kvm/emulate.c | 24 +++++++++++++----------- 2 files changed, 48 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 2cfea49d470..470ac54ca38 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -281,6 +281,41 @@ enum x86_intercept_stage { enum x86_intercept { x86_intercept_none, + x86_intercept_lmsw, + x86_intercept_smsw, + x86_intercept_lidt, + x86_intercept_sidt, + x86_intercept_lgdt, + x86_intercept_sgdt, + x86_intercept_lldt, + x86_intercept_sldt, + x86_intercept_ltr, + x86_intercept_str, + x86_intercept_rdtsc, + x86_intercept_rdpmc, + x86_intercept_pushf, + x86_intercept_popf, + x86_intercept_cpuid, + x86_intercept_rsm, + x86_intercept_iret, + x86_intercept_intn, + x86_intercept_invd, + x86_intercept_pause, + x86_intercept_hlt, + x86_intercept_invlpg, + x86_intercept_invlpga, + x86_intercept_vmrun, + x86_intercept_vmload, + x86_intercept_vmsave, + x86_intercept_vmmcall, + x86_intercept_stgi, + x86_intercept_clgi, + x86_intercept_skinit, + x86_intercept_rdtscp, + x86_intercept_icebp, + x86_intercept_wbinvd, + x86_intercept_monitor, + x86_intercept_mwait, nr_x86_intercepts }; diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index a81486790ba..c2260e57450 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2469,15 +2469,15 @@ static struct opcode group5[] = { }; static struct group_dual group7 = { { - N, N, D(ModRM | SrcMem | Priv), D(ModRM | SrcMem | Priv), - D(SrcNone | ModRM | DstMem | Mov), N, - D(SrcMem16 | ModRM | Mov | Priv), - D(SrcMem | ModRM | ByteOp | Priv | NoAccess), + N, N, DI(ModRM | SrcMem | Priv, lgdt), DI(ModRM | SrcMem | Priv, lidt), + DI(SrcNone | ModRM | DstMem | Mov, smsw), N, + DI(SrcMem16 | ModRM | Mov | Priv, lmsw), + DI(SrcMem | ModRM | ByteOp | Priv | NoAccess, invlpg), }, { D(SrcNone | ModRM | Priv | VendorSpecific), N, N, D(SrcNone | ModRM | Priv | VendorSpecific), - D(SrcNone | ModRM | DstMem | Mov), N, - D(SrcMem16 | ModRM | Mov | Priv), N, + DI(SrcNone | ModRM | DstMem | Mov, smsw), N, + DI(SrcMem16 | ModRM | Mov | Priv, lmsw), N, } }; static struct opcode group8[] = { @@ -2556,7 +2556,7 @@ static struct opcode opcode_table[256] = { /* 0x98 - 0x9F */ D(DstAcc | SrcNone), I(ImplicitOps | SrcAcc, em_cwd), I(SrcImmFAddr | No64, em_call_far), N, - D(ImplicitOps | Stack), D(ImplicitOps | Stack), N, N, + DI(ImplicitOps | Stack, pushf), DI(ImplicitOps | Stack, popf), N, N, /* 0xA0 - 0xA7 */ I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov), I2bv(DstMem | SrcAcc | Mov | MemAbs, em_mov), @@ -2579,7 +2579,8 @@ static struct opcode opcode_table[256] = { G(ByteOp, group11), G(0, group11), /* 0xC8 - 0xCF */ N, N, N, D(ImplicitOps | Stack), - D(ImplicitOps), D(SrcImmByte), D(ImplicitOps | No64), D(ImplicitOps), + D(ImplicitOps), DI(SrcImmByte, intn), + D(ImplicitOps | No64), DI(ImplicitOps, iret), /* 0xD0 - 0xD7 */ D2bv(DstMem | SrcOne | ModRM), D2bv(DstMem | ModRM), N, N, N, N, @@ -2594,7 +2595,8 @@ static struct opcode opcode_table[256] = { D2bv(SrcNone | DstAcc), D2bv(SrcAcc | ImplicitOps), /* 0xF0 - 0xF7 */ N, N, N, N, - D(ImplicitOps | Priv), D(ImplicitOps), G(ByteOp, group3), G(0, group3), + DI(ImplicitOps | Priv, hlt), D(ImplicitOps), + G(ByteOp, group3), G(0, group3), /* 0xF8 - 0xFF */ D(ImplicitOps), D(ImplicitOps), D(ImplicitOps), 
D(ImplicitOps), D(ImplicitOps), D(ImplicitOps), G(0, group4), G(0, group5), @@ -2604,7 +2606,7 @@ static struct opcode twobyte_table[256] = { /* 0x00 - 0x0F */ N, GD(0, &group7), N, N, N, D(ImplicitOps | VendorSpecific), D(ImplicitOps | Priv), N, - D(ImplicitOps | Priv), D(ImplicitOps | Priv), N, N, + DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N, N, D(ImplicitOps | ModRM), N, N, /* 0x10 - 0x1F */ N, N, N, N, N, N, N, N, D(ImplicitOps | ModRM), N, N, N, N, N, N, N, @@ -2614,7 +2616,7 @@ static struct opcode twobyte_table[256] = { N, N, N, N, N, N, N, N, N, N, N, N, /* 0x30 - 0x3F */ - D(ImplicitOps | Priv), I(ImplicitOps, em_rdtsc), + D(ImplicitOps | Priv), II(ImplicitOps, em_rdtsc, rdtsc), D(ImplicitOps | Priv), N, D(ImplicitOps | VendorSpecific), D(ImplicitOps | Priv | VendorSpecific), N, N, -- cgit v1.2.3-70-g09d2 From 775fde8648ebc588d07de39457aadc7c2131df2e Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 4 Apr 2011 12:39:24 +0200 Subject: KVM: x86 emulator: Don't write-back cpu-state on X86EMUL_INTERCEPTED This patch prevents the changed CPU state from being written back when the emulator detects that the instruction was intercepted by the guest. Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_emulate.h | 1 + arch/x86/kvm/emulate.c | 3 +++ arch/x86/kvm/x86.c | 3 +++ 3 files changed, 7 insertions(+) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 470ac54ca38..1dbd0c736cd 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -331,6 +331,7 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len); #define EMULATION_FAILED -1 #define EMULATION_OK 0 #define EMULATION_RESTART 1 +#define EMULATION_INTERCEPTED 2 int x86_emulate_insn(struct x86_emulate_ctxt *ctxt); int emulator_task_switch(struct x86_emulate_ctxt *ctxt, u16 tss_selector, int reason, diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index c2260e57450..a2c31e527a9 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -3592,6 +3592,9 @@ writeback: done: if (rc == X86EMUL_PROPAGATE_FAULT) ctxt->have_exception = true; + if (rc == X86EMUL_INTERCEPTED) + return EMULATION_INTERCEPTED; + return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK; twobyte_insn: diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 36786bbb4c0..99bed74779d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4516,6 +4516,9 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, restart: r = x86_emulate_insn(&vcpu->arch.emulate_ctxt); + if (r == EMULATION_INTERCEPTED) + return EMULATE_DONE; + if (r == EMULATION_FAILED) { if (reexecute_instruction(vcpu, cr2)) return EMULATE_DONE; -- cgit v1.2.3-70-g09d2 From d09beabd7cd4cf70d982ff54656dc6431df80fa4 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 4 Apr 2011 12:39:25 +0200 Subject: KVM: x86 emulator: Add check_perm callback This patch adds a check_perm callback for each opcode into the instruction emulator. This will be used to do all necessary permission checks on instructions before checking whether they are intercepted or not. 
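Taken together with the intercept framework above, the front of x86_emulate_insn() now performs its checks in a fixed order; a minimal sketch (reusing the assumed do_intercept_stage() helper from the earlier sketch, under an assumed wrapper name):

static int run_front_checks(struct x86_emulate_ctxt *ctxt,
			    struct decode_cache *c)
{
	int rc;

	/* 1. intercept check before any exception checks */
	rc = do_intercept_stage(ctxt, c->intercept, X86_ICPT_PRE_EXCEPT);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	/* 2. privileged instructions require CPL 0 */
	if ((c->d & Priv) && ctxt->ops->cpl(ctxt->vcpu))
		return emulate_gp(ctxt, 0);

	/* 3. instruction-specific permission check */
	if (c->check_perm) {
		rc = c->check_perm(ctxt);
		if (rc != X86EMUL_CONTINUE)
			return rc;
	}

	/* 4. intercept check after the exception checks */
	return do_intercept_stage(ctxt, c->intercept, X86_ICPT_POST_EXCEPT);
}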
Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_emulate.h | 1 + arch/x86/kvm/emulate.c | 14 ++++++++++++++ 2 files changed, 15 insertions(+) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 1dbd0c736cd..460c2d8964b 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -222,6 +222,7 @@ struct decode_cache { u8 seg_override; unsigned int d; int (*execute)(struct x86_emulate_ctxt *ctxt); + int (*check_perm)(struct x86_emulate_ctxt *ctxt); unsigned long regs[NR_VCPU_REGS]; unsigned long eip; /* modrm */ diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index a2c31e527a9..4822824b608 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -111,6 +111,7 @@ struct opcode { struct group_dual *gdual; struct gprefix *gprefix; } u; + int (*check_perm)(struct x86_emulate_ctxt *ctxt); }; struct group_dual { @@ -2425,12 +2426,17 @@ static int em_movdqu(struct x86_emulate_ctxt *ctxt) #define D(_y) { .flags = (_y) } #define DI(_y, _i) { .flags = (_y), .intercept = x86_intercept_##_i } +#define DIP(_y, _i, _p) { .flags = (_y), .intercept = x86_intercept_##_i, \ + .check_perm = (_p) } #define N D(0) #define G(_f, _g) { .flags = ((_f) | Group), .u.group = (_g) } #define GD(_f, _g) { .flags = ((_f) | Group | GroupDual), .u.gdual = (_g) } #define I(_f, _e) { .flags = (_f), .u.execute = (_e) } #define II(_f, _e, _i) \ { .flags = (_f), .u.execute = (_e), .intercept = x86_intercept_##_i } +#define IIP(_f, _e, _i, _p) \ + { .flags = (_f), .u.execute = (_e), .intercept = x86_intercept_##_i, \ + .check_perm = (_p) } #define GP(_f, _g) { .flags = ((_f) | Prefix), .u.gprefix = (_g) } #define D2bv(_f) D((_f) | ByteOp), D(_f) @@ -2873,6 +2879,7 @@ done_prefixes: } c->execute = opcode.u.execute; + c->check_perm = opcode.check_perm; c->intercept = opcode.intercept; /* Unrecognised? */ @@ -3136,6 +3143,13 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt) goto done; } + /* Do instruction specific permission checks */ + if (c->check_perm) { + rc = c->check_perm(ctxt); + if (rc != X86EMUL_CONTINUE) + goto done; + } + if (unlikely(ctxt->guest_mode) && c->intercept) { rc = ops->intercept(ctxt, c->intercept, X86_ICPT_POST_EXCEPT); -- cgit v1.2.3-70-g09d2 From 8ea7d6aef84e278fcb121acff1bd4c3edaa95b8b Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 4 Apr 2011 12:39:26 +0200 Subject: KVM: x86 emulator: Add flag to check for protected mode instructions This patch adds a flag for the opcodes to tag instructions which are only recognized in protected mode. The necessary check is added too. Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_emulate.h | 4 ++++ arch/x86/kvm/emulate.c | 7 +++++++ 2 files changed, 11 insertions(+) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 460c2d8964b..cab841a034f 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -274,6 +274,10 @@ struct x86_emulate_ctxt { #define X86EMUL_MODE_PROT32 4 /* 32-bit protected mode. */ #define X86EMUL_MODE_PROT64 8 /* 64-bit (long) mode. 
*/ +/* any protected mode */ +#define X86EMUL_MODE_PROT (X86EMUL_MODE_PROT16|X86EMUL_MODE_PROT32| \ + X86EMUL_MODE_PROT64) + enum x86_intercept_stage { X86_ICPT_PRE_EXCEPT, X86_ICPT_POST_EXCEPT, diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 4822824b608..3f32a6699fb 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -78,6 +78,7 @@ #define Prefix (1<<16) /* Instruction varies with 66/f2/f3 prefix */ #define Sse (1<<17) /* SSE Vector instruction */ /* Misc flags */ +#define Prot (1<<21) /* instruction generates #UD if not in prot-mode */ #define VendorSpecific (1<<22) /* Vendor specific instruction */ #define NoAccess (1<<23) /* Don't access memory (lea/invlpg/verr etc) */ #define Op3264 (1<<24) /* Operand is 64b in long mode, 32b otherwise */ @@ -3143,6 +3144,12 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt) goto done; } + /* Instruction can only be executed in protected mode */ + if ((c->d & Prot) && !(ctxt->mode & X86EMUL_MODE_PROT)) { + rc = emulate_ud(ctxt); + goto done; + } + /* Do instruction specific permission checks */ if (c->check_perm) { rc = c->check_perm(ctxt); -- cgit v1.2.3-70-g09d2 From 8a76d7f25f8f24fc5a328c8e15e4a7313cf141b9 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 4 Apr 2011 12:39:27 +0200 Subject: KVM: x86: Add x86 callback for intercept check This patch adds a callback into kvm_x86_ops so that svm and vmx code can do intercept checks on emulated instructions. Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_emulate.h | 22 ++++++++++++++++++++-- arch/x86/include/asm/kvm_host.h | 7 +++++++ arch/x86/kvm/emulate.c | 32 ++++++++++++++++++++++++++------ arch/x86/kvm/svm.c | 9 +++++++++ arch/x86/kvm/vmx.c | 9 +++++++++ arch/x86/kvm/x86.c | 6 +++--- 6 files changed, 74 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index cab841a034f..eb7033cefe8 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -25,6 +25,24 @@ struct x86_exception { u64 address; /* cr2 or nested page fault gpa */ }; +/* + * This struct is used to carry enough information from the instruction + * decoder to main KVM so that a decision can be made whether the + * instruction needs to be intercepted or not. + */ +struct x86_instruction_info { + u8 intercept; /* which intercept */ + u8 rep_prefix; /* rep prefix? 
*/ + u8 modrm_mod; /* mod part of modrm */ + u8 modrm_reg; /* index of register used */ + u8 modrm_rm; /* rm part of modrm */ + u64 src_val; /* value of source operand */ + u8 src_bytes; /* size of source operand */ + u8 dst_bytes; /* size of destination operand */ + u8 ad_bytes; /* size of src/dst address */ + u64 next_rip; /* rip following the instruction */ +}; + /* * x86_emulate_ops: * @@ -163,8 +181,8 @@ struct x86_emulate_ops { int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata); void (*get_fpu)(struct x86_emulate_ctxt *ctxt); /* disables preempt */ void (*put_fpu)(struct x86_emulate_ctxt *ctxt); /* reenables preempt */ - int (*intercept)(struct x86_emulate_ctxt *ctxt, - enum x86_intercept intercept, + int (*intercept)(struct kvm_vcpu *vcpu, + struct x86_instruction_info *info, enum x86_intercept_stage stage); }; diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index e820c6339b8..038562c222e 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -505,6 +505,8 @@ struct kvm_vcpu_stat { u32 nmi_injections; }; +struct x86_instruction_info; + struct kvm_x86_ops { int (*cpu_has_kvm_support)(void); /* __init */ int (*disabled_by_bios)(void); /* __init */ @@ -592,6 +594,11 @@ struct kvm_x86_ops { void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset); void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2); + + int (*check_intercept)(struct kvm_vcpu *vcpu, + struct x86_instruction_info *info, + enum x86_intercept_stage stage); + const struct trace_print_flags *exit_reasons_str; }; diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 3f32a6699fb..e3e96eada6f 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -408,6 +408,26 @@ struct gprefix { (_eip) += (_size); \ }) +static int emulator_check_intercept(struct x86_emulate_ctxt *ctxt, + enum x86_intercept intercept, + enum x86_intercept_stage stage) +{ + struct x86_instruction_info info = { + .intercept = intercept, + .rep_prefix = ctxt->decode.rep_prefix, + .modrm_mod = ctxt->decode.modrm_mod, + .modrm_reg = ctxt->decode.modrm_reg, + .modrm_rm = ctxt->decode.modrm_rm, + .src_val = ctxt->decode.src.val64, + .src_bytes = ctxt->decode.src.bytes, + .dst_bytes = ctxt->decode.dst.bytes, + .ad_bytes = ctxt->decode.ad_bytes, + .next_rip = ctxt->eip, + }; + + return ctxt->ops->intercept(ctxt->vcpu, &info, stage); +} + static inline unsigned long ad_mask(struct decode_cache *c) { return (1UL << (c->ad_bytes << 3)) - 1; @@ -3132,8 +3152,8 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt) } if (unlikely(ctxt->guest_mode) && c->intercept) { - rc = ops->intercept(ctxt, c->intercept, - X86_ICPT_PRE_EXCEPT); + rc = emulator_check_intercept(ctxt, c->intercept, + X86_ICPT_PRE_EXCEPT); if (rc != X86EMUL_CONTINUE) goto done; } @@ -3158,8 +3178,8 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt) } if (unlikely(ctxt->guest_mode) && c->intercept) { - rc = ops->intercept(ctxt, c->intercept, - X86_ICPT_POST_EXCEPT); + rc = emulator_check_intercept(ctxt, c->intercept, + X86_ICPT_POST_EXCEPT); if (rc != X86EMUL_CONTINUE) goto done; } @@ -3203,8 +3223,8 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt) special_insn: if (unlikely(ctxt->guest_mode) && c->intercept) { - rc = ops->intercept(ctxt, c->intercept, - X86_ICPT_POST_MEMACCESS); + rc = emulator_check_intercept(ctxt, c->intercept, + X86_ICPT_POST_MEMACCESS); if (rc != X86EMUL_CONTINUE) goto done; } diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index cb43e98eff6..798ebe695f1 100644 --- 
a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3868,6 +3868,13 @@ static void svm_fpu_deactivate(struct kvm_vcpu *vcpu) update_cr0_intercept(svm); } +static int svm_check_intercept(struct kvm_vcpu *vcpu, + struct x86_instruction_info *info, + enum x86_intercept_stage stage) +{ + return X86EMUL_CONTINUE; +} + static struct kvm_x86_ops svm_x86_ops = { .cpu_has_kvm_support = has_svm, .disabled_by_bios = is_disabled, @@ -3953,6 +3960,8 @@ static struct kvm_x86_ops svm_x86_ops = { .adjust_tsc_offset = svm_adjust_tsc_offset, .set_tdp_cr3 = set_tdp_cr3, + + .check_intercept = svm_check_intercept, }; static int __init svm_init(void) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 2b99ae72481..3dfefe3bcd0 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -4409,6 +4409,13 @@ static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) { } +static int vmx_check_intercept(struct kvm_vcpu *vcpu, + struct x86_instruction_info *info, + enum x86_intercept_stage stage) +{ + return X86EMUL_CONTINUE; +} + static struct kvm_x86_ops vmx_x86_ops = { .cpu_has_kvm_support = cpu_has_kvm_support, .disabled_by_bios = vmx_disabled_by_bios, @@ -4494,6 +4501,8 @@ static struct kvm_x86_ops vmx_x86_ops = { .adjust_tsc_offset = vmx_adjust_tsc_offset, .set_tdp_cr3 = vmx_set_cr3, + + .check_intercept = vmx_check_intercept, }; static int __init vmx_init(void) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 99bed74779d..eebe5465c8c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4297,11 +4297,11 @@ static void emulator_put_fpu(struct x86_emulate_ctxt *ctxt) preempt_enable(); } -static int emulator_intercept(struct x86_emulate_ctxt *ctxt, - enum x86_intercept intercept, +static int emulator_intercept(struct kvm_vcpu *vcpu, + struct x86_instruction_info *info, enum x86_intercept_stage stage) { - return X86EMUL_CONTINUE; + return kvm_x86_ops->check_intercept(vcpu, info, stage); } static struct x86_emulate_ops emulate_ops = { -- cgit v1.2.3-70-g09d2 From cfec82cb7d313ae5b2c2dbb974401d7c214c7b09 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 4 Apr 2011 12:39:28 +0200 Subject: KVM: SVM: Add intercept check for emulated cr accesses This patch adds all necessary intercept checks for instructions that access the crX registers. 
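One detail the svm.c hunk below relies on is worth spelling out: SVM numbers its CR-access exit codes consecutively, one per control register, so the exit code for an emulated MOV to/from CRn is computed by adding the ModRM reg field to the CR0 exit code. A minimal sketch (hypothetical helper, not part of the patch):

static u32 cr_access_exit_code(bool write, int modrm_reg)
{
	u32 base = write ? SVM_EXIT_WRITE_CR0 : SVM_EXIT_READ_CR0;

	return base + modrm_reg;	/* e.g. a CR4 read -> SVM_EXIT_READ_CR4 */
}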
Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_emulate.h | 3 ++ arch/x86/include/asm/kvm_host.h | 15 ++++++ arch/x86/kvm/emulate.c | 105 +++++++++++++++++++++++++++++++++---- arch/x86/kvm/svm.c | 81 +++++++++++++++++++++++++++- arch/x86/kvm/x86.c | 13 ----- 5 files changed, 192 insertions(+), 25 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index eb7033cefe8..2c0b5b47464 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -304,6 +304,9 @@ enum x86_intercept_stage { enum x86_intercept { x86_intercept_none, + x86_intercept_cr_read, + x86_intercept_cr_write, + x86_intercept_clts, x86_intercept_lmsw, x86_intercept_smsw, x86_intercept_lidt, diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 038562c222e..f7dfd6479d0 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -35,10 +35,25 @@ #define KVM_PIO_PAGE_OFFSET 1 #define KVM_COALESCED_MMIO_PAGE_OFFSET 2 +#define CR0_RESERVED_BITS \ + (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \ + | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \ + | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG)) + #define CR3_PAE_RESERVED_BITS ((X86_CR3_PWT | X86_CR3_PCD) - 1) #define CR3_NONPAE_RESERVED_BITS ((PAGE_SIZE-1) & ~(X86_CR3_PWT | X86_CR3_PCD)) #define CR3_L_MODE_RESERVED_BITS (CR3_NONPAE_RESERVED_BITS | \ 0xFFFFFF0000000000ULL) +#define CR4_RESERVED_BITS \ + (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\ + | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \ + | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR \ + | X86_CR4_OSXSAVE \ + | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE)) + +#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR) + + #define INVALID_PAGE (~(hpa_t)0) #define VALID_PAGE(x) ((x) != INVALID_PAGE) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index e3e96eada6f..d2e77755efe 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2445,6 +2445,95 @@ static int em_movdqu(struct x86_emulate_ctxt *ctxt) return X86EMUL_CONTINUE; } +static bool valid_cr(int nr) +{ + switch (nr) { + case 0: + case 2 ... 
4: + case 8: + return true; + default: + return false; + } +} + +static int check_cr_read(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + + if (!valid_cr(c->modrm_reg)) + return emulate_ud(ctxt); + + return X86EMUL_CONTINUE; +} + +static int check_cr_write(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + u64 new_val = c->src.val64; + int cr = c->modrm_reg; + + static u64 cr_reserved_bits[] = { + 0xffffffff00000000ULL, + 0, 0, 0, /* CR3 checked later */ + CR4_RESERVED_BITS, + 0, 0, 0, + CR8_RESERVED_BITS, + }; + + if (!valid_cr(cr)) + return emulate_ud(ctxt); + + if (new_val & cr_reserved_bits[cr]) + return emulate_gp(ctxt, 0); + + switch (cr) { + case 0: { + u64 cr4, efer; + if (((new_val & X86_CR0_PG) && !(new_val & X86_CR0_PE)) || + ((new_val & X86_CR0_NW) && !(new_val & X86_CR0_CD))) + return emulate_gp(ctxt, 0); + + cr4 = ctxt->ops->get_cr(4, ctxt->vcpu); + ctxt->ops->get_msr(ctxt->vcpu, MSR_EFER, &efer); + + if ((new_val & X86_CR0_PG) && (efer & EFER_LME) && + !(cr4 & X86_CR4_PAE)) + return emulate_gp(ctxt, 0); + + break; + } + case 3: { + u64 rsvd = 0; + + if (is_long_mode(ctxt->vcpu)) + rsvd = CR3_L_MODE_RESERVED_BITS; + else if (is_pae(ctxt->vcpu)) + rsvd = CR3_PAE_RESERVED_BITS; + else if (is_paging(ctxt->vcpu)) + rsvd = CR3_NONPAE_RESERVED_BITS; + + if (new_val & rsvd) + return emulate_gp(ctxt, 0); + + break; + } + case 4: { + u64 cr4, efer; + + cr4 = ctxt->ops->get_cr(4, ctxt->vcpu); + ctxt->ops->get_msr(ctxt->vcpu, MSR_EFER, &efer); + + if ((efer & EFER_LMA) && !(new_val & X86_CR4_PAE)) + return emulate_gp(ctxt, 0); + + break; + } + } + + return X86EMUL_CONTINUE; +} + #define D(_y) { .flags = (_y) } #define DI(_y, _i) { .flags = (_y), .intercept = x86_intercept_##_i } #define DIP(_y, _i, _p) { .flags = (_y), .intercept = x86_intercept_##_i, \ @@ -2632,14 +2721,16 @@ static struct opcode opcode_table[256] = { static struct opcode twobyte_table[256] = { /* 0x00 - 0x0F */ N, GD(0, &group7), N, N, - N, D(ImplicitOps | VendorSpecific), D(ImplicitOps | Priv), N, + N, D(ImplicitOps | VendorSpecific), DI(ImplicitOps | Priv, clts), N, DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N, N, D(ImplicitOps | ModRM), N, N, /* 0x10 - 0x1F */ N, N, N, N, N, N, N, N, D(ImplicitOps | ModRM), N, N, N, N, N, N, N, /* 0x20 - 0x2F */ - D(ModRM | DstMem | Priv | Op3264), D(ModRM | DstMem | Priv | Op3264), - D(ModRM | SrcMem | Priv | Op3264), D(ModRM | SrcMem | Priv | Op3264), + DIP(ModRM | DstMem | Priv | Op3264, cr_read, check_cr_read), + D(ModRM | DstMem | Priv | Op3264), + DIP(ModRM | SrcMem | Priv | Op3264, cr_write, check_cr_write), + D(ModRM | SrcMem | Priv | Op3264), N, N, N, N, N, N, N, N, N, N, N, N, /* 0x30 - 0x3F */ @@ -3724,14 +3815,6 @@ twobyte_insn: case 0x18: /* Grp16 (prefetch/nop) */ break; case 0x20: /* mov cr, reg */ - switch (c->modrm_reg) { - case 1: - case 5 ... 7: - case 9 ... 
15: - emulate_ud(ctxt); - rc = X86EMUL_PROPAGATE_FAULT; - goto done; - } c->dst.val = ops->get_cr(c->modrm_reg, ctxt->vcpu); break; case 0x21: /* mov from dr to reg */ diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 798ebe695f1..ff4ed3619d0 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3868,11 +3868,90 @@ static void svm_fpu_deactivate(struct kvm_vcpu *vcpu) update_cr0_intercept(svm); } +#define POST_EX(exit) { .exit_code = (exit), \ + .stage = X86_ICPT_POST_EXCEPT, \ + .valid = true } + +static struct __x86_intercept { + u32 exit_code; + enum x86_intercept_stage stage; + bool valid; +} x86_intercept_map[] = { + [x86_intercept_cr_read] = POST_EX(SVM_EXIT_READ_CR0), + [x86_intercept_cr_write] = POST_EX(SVM_EXIT_WRITE_CR0), + [x86_intercept_clts] = POST_EX(SVM_EXIT_WRITE_CR0), + [x86_intercept_lmsw] = POST_EX(SVM_EXIT_WRITE_CR0), + [x86_intercept_smsw] = POST_EX(SVM_EXIT_READ_CR0), +}; + +#undef POST_EX + static int svm_check_intercept(struct kvm_vcpu *vcpu, struct x86_instruction_info *info, enum x86_intercept_stage stage) { - return X86EMUL_CONTINUE; + struct vcpu_svm *svm = to_svm(vcpu); + int vmexit, ret = X86EMUL_CONTINUE; + struct __x86_intercept icpt_info; + struct vmcb *vmcb = svm->vmcb; + + if (info->intercept >= ARRAY_SIZE(x86_intercept_map)) + goto out; + + icpt_info = x86_intercept_map[info->intercept]; + + if (!icpt_info.valid || stage != icpt_info.stage) + goto out; + + switch (icpt_info.exit_code) { + case SVM_EXIT_READ_CR0: + if (info->intercept == x86_intercept_cr_read) + icpt_info.exit_code += info->modrm_reg; + break; + case SVM_EXIT_WRITE_CR0: { + unsigned long cr0, val; + u64 intercept; + + if (info->intercept == x86_intercept_cr_write) + icpt_info.exit_code += info->modrm_reg; + + if (icpt_info.exit_code != SVM_EXIT_WRITE_CR0) + break; + + intercept = svm->nested.intercept; + + if (!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0))) + break; + + cr0 = vcpu->arch.cr0 & ~SVM_CR0_SELECTIVE_MASK; + val = info->src_val & ~SVM_CR0_SELECTIVE_MASK; + + if (info->intercept == x86_intercept_lmsw) { + cr0 &= 0xfUL; + val &= 0xfUL; + /* lmsw can't clear PE - catch this here */ + if (cr0 & X86_CR0_PE) + val |= X86_CR0_PE; + } + + if (cr0 ^ val) + icpt_info.exit_code = SVM_EXIT_CR0_SEL_WRITE; + + break; + } + default: + break; + } + + vmcb->control.next_rip = info->next_rip; + vmcb->control.exit_code = icpt_info.exit_code; + vmexit = nested_svm_exit_handled(svm); + + ret = (vmexit == NESTED_EXIT_DONE) ? 
X86EMUL_INTERCEPTED + : X86EMUL_CONTINUE; + +out: + return ret; } static struct kvm_x86_ops svm_x86_ops = { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index eebe5465c8c..0d6524fa2af 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -60,19 +60,6 @@ #include #define MAX_IO_MSRS 256 -#define CR0_RESERVED_BITS \ - (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \ - | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \ - | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG)) -#define CR4_RESERVED_BITS \ - (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\ - | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \ - | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR \ - | X86_CR4_OSXSAVE \ - | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE)) - -#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR) - #define KVM_MAX_MCE_BANKS 32 #define KVM_MCE_CAP_SUPPORTED (MCG_CTL_P | MCG_SER_P) -- cgit v1.2.3-70-g09d2 From 3b88e41a41344846ee28007ebfe1bb0defa7f86a Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 4 Apr 2011 12:39:29 +0200 Subject: KVM: SVM: Add intercept check for accessing dr registers This patch adds the intercept checks for instruction accessing the debug registers. Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_emulate.h | 2 ++ arch/x86/kvm/emulate.c | 63 +++++++++++++++++++++++++++++--------- arch/x86/kvm/svm.c | 6 ++++ 3 files changed, 56 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 2c0b5b47464..fdaf59ac1c0 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -309,6 +309,8 @@ enum x86_intercept { x86_intercept_clts, x86_intercept_lmsw, x86_intercept_smsw, + x86_intercept_dr_read, + x86_intercept_dr_write, x86_intercept_lidt, x86_intercept_sidt, x86_intercept_lgdt, diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index d2e77755efe..cd9ed9f4527 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -509,6 +509,11 @@ static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec, return X86EMUL_PROPAGATE_FAULT; } +static int emulate_db(struct x86_emulate_ctxt *ctxt) +{ + return emulate_exception(ctxt, DB_VECTOR, 0, false); +} + static int emulate_gp(struct x86_emulate_ctxt *ctxt, int err) { return emulate_exception(ctxt, GP_VECTOR, err, true); @@ -2534,6 +2539,47 @@ static int check_cr_write(struct x86_emulate_ctxt *ctxt) return X86EMUL_CONTINUE; } +static int check_dr7_gd(struct x86_emulate_ctxt *ctxt) +{ + unsigned long dr7; + + ctxt->ops->get_dr(7, &dr7, ctxt->vcpu); + + /* Check if DR7.Global_Enable is set */ + return dr7 & (1 << 13); +} + +static int check_dr_read(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + int dr = c->modrm_reg; + u64 cr4; + + if (dr > 7) + return emulate_ud(ctxt); + + cr4 = ctxt->ops->get_cr(4, ctxt->vcpu); + if ((cr4 & X86_CR4_DE) && (dr == 4 || dr == 5)) + return emulate_ud(ctxt); + + if (check_dr7_gd(ctxt)) + return emulate_db(ctxt); + + return X86EMUL_CONTINUE; +} + +static int check_dr_write(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + u64 new_val = c->src.val64; + int dr = c->modrm_reg; + + if ((dr == 6 || dr == 7) && (new_val & 0xffffffff00000000ULL)) + return emulate_gp(ctxt, 0); + + return check_dr_read(ctxt); +} + #define D(_y) { .flags = (_y) } #define DI(_y, _i) { .flags = (_y), .intercept = x86_intercept_##_i } #define DIP(_y, _i, _p) { .flags = (_y), .intercept = 
x86_intercept_##_i, \ @@ -2728,9 +2774,9 @@ static struct opcode twobyte_table[256] = { N, N, N, N, N, N, N, N, D(ImplicitOps | ModRM), N, N, N, N, N, N, N, /* 0x20 - 0x2F */ DIP(ModRM | DstMem | Priv | Op3264, cr_read, check_cr_read), - D(ModRM | DstMem | Priv | Op3264), + DIP(ModRM | DstMem | Priv | Op3264, dr_read, check_dr_read), DIP(ModRM | SrcMem | Priv | Op3264, cr_write, check_cr_write), - D(ModRM | SrcMem | Priv | Op3264), + DIP(ModRM | SrcMem | Priv | Op3264, dr_write, check_dr_write), N, N, N, N, N, N, N, N, N, N, N, N, /* 0x30 - 0x3F */ @@ -3818,12 +3864,6 @@ twobyte_insn: c->dst.val = ops->get_cr(c->modrm_reg, ctxt->vcpu); break; case 0x21: /* mov from dr to reg */ - if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) && - (c->modrm_reg == 4 || c->modrm_reg == 5)) { - emulate_ud(ctxt); - rc = X86EMUL_PROPAGATE_FAULT; - goto done; - } ops->get_dr(c->modrm_reg, &c->dst.val, ctxt->vcpu); break; case 0x22: /* mov reg, cr */ @@ -3835,13 +3875,6 @@ twobyte_insn: c->dst.type = OP_NONE; break; case 0x23: /* mov from reg to dr */ - if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) && - (c->modrm_reg == 4 || c->modrm_reg == 5)) { - emulate_ud(ctxt); - rc = X86EMUL_PROPAGATE_FAULT; - goto done; - } - if (ops->set_dr(c->modrm_reg, c->src.val & ((ctxt->mode == X86EMUL_MODE_PROT64) ? ~0ULL : ~0U), ctxt->vcpu) < 0) { diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index ff4ed3619d0..381b038c0d0 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3882,6 +3882,8 @@ static struct __x86_intercept { [x86_intercept_clts] = POST_EX(SVM_EXIT_WRITE_CR0), [x86_intercept_lmsw] = POST_EX(SVM_EXIT_WRITE_CR0), [x86_intercept_smsw] = POST_EX(SVM_EXIT_READ_CR0), + [x86_intercept_dr_read] = POST_EX(SVM_EXIT_READ_DR0), + [x86_intercept_dr_write] = POST_EX(SVM_EXIT_WRITE_DR0), }; #undef POST_EX @@ -3939,6 +3941,10 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu, break; } + case SVM_EXIT_READ_DR0: + case SVM_EXIT_WRITE_DR0: + icpt_info.exit_code += info->modrm_reg; + break; default: break; } -- cgit v1.2.3-70-g09d2 From dee6bb70e4ac0588c98cc4e661664f0653117f89 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 4 Apr 2011 12:39:30 +0200 Subject: KVM: SVM: Add intercept checks for descriptor table accesses This patch adds intercept checks into the KVM instruction emulator to check for the 8 instructions that access the descriptor table addresses. 
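All eight instructions hide behind the two-byte opcodes 0F 00 (group 6) and 0F 01 (group 7), so the decoder selects among them by the reg field of the ModRM byte. A minimal sketch of that selection (hypothetical helper; the group6[] table added below is laid out to match):

/* 0F 00 /reg: sldt(/0), str(/1), lldt(/2), ltr(/3) */
static int group_slot(u8 modrm)
{
	return (modrm >> 3) & 7;	/* ModRM reg field picks the table slot */
}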
Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 14 ++++++++++++-- arch/x86/kvm/svm.c | 8 ++++++++ 2 files changed, 20 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index cd9ed9f4527..3ac830a135f 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2630,8 +2630,18 @@ static struct opcode group5[] = { D(SrcMem | ModRM | Stack), N, }; +static struct opcode group6[] = { + DI(ModRM | Prot, sldt), + DI(ModRM | Prot, str), + DI(ModRM | Prot | Priv, lldt), + DI(ModRM | Prot | Priv, ltr), + N, N, N, N, +}; + static struct group_dual group7 = { { - N, N, DI(ModRM | SrcMem | Priv, lgdt), DI(ModRM | SrcMem | Priv, lidt), + DI(ModRM | Mov | DstMem | Priv, sgdt), + DI(ModRM | Mov | DstMem | Priv, sidt), + DI(ModRM | SrcMem | Priv, lgdt), DI(ModRM | SrcMem | Priv, lidt), DI(SrcNone | ModRM | DstMem | Mov, smsw), N, DI(SrcMem16 | ModRM | Mov | Priv, lmsw), DI(SrcMem | ModRM | ByteOp | Priv | NoAccess, invlpg), @@ -2766,7 +2776,7 @@ static struct opcode opcode_table[256] = { static struct opcode twobyte_table[256] = { /* 0x00 - 0x0F */ - N, GD(0, &group7), N, N, + G(0, group6), GD(0, &group7), N, N, N, D(ImplicitOps | VendorSpecific), DI(ImplicitOps | Priv, clts), N, DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N, N, D(ImplicitOps | ModRM), N, N, diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 381b038c0d0..ce251c90781 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3884,6 +3884,14 @@ static struct __x86_intercept { [x86_intercept_smsw] = POST_EX(SVM_EXIT_READ_CR0), [x86_intercept_dr_read] = POST_EX(SVM_EXIT_READ_DR0), [x86_intercept_dr_write] = POST_EX(SVM_EXIT_WRITE_DR0), + [x86_intercept_sldt] = POST_EX(SVM_EXIT_LDTR_READ), + [x86_intercept_str] = POST_EX(SVM_EXIT_TR_READ), + [x86_intercept_lldt] = POST_EX(SVM_EXIT_LDTR_WRITE), + [x86_intercept_ltr] = POST_EX(SVM_EXIT_TR_WRITE), + [x86_intercept_sgdt] = POST_EX(SVM_EXIT_GDTR_READ), + [x86_intercept_sidt] = POST_EX(SVM_EXIT_IDTR_READ), + [x86_intercept_lgdt] = POST_EX(SVM_EXIT_GDTR_WRITE), + [x86_intercept_lidt] = POST_EX(SVM_EXIT_IDTR_WRITE), }; #undef POST_EX -- cgit v1.2.3-70-g09d2 From 01de8b09e6068936f7f5e386cb85637cf926468c Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 4 Apr 2011 12:39:31 +0200 Subject: KVM: SVM: Add intercept checks for SVM instructions This patch adds the necessary code changes in the instruction emulator and the extensions to svm.c to implement intercept checks for the svm instructions. 
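The permission side of the patch can be distilled as follows; a minimal sketch (assuming the combined helper name svme_gate(); compare check_svme() and check_svme_pa() in the diff): the SVM instructions #UD unless EFER.SVME is enabled, and the ones that take a VMCB physical address in RAX additionally #GP on an implausible address.

static int svme_gate(struct x86_emulate_ctxt *ctxt, bool uses_rax)
{
	u64 efer, rax;

	ctxt->ops->get_msr(ctxt->vcpu, MSR_EFER, &efer);
	if (!(efer & EFER_SVME))
		return emulate_ud(ctxt);

	if (uses_rax) {
		rax = kvm_register_read(ctxt->vcpu, VCPU_REGS_RAX);
		if (rax & 0xffff000000000000ULL)	/* beyond 48-bit physical space */
			return emulate_gp(ctxt, 0);
	}

	return X86EMUL_CONTINUE;
}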
Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 44 +++++++++++++++++++++++++++++++++++++++++++- arch/x86/kvm/svm.c | 8 ++++++++ 2 files changed, 51 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 3ac830a135f..a3aba9552b3 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -77,6 +77,7 @@ #define GroupDual (1<<15) /* Alternate decoding of mod == 3 */ #define Prefix (1<<16) /* Instruction varies with 66/f2/f3 prefix */ #define Sse (1<<17) /* SSE Vector instruction */ +#define RMExt (1<<18) /* Opcode extension in ModRM r/m if mod == 3 */ /* Misc flags */ #define Prot (1<<21) /* instruction generates #UD if not in prot-mode */ #define VendorSpecific (1<<22) /* Vendor specific instruction */ @@ -2580,11 +2581,35 @@ static int check_dr_write(struct x86_emulate_ctxt *ctxt) return check_dr_read(ctxt); } +static int check_svme(struct x86_emulate_ctxt *ctxt) +{ + u64 efer; + + ctxt->ops->get_msr(ctxt->vcpu, MSR_EFER, &efer); + + if (!(efer & EFER_SVME)) + return emulate_ud(ctxt); + + return X86EMUL_CONTINUE; +} + +static int check_svme_pa(struct x86_emulate_ctxt *ctxt) +{ + u64 rax = kvm_register_read(ctxt->vcpu, VCPU_REGS_RAX); + + /* Valid physical address? */ + if (rax & 0xffff000000000000) + return emulate_gp(ctxt, 0); + + return check_svme(ctxt); +} + #define D(_y) { .flags = (_y) } #define DI(_y, _i) { .flags = (_y), .intercept = x86_intercept_##_i } #define DIP(_y, _i, _p) { .flags = (_y), .intercept = x86_intercept_##_i, \ .check_perm = (_p) } #define N D(0) +#define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) } #define G(_f, _g) { .flags = ((_f) | Group), .u.group = (_g) } #define GD(_f, _g) { .flags = ((_f) | Group | GroupDual), .u.gdual = (_g) } #define I(_f, _e) { .flags = (_f), .u.execute = (_e) } @@ -2602,6 +2627,16 @@ static int check_dr_write(struct x86_emulate_ctxt *ctxt) D2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock), \ D2bv(((_f) & ~Lock) | DstAcc | SrcImm) +static struct opcode group7_rm3[] = { + DIP(SrcNone | ModRM | Prot | Priv, vmrun, check_svme_pa), + DIP(SrcNone | ModRM | Prot , vmmcall, check_svme), + DIP(SrcNone | ModRM | Prot | Priv, vmload, check_svme_pa), + DIP(SrcNone | ModRM | Prot | Priv, vmsave, check_svme_pa), + DIP(SrcNone | ModRM | Prot | Priv, stgi, check_svme), + DIP(SrcNone | ModRM | Prot | Priv, clgi, check_svme), + DIP(SrcNone | ModRM | Prot | Priv, skinit, check_svme), + DIP(SrcNone | ModRM | Prot | Priv, invlpga, check_svme), +}; static struct opcode group1[] = { X7(D(Lock)), N @@ -2647,7 +2682,7 @@ static struct group_dual group7 = { { DI(SrcMem | ModRM | ByteOp | Priv | NoAccess, invlpg), }, { D(SrcNone | ModRM | Priv | VendorSpecific), N, - N, D(SrcNone | ModRM | Priv | VendorSpecific), + N, EXT(0, group7_rm3), DI(SrcNone | ModRM | DstMem | Mov, smsw), N, DI(SrcMem16 | ModRM | Mov | Priv, lmsw), N, } }; @@ -2853,6 +2888,7 @@ static struct opcode twobyte_table[256] = { #undef GD #undef I #undef GP +#undef EXT #undef D2bv #undef I2bv @@ -3030,6 +3066,12 @@ done_prefixes: opcode = g_mod3[goffset]; else opcode = g_mod012[goffset]; + + if (opcode.flags & RMExt) { + goffset = c->modrm & 7; + opcode = opcode.u.group[goffset]; + } + c->d |= opcode.flags; } diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index ce251c90781..b98d00bfaf8 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3892,6 +3892,14 @@ static struct __x86_intercept { [x86_intercept_sidt] = POST_EX(SVM_EXIT_IDTR_READ), [x86_intercept_lgdt] = 
POST_EX(SVM_EXIT_GDTR_WRITE), [x86_intercept_lidt] = POST_EX(SVM_EXIT_IDTR_WRITE), + [x86_intercept_vmrun] = POST_EX(SVM_EXIT_VMRUN), + [x86_intercept_vmmcall] = POST_EX(SVM_EXIT_VMMCALL), + [x86_intercept_vmload] = POST_EX(SVM_EXIT_VMLOAD), + [x86_intercept_vmsave] = POST_EX(SVM_EXIT_VMSAVE), + [x86_intercept_stgi] = POST_EX(SVM_EXIT_STGI), + [x86_intercept_clgi] = POST_EX(SVM_EXIT_CLGI), + [x86_intercept_skinit] = POST_EX(SVM_EXIT_SKINIT), + [x86_intercept_invlpga] = POST_EX(SVM_EXIT_INVLPGA), }; #undef POST_EX -- cgit v1.2.3-70-g09d2 From d7eb82030699e6151f1356e90d495bf292564fb7 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 4 Apr 2011 12:39:32 +0200 Subject: KVM: SVM: Add intercept checks for remaining group7 instructions This patch implements the emulator intercept checks for the RDTSCP, MONITOR, and MWAIT instructions. Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 25 +++++++++++++++++++++++-- arch/x86/kvm/svm.c | 7 +++++++ 2 files changed, 30 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index a3aba9552b3..b4adb4cbb5f 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2604,6 +2604,16 @@ static int check_svme_pa(struct x86_emulate_ctxt *ctxt) return check_svme(ctxt); } +static int check_rdtsc(struct x86_emulate_ctxt *ctxt) +{ + u64 cr4 = ctxt->ops->get_cr(4, ctxt->vcpu); + + if (cr4 & X86_CR4_TSD && ctxt->ops->cpl(ctxt->vcpu)) + return emulate_ud(ctxt); + + return X86EMUL_CONTINUE; +} + #define D(_y) { .flags = (_y) } #define DI(_y, _i) { .flags = (_y), .intercept = x86_intercept_##_i } #define DIP(_y, _i, _p) { .flags = (_y), .intercept = x86_intercept_##_i, \ @@ -2627,6 +2637,12 @@ static int check_svme_pa(struct x86_emulate_ctxt *ctxt) D2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock), \ D2bv(((_f) & ~Lock) | DstAcc | SrcImm) +static struct opcode group7_rm1[] = { + DI(SrcNone | ModRM | Priv, monitor), + DI(SrcNone | ModRM | Priv, mwait), + N, N, N, N, N, N, +}; + static struct opcode group7_rm3[] = { DIP(SrcNone | ModRM | Prot | Priv, vmrun, check_svme_pa), DIP(SrcNone | ModRM | Prot , vmmcall, check_svme), @@ -2638,6 +2654,11 @@ static struct opcode group7_rm3[] = { DIP(SrcNone | ModRM | Prot | Priv, invlpga, check_svme), }; +static struct opcode group7_rm7[] = { + N, + DIP(SrcNone | ModRM, rdtscp, check_rdtsc), + N, N, N, N, N, N, +}; static struct opcode group1[] = { X7(D(Lock)), N }; @@ -2681,10 +2702,10 @@ static struct group_dual group7 = { { DI(SrcMem16 | ModRM | Mov | Priv, lmsw), DI(SrcMem | ModRM | ByteOp | Priv | NoAccess, invlpg), }, { - D(SrcNone | ModRM | Priv | VendorSpecific), N, + D(SrcNone | ModRM | Priv | VendorSpecific), EXT(0, group7_rm1), N, EXT(0, group7_rm3), DI(SrcNone | ModRM | DstMem | Mov, smsw), N, - DI(SrcMem16 | ModRM | Mov | Priv, lmsw), N, + DI(SrcMem16 | ModRM | Mov | Priv, lmsw), EXT(0, group7_rm7), } }; static struct opcode group8[] = { diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index b98d00bfaf8..1eb5504ca6f 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3871,6 +3871,9 @@ static void svm_fpu_deactivate(struct kvm_vcpu *vcpu) #define POST_EX(exit) { .exit_code = (exit), \ .stage = X86_ICPT_POST_EXCEPT, \ .valid = true } +#define POST_MEM(exit) { .exit_code = (exit), \ + .stage = X86_ICPT_POST_MEMACCESS, \ + .valid = true } static struct __x86_intercept { u32 exit_code; @@ -3900,9 +3903,13 @@ static struct __x86_intercept { [x86_intercept_clgi] = POST_EX(SVM_EXIT_CLGI), [x86_intercept_skinit] = 
POST_EX(SVM_EXIT_SKINIT), [x86_intercept_invlpga] = POST_EX(SVM_EXIT_INVLPGA), + [x86_intercept_rdtscp] = POST_EX(SVM_EXIT_RDTSCP), + [x86_intercept_monitor] = POST_MEM(SVM_EXIT_MONITOR), + [x86_intercept_mwait] = POST_EX(SVM_EXIT_MWAIT), }; #undef POST_EX +#undef POST_MEM static int svm_check_intercept(struct kvm_vcpu *vcpu, struct x86_instruction_info *info, -- cgit v1.2.3-70-g09d2 From 8061252ee0d21e1289235a4b7fe61f53010c46ff Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 4 Apr 2011 12:39:33 +0200 Subject: KVM: SVM: Add intercept checks for remaining twobyte instructions This patch adds intercepts checks for the remaining twobyte instructions to the KVM instruction emulator. Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_emulate.h | 2 ++ arch/x86/kvm/emulate.c | 25 ++++++++++++++++++------- arch/x86/kvm/svm.c | 19 +++++++++++++++++++ 3 files changed, 39 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index fdaf59ac1c0..f30650f0090 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -344,6 +344,8 @@ enum x86_intercept { x86_intercept_wbinvd, x86_intercept_monitor, x86_intercept_mwait, + x86_intercept_rdmsr, + x86_intercept_wrmsr, nr_x86_intercepts }; diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index b4adb4cbb5f..0bf1f68a71c 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2425,12 +2425,9 @@ static int em_cwd(struct x86_emulate_ctxt *ctxt) static int em_rdtsc(struct x86_emulate_ctxt *ctxt) { - unsigned cpl = ctxt->ops->cpl(ctxt->vcpu); struct decode_cache *c = &ctxt->decode; u64 tsc = 0; - if (cpl > 0 && (ctxt->ops->get_cr(4, ctxt->vcpu) & X86_CR4_TSD)) - return emulate_gp(ctxt, 0); ctxt->ops->get_msr(ctxt->vcpu, MSR_IA32_TSC, &tsc); c->regs[VCPU_REGS_RAX] = (u32)tsc; c->regs[VCPU_REGS_RDX] = tsc >> 32; @@ -2614,6 +2611,18 @@ static int check_rdtsc(struct x86_emulate_ctxt *ctxt) return X86EMUL_CONTINUE; } +static int check_rdpmc(struct x86_emulate_ctxt *ctxt) +{ + u64 cr4 = ctxt->ops->get_cr(4, ctxt->vcpu); + u64 rcx = kvm_register_read(ctxt->vcpu, VCPU_REGS_RCX); + + if ((!(cr4 & X86_CR4_PCE) && ctxt->ops->cpl(ctxt->vcpu)) || + (rcx > 3)) + return emulate_gp(ctxt, 0); + + return X86EMUL_CONTINUE; +} + #define D(_y) { .flags = (_y) } #define DI(_y, _i) { .flags = (_y), .intercept = x86_intercept_##_i } #define DIP(_y, _i, _p) { .flags = (_y), .intercept = x86_intercept_##_i, \ @@ -2846,8 +2855,10 @@ static struct opcode twobyte_table[256] = { N, N, N, N, N, N, N, N, N, N, N, N, /* 0x30 - 0x3F */ - D(ImplicitOps | Priv), II(ImplicitOps, em_rdtsc, rdtsc), - D(ImplicitOps | Priv), N, + DI(ImplicitOps | Priv, wrmsr), + IIP(ImplicitOps, em_rdtsc, rdtsc, check_rdtsc), + DI(ImplicitOps | Priv, rdmsr), + DIP(ImplicitOps | Priv, rdpmc, check_rdpmc), D(ImplicitOps | VendorSpecific), D(ImplicitOps | Priv | VendorSpecific), N, N, N, N, N, N, N, N, N, N, @@ -2871,12 +2882,12 @@ static struct opcode twobyte_table[256] = { X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)), /* 0xA0 - 0xA7 */ D(ImplicitOps | Stack), D(ImplicitOps | Stack), - N, D(DstMem | SrcReg | ModRM | BitOp), + DI(ImplicitOps, cpuid), D(DstMem | SrcReg | ModRM | BitOp), D(DstMem | SrcReg | Src2ImmByte | ModRM), D(DstMem | SrcReg | Src2CL | ModRM), N, N, /* 0xA8 - 0xAF */ D(ImplicitOps | Stack), D(ImplicitOps | Stack), - N, D(DstMem | SrcReg | ModRM | BitOp | Lock), + DI(ImplicitOps, rsm), D(DstMem | SrcReg | ModRM | BitOp | Lock), 
D(DstMem | SrcReg | Src2ImmByte | ModRM), D(DstMem | SrcReg | Src2CL | ModRM), D(ModRM), I(DstReg | SrcMem | ModRM, em_imul), diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 1eb5504ca6f..90362892244 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3868,6 +3868,9 @@ static void svm_fpu_deactivate(struct kvm_vcpu *vcpu) update_cr0_intercept(svm); } +#define PRE_EX(exit) { .exit_code = (exit), \ + .stage = X86_ICPT_PRE_EXCEPT, \ + .valid = true } #define POST_EX(exit) { .exit_code = (exit), \ .stage = X86_ICPT_POST_EXCEPT, \ .valid = true } @@ -3906,8 +3909,18 @@ static struct __x86_intercept { [x86_intercept_rdtscp] = POST_EX(SVM_EXIT_RDTSCP), [x86_intercept_monitor] = POST_MEM(SVM_EXIT_MONITOR), [x86_intercept_mwait] = POST_EX(SVM_EXIT_MWAIT), + [x86_intercept_invlpg] = POST_EX(SVM_EXIT_INVLPG), + [x86_intercept_invd] = POST_EX(SVM_EXIT_INVD), + [x86_intercept_wbinvd] = POST_EX(SVM_EXIT_WBINVD), + [x86_intercept_wrmsr] = POST_EX(SVM_EXIT_MSR), + [x86_intercept_rdtsc] = POST_EX(SVM_EXIT_RDTSC), + [x86_intercept_rdmsr] = POST_EX(SVM_EXIT_MSR), + [x86_intercept_rdpmc] = POST_EX(SVM_EXIT_RDPMC), + [x86_intercept_cpuid] = PRE_EX(SVM_EXIT_CPUID), + [x86_intercept_rsm] = PRE_EX(SVM_EXIT_RSM), }; +#undef PRE_EX #undef POST_EX #undef POST_MEM @@ -3987,6 +3994,12 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu, case SVM_EXIT_WRITE_DR0: icpt_info.exit_code += info->modrm_reg; break; + case SVM_EXIT_MSR: + if (info->intercept == x86_intercept_wrmsr) + vmcb->control.exit_info_1 = 1; + else + vmcb->control.exit_info_1 = 0; + break; default: break; } -- cgit v1.2.3-70-g09d2 From bf608f88faef1245ff87e731512517fc676ffe02 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 4 Apr 2011 12:39:34 +0200 Subject: KVM: SVM: Add intercept checks for one-byte instructions This patch adds intercept checks for emulated one-byte instructions to the KVM instruction emulation path. 
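A note on the PAUSE entry below: opcode 0x90 on its own is NOP, and PAUSE is its F3-prefixed form (rep nop), so the decode table can only tag the 0x90 slot; the backend has to look at the REP prefix to tell the two apart, which is what the SVM_EXIT_PAUSE hunk below does. A minimal sketch of the distinction (hypothetical helper):

static bool insn_is_pause(u8 opcode, u8 rep_prefix)
{
	return opcode == 0x90 && rep_prefix == REPE_PREFIX;	/* F3 90 */
}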
Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 4 ++-- arch/x86/kvm/svm.c | 14 ++++++++++++++ 2 files changed, 16 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 0bf1f68a71c..cc32e72fe17 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2789,7 +2789,7 @@ static struct opcode opcode_table[256] = { D(DstMem | SrcNone | ModRM | Mov), D(ModRM | SrcMem | NoAccess | DstReg), D(ImplicitOps | SrcMem16 | ModRM), G(0, group1A), /* 0x90 - 0x97 */ - X8(D(SrcAcc | DstReg)), + DI(SrcAcc | DstReg, pause), X7(D(SrcAcc | DstReg)), /* 0x98 - 0x9F */ D(DstAcc | SrcNone), I(ImplicitOps | SrcAcc, em_cwd), I(SrcImmFAddr | No64, em_call_far), N, @@ -2831,7 +2831,7 @@ static struct opcode opcode_table[256] = { D(SrcImmFAddr | No64), D(SrcImmByte | ImplicitOps), D2bv(SrcNone | DstAcc), D2bv(SrcAcc | ImplicitOps), /* 0xF0 - 0xF7 */ - N, N, N, N, + N, DI(ImplicitOps, icebp), N, N, DI(ImplicitOps | Priv, hlt), D(ImplicitOps), G(ByteOp, group3), G(0, group3), /* 0xF8 - 0xFF */ diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 90362892244..9eb27100e2e 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3918,6 +3918,13 @@ static struct __x86_intercept { [x86_intercept_rdpmc] = POST_EX(SVM_EXIT_RDPMC), [x86_intercept_cpuid] = PRE_EX(SVM_EXIT_CPUID), [x86_intercept_rsm] = PRE_EX(SVM_EXIT_RSM), + [x86_intercept_pause] = PRE_EX(SVM_EXIT_PAUSE), + [x86_intercept_pushf] = PRE_EX(SVM_EXIT_PUSHF), + [x86_intercept_popf] = PRE_EX(SVM_EXIT_POPF), + [x86_intercept_intn] = PRE_EX(SVM_EXIT_SWINT), + [x86_intercept_iret] = PRE_EX(SVM_EXIT_IRET), + [x86_intercept_icebp] = PRE_EX(SVM_EXIT_ICEBP), + [x86_intercept_hlt] = POST_EX(SVM_EXIT_HLT), }; #undef PRE_EX @@ -3987,6 +3994,13 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu, else vmcb->control.exit_info_1 = 0; break; + case SVM_EXIT_PAUSE: + /* + * We get this for NOP only, but pause + * is rep nop, check this here + */ + if (info->rep_prefix != REPE_PREFIX) + goto out; default: break; } -- cgit v1.2.3-70-g09d2 From f6511935f424b9a25059ae18e91ad11dd24980e6 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 4 Apr 2011 12:39:35 +0200 Subject: KVM: SVM: Add checks for IO instructions This patch adds code to check for IOIO intercepts on instructions decoded by the KVM instruction emulator. 
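The heart of the svm.c hunk below is assembling SVM's IOIO exit-information word from the decoded instruction. A minimal sketch of that assembly (hypothetical helper built on the SVM_IOIO_* masks the patch uses; the address-size bits handled via SVM_IOIO_ASIZE_SHIFT are omitted for brevity):

static u64 build_ioio_exit_info(u16 port, u32 bytes, bool in, bool string,
				bool rep)
{
	u64 info = (u64)port << 16;		/* port number in the upper half */

	if (in)
		info |= SVM_IOIO_TYPE_MASK;	/* direction: IN */
	if (string)
		info |= SVM_IOIO_STR_MASK;	/* INS/OUTS */
	if (rep)
		info |= SVM_IOIO_REP_MASK;	/* REP-prefixed */
	info |= (u64)min(bytes, 4u) << SVM_IOIO_SIZE_SHIFT; /* operand size */

	return info;
}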
[avi: fix build error due to missing #define D2bvIP] Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_emulate.h | 4 ++++ arch/x86/kvm/emulate.c | 45 +++++++++++++++++++++++++------------- arch/x86/kvm/svm.c | 36 ++++++++++++++++++++++++++++++ 3 files changed, 70 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index f30650f0090..081844860a3 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -346,6 +346,10 @@ enum x86_intercept { x86_intercept_mwait, x86_intercept_rdmsr, x86_intercept_wrmsr, + x86_intercept_in, + x86_intercept_ins, + x86_intercept_out, + x86_intercept_outs, nr_x86_intercepts }; diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index cc32e72fe17..d88dcfd66a8 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2623,6 +2623,28 @@ static int check_rdpmc(struct x86_emulate_ctxt *ctxt) return X86EMUL_CONTINUE; } +static int check_perm_in(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + + c->dst.bytes = min(c->dst.bytes, 4u); + if (!emulator_io_permited(ctxt, ctxt->ops, c->src.val, c->dst.bytes)) + return emulate_gp(ctxt, 0); + + return X86EMUL_CONTINUE; +} + +static int check_perm_out(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + + c->src.bytes = min(c->src.bytes, 4u); + if (!emulator_io_permited(ctxt, ctxt->ops, c->dst.val, c->src.bytes)) + return emulate_gp(ctxt, 0); + + return X86EMUL_CONTINUE; +} + #define D(_y) { .flags = (_y) } #define DI(_y, _i) { .flags = (_y), .intercept = x86_intercept_##_i } #define DIP(_y, _i, _p) { .flags = (_y), .intercept = x86_intercept_##_i, \ @@ -2640,6 +2662,7 @@ static int check_rdpmc(struct x86_emulate_ctxt *ctxt) #define GP(_f, _g) { .flags = ((_f) | Prefix), .u.gprefix = (_g) } #define D2bv(_f) D((_f) | ByteOp), D(_f) +#define D2bvIP(_f, _i, _p) DIP((_f) | ByteOp, _i, _p), DIP(_f, _i, _p) #define I2bv(_f, _e) I((_f) | ByteOp, _e), I(_f, _e) #define D6ALU(_f) D2bv((_f) | DstMem | SrcReg | ModRM), \ @@ -2773,8 +2796,8 @@ static struct opcode opcode_table[256] = { I(DstReg | SrcMem | ModRM | Src2Imm, em_imul_3op), I(SrcImmByte | Mov | Stack, em_push), I(DstReg | SrcMem | ModRM | Src2ImmByte, em_imul_3op), - D2bv(DstDI | Mov | String), /* insb, insw/insd */ - D2bv(SrcSI | ImplicitOps | String), /* outsb, outsw/outsd */ + D2bvIP(DstDI | Mov | String, ins, check_perm_in), /* insb, insw/insd */ + D2bvIP(SrcSI | ImplicitOps | String, outs, check_perm_out), /* outsb, outsw/outsd */ /* 0x70 - 0x7F */ X16(D(SrcImmByte)), /* 0x80 - 0x87 */ @@ -2825,11 +2848,13 @@ static struct opcode opcode_table[256] = { N, N, N, N, N, N, N, N, /* 0xE0 - 0xE7 */ X4(D(SrcImmByte)), - D2bv(SrcImmUByte | DstAcc), D2bv(SrcAcc | DstImmUByte), + D2bvIP(SrcImmUByte | DstAcc, in, check_perm_in), + D2bvIP(SrcAcc | DstImmUByte, out, check_perm_out), /* 0xE8 - 0xEF */ D(SrcImm | Stack), D(SrcImm | ImplicitOps), D(SrcImmFAddr | No64), D(SrcImmByte | ImplicitOps), - D2bv(SrcNone | DstAcc), D2bv(SrcAcc | ImplicitOps), + D2bvIP(SrcNone | DstAcc, in, check_perm_in), + D2bvIP(SrcAcc | ImplicitOps, out, check_perm_out), /* 0xF0 - 0xF7 */ N, DI(ImplicitOps, icebp), N, N, DI(ImplicitOps | Priv, hlt), D(ImplicitOps), @@ -2923,6 +2948,7 @@ static struct opcode twobyte_table[256] = { #undef EXT #undef D2bv +#undef D2bvIP #undef I2bv #undef D6ALU @@ -3731,11 +3757,6 @@ special_insn: case 0xed: /* in (e/r)ax,dx */ c->src.val = 
c->regs[VCPU_REGS_RDX]; do_io_in: - c->dst.bytes = min(c->dst.bytes, 4u); - if (!emulator_io_permited(ctxt, ops, c->src.val, c->dst.bytes)) { - rc = emulate_gp(ctxt, 0); - goto done; - } if (!pio_in_emulated(ctxt, ops, c->dst.bytes, c->src.val, &c->dst.val)) goto done; /* IO is needed */ @@ -3744,12 +3765,6 @@ special_insn: case 0xef: /* out dx,(e/r)ax */ c->dst.val = c->regs[VCPU_REGS_RDX]; do_io_out: - c->src.bytes = min(c->src.bytes, 4u); - if (!emulator_io_permited(ctxt, ops, c->dst.val, - c->src.bytes)) { - rc = emulate_gp(ctxt, 0); - goto done; - } ops->pio_out_emulated(c->src.bytes, c->dst.val, &c->src.val, 1, ctxt->vcpu); c->dst.type = OP_NONE; /* Disable writeback. */ diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 9eb27100e2e..5c6512dbac7 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3925,6 +3925,10 @@ static struct __x86_intercept { [x86_intercept_iret] = PRE_EX(SVM_EXIT_IRET), [x86_intercept_icebp] = PRE_EX(SVM_EXIT_ICEBP), [x86_intercept_hlt] = POST_EX(SVM_EXIT_HLT), + [x86_intercept_in] = POST_EX(SVM_EXIT_IOIO), + [x86_intercept_ins] = POST_EX(SVM_EXIT_IOIO), + [x86_intercept_out] = POST_EX(SVM_EXIT_IOIO), + [x86_intercept_outs] = POST_EX(SVM_EXIT_IOIO), }; #undef PRE_EX @@ -4001,6 +4005,38 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu, */ if (info->rep_prefix != REPE_PREFIX) goto out; + case SVM_EXIT_IOIO: { + u64 exit_info; + u32 bytes; + + exit_info = (vcpu->arch.regs[VCPU_REGS_RDX] & 0xffff) << 16; + + if (info->intercept == x86_intercept_in || + info->intercept == x86_intercept_ins) { + exit_info |= SVM_IOIO_TYPE_MASK; + bytes = info->src_bytes; + } else { + bytes = info->dst_bytes; + } + + if (info->intercept == x86_intercept_outs || + info->intercept == x86_intercept_ins) + exit_info |= SVM_IOIO_STR_MASK; + + if (info->rep_prefix) + exit_info |= SVM_IOIO_REP_MASK; + + bytes = min(bytes, 4u); + + exit_info |= bytes << SVM_IOIO_SIZE_SHIFT; + + exit_info |= (u32)info->ad_bytes << (SVM_IOIO_ASIZE_SHIFT - 1); + + vmcb->control.exit_info_1 = exit_info; + vmcb->control.exit_info_2 = info->next_rip; + + break; + } default: break; } -- cgit v1.2.3-70-g09d2 From 628afd2aeb286415f6e7d0ee7c87aae249e7f999 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 4 Apr 2011 12:39:36 +0200 Subject: KVM: SVM: Remove nested sel_cr0_write handling code This patch removes all the old code which handled the nested selective cr0 write intercepts. This code was only in place as a work-around until the instruction emulator is capable of doing the same. This is the case with this patch-set and so the code can be removed. 
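The rule that replaces the removed work-around is compact enough to state directly; a minimal sketch (hypothetical helper; compare check_selective_cr0_intercepted() in the diff): a selective-CR0 #VMEXIT is due only when a CR0 write changes bits other than TS and MP.

static bool sel_cr0_write_needs_vmexit(unsigned long old_cr0,
				       unsigned long new_cr0)
{
	/* SVM_CR0_SELECTIVE_MASK covers X86_CR0_TS and X86_CR0_MP */
	old_cr0 &= ~SVM_CR0_SELECTIVE_MASK;
	new_cr0 &= ~SVM_CR0_SELECTIVE_MASK;

	return old_cr0 != new_cr0;	/* some other CR0 bit changed */
}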
Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/kvm/svm.c | 78 ++++++++++++++++++------------------------------------ 1 file changed, 26 insertions(+), 52 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 5c6512dbac7..779b09194f0 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -93,14 +93,6 @@ struct nested_state { /* A VMEXIT is required but not yet emulated */ bool exit_required; - /* - * If we vmexit during an instruction emulation we need this to restore - * the l1 guest rip after the emulation - */ - unsigned long vmexit_rip; - unsigned long vmexit_rsp; - unsigned long vmexit_rax; - /* cache for intercepts of the guest */ u32 intercept_cr; u32 intercept_dr; @@ -1362,31 +1354,6 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) { struct vcpu_svm *svm = to_svm(vcpu); - if (is_guest_mode(vcpu)) { - /* - * We are here because we run in nested mode, the host kvm - * intercepts cr0 writes but the l1 hypervisor does not. - * But the L1 hypervisor may intercept selective cr0 writes. - * This needs to be checked here. - */ - unsigned long old, new; - - /* Remove bits that would trigger a real cr0 write intercept */ - old = vcpu->arch.cr0 & SVM_CR0_SELECTIVE_MASK; - new = cr0 & SVM_CR0_SELECTIVE_MASK; - - if (old == new) { - /* cr0 write with ts and mp unchanged */ - svm->vmcb->control.exit_code = SVM_EXIT_CR0_SEL_WRITE; - if (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE) { - svm->nested.vmexit_rip = kvm_rip_read(vcpu); - svm->nested.vmexit_rsp = kvm_register_read(vcpu, VCPU_REGS_RSP); - svm->nested.vmexit_rax = kvm_register_read(vcpu, VCPU_REGS_RAX); - return; - } - } - } - #ifdef CONFIG_X86_64 if (vcpu->arch.efer & EFER_LME) { if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { @@ -2673,6 +2640,29 @@ static int emulate_on_interception(struct vcpu_svm *svm) return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE; } +bool check_selective_cr0_intercepted(struct vcpu_svm *svm, unsigned long val) +{ + unsigned long cr0 = svm->vcpu.arch.cr0; + bool ret = false; + u64 intercept; + + intercept = svm->nested.intercept; + + if (!is_guest_mode(&svm->vcpu) || + (!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0)))) + return false; + + cr0 &= ~SVM_CR0_SELECTIVE_MASK; + val &= ~SVM_CR0_SELECTIVE_MASK; + + if (cr0 ^ val) { + svm->vmcb->control.exit_code = SVM_EXIT_CR0_SEL_WRITE; + ret = (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE); + } + + return ret; +} + #define CR_VALID (1ULL << 63) static int cr_interception(struct vcpu_svm *svm) @@ -2696,7 +2686,8 @@ static int cr_interception(struct vcpu_svm *svm) val = kvm_register_read(&svm->vcpu, reg); switch (cr) { case 0: - err = kvm_set_cr0(&svm->vcpu, val); + if (!check_selective_cr0_intercepted(svm, val)) + err = kvm_set_cr0(&svm->vcpu, val); break; case 3: err = kvm_set_cr3(&svm->vcpu, val); @@ -2741,23 +2732,6 @@ static int cr_interception(struct vcpu_svm *svm) return 1; } -static int cr0_write_interception(struct vcpu_svm *svm) -{ - struct kvm_vcpu *vcpu = &svm->vcpu; - int r; - - r = cr_interception(svm); - - if (svm->nested.vmexit_rip) { - kvm_register_write(vcpu, VCPU_REGS_RIP, svm->nested.vmexit_rip); - kvm_register_write(vcpu, VCPU_REGS_RSP, svm->nested.vmexit_rsp); - kvm_register_write(vcpu, VCPU_REGS_RAX, svm->nested.vmexit_rax); - svm->nested.vmexit_rip = 0; - } - - return r; -} - static int dr_interception(struct vcpu_svm *svm) { int reg, dr; @@ -3045,7 +3019,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { [SVM_EXIT_READ_CR4] = 
cr_interception, [SVM_EXIT_READ_CR8] = cr_interception, [SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception, - [SVM_EXIT_WRITE_CR0] = cr0_write_interception, + [SVM_EXIT_WRITE_CR0] = cr_interception, [SVM_EXIT_WRITE_CR3] = cr_interception, [SVM_EXIT_WRITE_CR4] = cr_interception, [SVM_EXIT_WRITE_CR8] = cr8_write_interception, -- cgit v1.2.3-70-g09d2 From 0be839bfb4b12f17fba2d982353b0f674e327f9e Mon Sep 17 00:00:00 2001 From: "Justin P. Mattock" Date: Wed, 30 Mar 2011 09:54:47 -0700 Subject: KVM: Remove base_addresss in kvm_pit since it is unused The patch below removes unsigned long base_addresss; in i8254.h since it is unused. Signed-off-by: Justin P. Mattock Signed-off-by: Avi Kivity --- arch/x86/kvm/i8254.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h index 46d08ca0b48..b681a9f78c5 100644 --- a/arch/x86/kvm/i8254.h +++ b/arch/x86/kvm/i8254.h @@ -33,7 +33,6 @@ struct kvm_kpit_state { }; struct kvm_pit { - unsigned long base_addresss; struct kvm_io_device dev; struct kvm_io_device speaker_dev; struct kvm *kvm; -- cgit v1.2.3-70-g09d2 From a0c0ab2feb9d696978a7475dce4253ec62e98a16 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Mon, 28 Mar 2011 16:57:49 +0200 Subject: KVM: x86 emulator: do not open code return values from the emulator Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index d88dcfd66a8..ae5f49105c3 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2287,7 +2287,7 @@ int emulator_task_switch(struct x86_emulate_ctxt *ctxt, ctxt->eip = c->eip; } - return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; + return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK; } static void string_addr_inc(struct x86_emulate_ctxt *ctxt, unsigned seg, @@ -3333,7 +3333,7 @@ done_prefixes: } done: - return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; + return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK; } static bool string_insn_completed(struct x86_emulate_ctxt *ctxt) @@ -4163,5 +4163,5 @@ twobyte_insn: goto writeback; cannot_emulate: - return -1; + return EMULATION_FAILED; } -- cgit v1.2.3-70-g09d2 From 7c5625227ff8c81953e953d8e25c3eba2ab0aeb3 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Mon, 28 Mar 2011 10:29:27 +0800 Subject: KVM: MMU: remove mmu_seq verification on pte update path The mmu_seq verification can be removed since we get the pfn under the protection of mmu_lock. 
Signed-off-by: Xiao Guangrong Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/mmu.c | 16 +++++----------- arch/x86/kvm/paging_tmpl.h | 4 +--- 3 files changed, 7 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index f7dfd6479d0..ecdc562ea3e 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -274,7 +274,7 @@ struct kvm_mmu { struct kvm_mmu_page *sp); void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva); void (*update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, - u64 *spte, const void *pte, unsigned long mmu_seq); + u64 *spte, const void *pte); hpa_t root_hpa; int root_level; int shadow_root_level; diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 22fae7593ee..28418054b88 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1206,7 +1206,7 @@ static void nonpaging_invlpg(struct kvm_vcpu *vcpu, gva_t gva) static void nonpaging_update_pte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, u64 *spte, - const void *pte, unsigned long mmu_seq) + const void *pte) { WARN_ON(1); } @@ -3163,9 +3163,8 @@ static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu, } static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu, - struct kvm_mmu_page *sp, - u64 *spte, - const void *new, unsigned long mmu_seq) + struct kvm_mmu_page *sp, u64 *spte, + const void *new) { if (sp->role.level != PT_PAGE_TABLE_LEVEL) { ++vcpu->kvm->stat.mmu_pde_zapped; @@ -3173,7 +3172,7 @@ static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu, } ++vcpu->kvm->stat.mmu_pte_updated; - vcpu->arch.mmu.update_pte(vcpu, sp, spte, new, mmu_seq); + vcpu->arch.mmu.update_pte(vcpu, sp, spte, new); } static bool need_remote_flush(u64 old, u64 new) @@ -3229,7 +3228,6 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, struct kvm_mmu_page *sp; struct hlist_node *node; LIST_HEAD(invalid_list); - unsigned long mmu_seq; u64 entry, gentry, *spte; unsigned pte_size, page_offset, misaligned, quadrant, offset; int level, npte, invlpg_counter, r, flooded = 0; @@ -3271,9 +3269,6 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, break; } - mmu_seq = vcpu->kvm->mmu_notifier_seq; - smp_rmb(); - spin_lock(&vcpu->kvm->mmu_lock); if (atomic_read(&vcpu->kvm->arch.invlpg_counter) != invlpg_counter) gentry = 0; @@ -3345,8 +3340,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, if (gentry && !((sp->role.word ^ vcpu->arch.mmu.base_role.word) & mask.word)) - mmu_pte_write_new_pte(vcpu, sp, spte, &gentry, - mmu_seq); + mmu_pte_write_new_pte(vcpu, sp, spte, &gentry); if (!remote_flush && need_remote_flush(entry, *spte)) remote_flush = true; ++spte; diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index c6397795d86..74f8567d57a 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -325,7 +325,7 @@ no_present: } static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, - u64 *spte, const void *pte, unsigned long mmu_seq) + u64 *spte, const void *pte) { pt_element_t gpte; unsigned pte_access; @@ -342,8 +342,6 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, kvm_release_pfn_clean(pfn); return; } - if (mmu_notifier_retry(vcpu, mmu_seq)) - return; /* * we call mmu_set_spte() with host_writable = true because that -- cgit v1.2.3-70-g09d2 From 09000adb86550d2895b64faa52e64eaec3cae7b2 Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Fri, 25 Mar 2011 10:32:13 +0530 Subject: KVM: PPC: Fix 
issue clearing exit timing counters Following dump is observed on host when clearing the exit timing counters [root@p1021mds kvm]# echo -n 'c' > vm1200_vcpu0_timing INFO: task echo:1276 blocked for more than 120 seconds. "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. echo D 0ff5bf94 0 1276 1190 0x00000000 Call Trace: [c2157e40] [c0007908] __switch_to+0x9c/0xc4 [c2157e50] [c040293c] schedule+0x1b4/0x3bc [c2157e90] [c04032dc] __mutex_lock_slowpath+0x74/0xc0 [c2157ec0] [c00369e4] kvmppc_init_timing_stats+0x20/0xb8 [c2157ed0] [c0036b00] kvmppc_exit_timing_write+0x84/0x98 [c2157ef0] [c00b9f90] vfs_write+0xc0/0x16c [c2157f10] [c00ba284] sys_write+0x4c/0x90 [c2157f40] [c000e320] ret_from_syscall+0x0/0x3c The vcpu->mutex is used by kvm_ioctl_* (KVM_RUN etc) and the same was used when clearing the stats (in kvmppc_init_timing_stats()). What happens is that when the guest is idle, it holds the vcpu->mutex. Meanwhile the exit timing process waits for the guest to release the vcpu->mutex, and a hang results. Now a separate lock is used for the exit timing stats. Signed-off-by: Bharat Bhushan Acked-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/include/asm/kvm_host.h | 1 + arch/powerpc/kvm/powerpc.c | 4 ++++ arch/powerpc/kvm/timing.c | 10 +++++++--- 3 files changed, 12 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index bba3b9b72a3..890897cee05 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -255,6 +255,7 @@ struct kvm_vcpu_arch { u32 dbsr; #ifdef CONFIG_KVM_EXIT_TIMING + struct mutex exit_timing_lock; struct kvmppc_exit_timing timing_exit; struct kvmppc_exit_timing timing_last_enter; u32 last_exit_type; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 99758460efd..ec3d2e75c0a 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -284,6 +284,10 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) tasklet_init(&vcpu->arch.tasklet, kvmppc_decrementer_func, (ulong)vcpu); vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup; +#ifdef CONFIG_KVM_EXIT_TIMING + mutex_init(&vcpu->arch.exit_timing_lock); +#endif + return 0; } diff --git a/arch/powerpc/kvm/timing.c b/arch/powerpc/kvm/timing.c index a021f5827a3..18f40fd3e98 100644 --- a/arch/powerpc/kvm/timing.c +++ b/arch/powerpc/kvm/timing.c @@ -34,8 +34,8 @@ void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu) { int i; - /* pause guest execution to avoid concurrent updates */ - mutex_lock(&vcpu->mutex); + /* Take a lock to avoid concurrent updates */ + mutex_lock(&vcpu->arch.exit_timing_lock); vcpu->arch.last_exit_type = 0xDEAD; for (i = 0; i < __NUMBER_OF_KVM_EXIT_TYPES; i++) { @@ -49,7 +49,7 @@ void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu) vcpu->arch.timing_exit.tv64 = 0; vcpu->arch.timing_last_enter.tv64 = 0; - mutex_unlock(&vcpu->mutex); + mutex_unlock(&vcpu->arch.exit_timing_lock); } static void add_exit_timing(struct kvm_vcpu *vcpu, u64 duration, int type) @@ -65,6 +65,8 @@ static void add_exit_timing(struct kvm_vcpu *vcpu, u64 duration, int type) return; } + mutex_lock(&vcpu->arch.exit_timing_lock); + vcpu->arch.timing_count_type[type]++; /* sum */ @@ -93,6 +95,8 @@ static void add_exit_timing(struct kvm_vcpu *vcpu, u64 duration, int type) vcpu->arch.timing_min_duration[type] = duration; if (unlikely(duration > vcpu->arch.timing_max_duration[type])) vcpu->arch.timing_max_duration[type] = duration; + 
mutex_unlock(&vcpu->arch.exit_timing_lock); } void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu) -- cgit v1.2.3-70-g09d2 From 8b18bc378224b4f195145b407b95768a289497e3 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 5 Apr 2011 16:21:58 +0300 Subject: KVM: x86 emulator: Re-add VendorSpecific tag to VMMCALL insn VMMCALL needs the VendorSpecific tag so that #UD emulation (called if a guest running on AMD was migrated to an Intel host) is allowed to process the instruction. Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index ae5f49105c3..0e31b0c249e 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2677,7 +2677,7 @@ static struct opcode group7_rm1[] = { static struct opcode group7_rm3[] = { DIP(SrcNone | ModRM | Prot | Priv, vmrun, check_svme_pa), - DIP(SrcNone | ModRM | Prot , vmmcall, check_svme), + DIP(SrcNone | ModRM | Prot | VendorSpecific, vmmcall, check_svme), DIP(SrcNone | ModRM | Prot | Priv, vmload, check_svme_pa), DIP(SrcNone | ModRM | Prot | Priv, vmsave, check_svme_pa), DIP(SrcNone | ModRM | Prot | Priv, stgi, check_svme), -- cgit v1.2.3-70-g09d2 From bfeed29d6d3ebd5f31253d2c067e4e6c4aeb376b Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 5 Apr 2011 16:25:20 +0300 Subject: KVM: x86 emulator: Drop EFER.SVME requirement from VMMCALL VMMCALL requires EFER.SVME to be enabled in the host, not in the guest, which is what check_svme() checks. Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 0e31b0c249e..50bffb98ca8 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2677,7 +2677,7 @@ static struct opcode group7_rm1[] = { static struct opcode group7_rm3[] = { DIP(SrcNone | ModRM | Prot | Priv, vmrun, check_svme_pa), - DIP(SrcNone | ModRM | Prot | VendorSpecific, vmmcall, check_svme), + DI(SrcNone | ModRM | Prot | VendorSpecific, vmmcall), DIP(SrcNone | ModRM | Prot | Priv, vmload, check_svme_pa), DIP(SrcNone | ModRM | Prot | Priv, vmsave, check_svme_pa), DIP(SrcNone | ModRM | Prot | Priv, stgi, check_svme), -- cgit v1.2.3-70-g09d2 From fbc0db76b77125e0a5131fb886cbaafa1ec5c525 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 25 Mar 2011 09:44:46 +0100 Subject: KVM: SVM: Implement infrastructure for TSC_RATE_MSR This patch enhances the kvm_amd module with functions to support the TSC_RATE_MSR which can be used to set a given tsc frequency for the guest vcpu. Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/include/asm/msr-index.h | 1 + arch/x86/kvm/svm.c | 54 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 54 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 3cce71413d0..485b4f1f079 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -118,6 +118,7 @@ complete list. 
*/ #define MSR_AMD64_PATCH_LEVEL 0x0000008b +#define MSR_AMD64_TSC_RATIO 0xc0000104 #define MSR_AMD64_NB_CFG 0xc001001f #define MSR_AMD64_PATCH_LOADER 0xc0010020 #define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140 diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 779b09194f0..83015009995 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -63,6 +63,8 @@ MODULE_LICENSE("GPL"); #define DEBUGCTL_RESERVED_BITS (~(0x3fULL)) +#define TSC_RATIO_RSVD 0xffffff0000000000ULL + static bool erratum_383_found __read_mostly; static const u32 host_save_user_msrs[] = { @@ -136,8 +138,13 @@ struct vcpu_svm { unsigned int3_injected; unsigned long int3_rip; u32 apf_reason; + + u64 tsc_ratio; }; +static DEFINE_PER_CPU(u64, current_tsc_ratio); +#define TSC_RATIO_DEFAULT 0x0100000000ULL + #define MSR_INVALID 0xffffffffU static struct svm_direct_access_msrs { @@ -560,6 +567,10 @@ static int has_svm(void) static void svm_hardware_disable(void *garbage) { + /* Make sure we clean up behind us */ + if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) + wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT); + cpu_svm_disable(); } @@ -601,6 +612,11 @@ static int svm_hardware_enable(void *garbage) wrmsrl(MSR_VM_HSAVE_PA, page_to_pfn(sd->save_area) << PAGE_SHIFT); + if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) { + wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT); + __get_cpu_var(current_tsc_ratio) = TSC_RATIO_DEFAULT; + } + svm_init_erratum_383(); return 0; @@ -843,6 +859,32 @@ static void init_sys_seg(struct vmcb_seg *seg, uint32_t type) seg->base = 0; } +static u64 __scale_tsc(u64 ratio, u64 tsc) +{ + u64 mult, frac, _tsc; + + mult = ratio >> 32; + frac = ratio & ((1ULL << 32) - 1); + + _tsc = tsc; + _tsc *= mult; + _tsc += (tsc >> 32) * frac; + _tsc += ((tsc & ((1ULL << 32) - 1)) * frac) >> 32; + + return _tsc; +} + +static u64 svm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc) +{ + struct vcpu_svm *svm = to_svm(vcpu); + u64 _tsc = tsc; + + if (svm->tsc_ratio != TSC_RATIO_DEFAULT) + _tsc = __scale_tsc(svm->tsc_ratio, tsc); + + return _tsc; +} + static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) { struct vcpu_svm *svm = to_svm(vcpu); @@ -1037,6 +1079,8 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) goto out; } + svm->tsc_ratio = TSC_RATIO_DEFAULT; + err = kvm_vcpu_init(&svm->vcpu, kvm, id); if (err) goto free_svm; @@ -1130,6 +1174,12 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) rdmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]); + + if (static_cpu_has(X86_FEATURE_TSCRATEMSR) && + svm->tsc_ratio != __get_cpu_var(current_tsc_ratio)) { + __get_cpu_var(current_tsc_ratio) = svm->tsc_ratio; + wrmsrl(MSR_AMD64_TSC_RATIO, svm->tsc_ratio); + } } static void svm_vcpu_put(struct kvm_vcpu *vcpu) @@ -2784,7 +2834,9 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) case MSR_IA32_TSC: { struct vmcb *vmcb = get_host_vmcb(svm); - *data = vmcb->control.tsc_offset + native_read_tsc(); + *data = vmcb->control.tsc_offset + + svm_scale_tsc(vcpu, native_read_tsc()); + break; } case MSR_STAR: -- cgit v1.2.3-70-g09d2 From 1e993611d0dc879fde25515dc9867d1cfd4c5137 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 25 Mar 2011 09:44:47 +0100 Subject: KVM: X86: Let kvm-clock report the right tsc frequency This patch changes the kvm_guest_time_update function to use the TSC frequency the guest actually has when updating its clock. 
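Both this patch and the TSC_RATE_MSR patch above build on the same ratio format: MSR_AMD64_TSC_RATIO holds an 8.32 fixed-point multiplier, so TSC_RATIO_DEFAULT (0x0100000000) means 1.0, and __scale_tsc() splits the 64x64-bit multiply into parts to avoid overflow. The following standalone model of that arithmetic is an illustration only, with hypothetical example values:

#include <stdint.h>
#include <stdio.h>

/* mirrors __scale_tsc(): multiply a tsc value by an 8.32 fixed-point ratio */
static uint64_t scale_tsc(uint64_t ratio, uint64_t tsc)
{
	uint64_t mult = ratio >> 32;            /* integer part */
	uint64_t frac = ratio & 0xffffffffULL;  /* fractional part */

	return tsc * mult
	       + (tsc >> 32) * frac
	       + (((tsc & 0xffffffffULL) * frac) >> 32);
}

int main(void)
{
	/* hypothetical: 3 GHz host, guest configured for 1.5 GHz */
	uint64_t host_khz = 3000000, guest_khz = 1500000;
	uint64_t ratio = (guest_khz << 32) / host_khz;  /* 0x80000000 == 0.5 */

	/* 6e9 host cycles scale to 3e9 guest cycles */
	printf("ratio=%#llx guest_tsc=%llu\n", (unsigned long long)ratio,
	       (unsigned long long)scale_tsc(ratio, 6000000000ULL));
	return 0;
}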
Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 6 +++--- arch/x86/kvm/x86.c | 25 +++++++++++++++---------- 2 files changed, 18 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index ecdc562ea3e..e3aaa02ca03 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -396,7 +396,10 @@ struct kvm_vcpu_arch { u64 last_kernel_ns; u64 last_tsc_nsec; u64 last_tsc_write; + u32 virtual_tsc_khz; bool tsc_catchup; + u32 tsc_catchup_mult; + s8 tsc_catchup_shift; bool nmi_pending; bool nmi_injected; @@ -466,9 +469,6 @@ struct kvm_arch { u64 last_tsc_nsec; u64 last_tsc_offset; u64 last_tsc_write; - u32 virtual_tsc_khz; - u32 virtual_tsc_mult; - s8 virtual_tsc_shift; struct kvm_xen_hvm_config xen_hvm_config; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0d6524fa2af..78d729174d9 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -969,6 +969,14 @@ static inline int kvm_tsc_changes_freq(void) return ret; } +static u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu) +{ + if (vcpu->arch.virtual_tsc_khz) + return vcpu->arch.virtual_tsc_khz; + else + return __this_cpu_read(cpu_tsc_khz); +} + static inline u64 nsec_to_cycles(u64 nsec) { u64 ret; @@ -982,20 +990,19 @@ static inline u64 nsec_to_cycles(u64 nsec) return ret; } -static void kvm_arch_set_tsc_khz(struct kvm *kvm, u32 this_tsc_khz) +static void kvm_init_tsc_catchup(struct kvm_vcpu *vcpu, u32 this_tsc_khz) { /* Compute a scale to convert nanoseconds in TSC cycles */ kvm_get_time_scale(this_tsc_khz, NSEC_PER_SEC / 1000, - &kvm->arch.virtual_tsc_shift, - &kvm->arch.virtual_tsc_mult); - kvm->arch.virtual_tsc_khz = this_tsc_khz; + &vcpu->arch.tsc_catchup_shift, + &vcpu->arch.tsc_catchup_mult); } static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns) { u64 tsc = pvclock_scale_delta(kernel_ns-vcpu->arch.last_tsc_nsec, - vcpu->kvm->arch.virtual_tsc_mult, - vcpu->kvm->arch.virtual_tsc_shift); + vcpu->arch.tsc_catchup_mult, + vcpu->arch.tsc_catchup_shift); tsc += vcpu->arch.last_tsc_write; return tsc; } @@ -1062,8 +1069,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) local_irq_save(flags); kvm_get_msr(v, MSR_IA32_TSC, &tsc_timestamp); kernel_ns = get_kernel_ns(); - this_tsc_khz = __this_cpu_read(cpu_tsc_khz); - + this_tsc_khz = vcpu_tsc_khz(v); if (unlikely(this_tsc_khz == 0)) { local_irq_restore(flags); kvm_make_request(KVM_REQ_CLOCK_UPDATE, v); @@ -6060,8 +6066,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) } vcpu->arch.pio_data = page_address(page); - if (!kvm->arch.virtual_tsc_khz) - kvm_arch_set_tsc_khz(kvm, max_tsc_khz); + kvm_init_tsc_catchup(vcpu, max_tsc_khz); r = kvm_mmu_create(vcpu); if (r < 0) -- cgit v1.2.3-70-g09d2 From 8f6055cbaf68cbd9ff2692a2cfa691b43629ccd4 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 25 Mar 2011 09:44:48 +0100 Subject: KVM: X86: Make tsc_delta calculation a function of guest tsc The calculation of the tsc_delta value to ensure a forward-going tsc for the guest is a function of the host-tsc. This works as long as the guest's tsc_khz is equal to the host's tsc_khz. With tsc-scaling hardware support this is no longer true and the tsc_delta needs to be calculated using guest_tsc values. 
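A worked example with hypothetical numbers: at a scaling ratio of 0.5, 2e9 elapsed host cycles correspond to only 1e9 guest cycles, so a host-based delta would overshoot the guest clock by a factor of two. In the spirit of the patch, the delta must therefore be formed entirely in guest units (sketch only, reusing scale_tsc() from the model above; the helper names are illustrative):

/* guest view of "now": scaled host tsc plus the vcpu's tsc offset */
static uint64_t guest_tsc_now(uint64_t ratio, uint64_t host_tsc,
			      uint64_t tsc_offset)
{
	return scale_tsc(ratio, host_tsc) + tsc_offset;
}

/* forward-going delta, computed purely in guest cycles */
static int64_t guest_tsc_delta(uint64_t ratio, uint64_t host_tsc,
			       uint64_t tsc_offset, uint64_t last_guest_tsc)
{
	return (int64_t)(guest_tsc_now(ratio, host_tsc, tsc_offset)
			 - last_guest_tsc);
}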
Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 78d729174d9..fcce29b7b6f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2113,8 +2113,13 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) kvm_x86_ops->vcpu_load(vcpu, cpu); if (unlikely(vcpu->cpu != cpu) || check_tsc_unstable()) { /* Make sure TSC doesn't go backwards */ - s64 tsc_delta = !vcpu->arch.last_host_tsc ? 0 : - native_read_tsc() - vcpu->arch.last_host_tsc; + s64 tsc_delta; + u64 tsc; + + kvm_get_msr(vcpu, MSR_IA32_TSC, &tsc); + tsc_delta = !vcpu->arch.last_guest_tsc ? 0 : + tsc - vcpu->arch.last_guest_tsc; + if (tsc_delta < 0) mark_tsc_unstable("KVM discovered backwards TSC"); if (check_tsc_unstable()) { -- cgit v1.2.3-70-g09d2 From 4051b18801f5b47bb0369feefdc80e57819d0ddf Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 25 Mar 2011 09:44:49 +0100 Subject: KVM: X86: Implement call-back to propagate virtual_tsc_khz This patch implements a call-back into the architecture code to allow the propagation of changes to the virtual tsc_khz of the vcpu. On SVM it updates the tsc_ratio variable, on VMX it does nothing. Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/svm.c | 33 +++++++++++++++++++++++++++++++++ arch/x86/kvm/vmx.c | 11 +++++++++++ 3 files changed, 45 insertions(+) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index e3aaa02ca03..f3a7116f802 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -606,6 +606,7 @@ struct kvm_x86_ops { bool (*has_wbinvd_exit)(void); + void (*set_tsc_khz)(struct kvm_vcpu *vcpu, u32 user_tsc_khz); void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset); void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 83015009995..a39fde4f5fe 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -885,6 +885,38 @@ static u64 svm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc) return _tsc; } +static void svm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz) +{ + struct vcpu_svm *svm = to_svm(vcpu); + u64 ratio; + u64 khz; + + /* TSC scaling supported? */ + if (!boot_cpu_has(X86_FEATURE_TSCRATEMSR)) + return; + + /* TSC-Scaling disabled or guest TSC same frequency as host TSC? */ + if (user_tsc_khz == 0) { + vcpu->arch.virtual_tsc_khz = 0; + svm->tsc_ratio = TSC_RATIO_DEFAULT; + return; + } + + khz = user_tsc_khz; + + /* TSC scaling required - calculate ratio */ + ratio = khz << 32; + do_div(ratio, tsc_khz); + + if (ratio == 0 || ratio & TSC_RATIO_RSVD) { + WARN_ONCE(1, "Invalid TSC ratio - virtual-tsc-khz=%u\n", + user_tsc_khz); + return; + } + vcpu->arch.virtual_tsc_khz = user_tsc_khz; + svm->tsc_ratio = ratio; +} + static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) { struct vcpu_svm *svm = to_svm(vcpu); @@ -4159,6 +4191,7 @@ static struct kvm_x86_ops svm_x86_ops = { .has_wbinvd_exit = svm_has_wbinvd_exit, + .set_tsc_khz = svm_set_tsc_khz, .write_tsc_offset = svm_write_tsc_offset, .adjust_tsc_offset = svm_adjust_tsc_offset, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 3dfefe3bcd0..e19c7a5473d 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1160,6 +1160,16 @@ static u64 guest_read_tsc(void) return host_tsc + tsc_offset; } +/* + * Empty call-back. 
Needs to be implemented when VMX enables the SET_TSC_KHZ + * ioctl. In this case the call-back should update internal vmx state to make + * the changes effective. + */ +static void vmx_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz) +{ + /* Nothing to do here */ +} + /* * writes 'offset' into guest's timestamp counter offset register */ @@ -4497,6 +4507,7 @@ static struct kvm_x86_ops vmx_x86_ops = { .has_wbinvd_exit = cpu_has_vmx_wbinvd_exit, + .set_tsc_khz = vmx_set_tsc_khz, .write_tsc_offset = vmx_write_tsc_offset, .adjust_tsc_offset = vmx_adjust_tsc_offset, -- cgit v1.2.3-70-g09d2 From 857e40999e35906baa367a79137019912cfb5434 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 25 Mar 2011 09:44:50 +0100 Subject: KVM: X86: Delegate tsc-offset calculation to architecture code With TSC scaling in SVM the tsc-offset needs to be calculated differently. This patch propagates this calculation into the architecture specific modules so that this complexity can be handled there. Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 2 ++ arch/x86/kvm/svm.c | 10 ++++++++++ arch/x86/kvm/vmx.c | 6 ++++++ arch/x86/kvm/x86.c | 10 +++++----- 4 files changed, 23 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index f3a7116f802..da0a8ce3a13 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -609,6 +609,8 @@ struct kvm_x86_ops { void (*set_tsc_khz)(struct kvm_vcpu *vcpu, u32 user_tsc_khz); void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset); + u64 (*compute_tsc_offset)(struct kvm_vcpu *vcpu, u64 target_tsc); + void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2); int (*check_intercept)(struct kvm_vcpu *vcpu, diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index a39fde4f5fe..8c4549bef4e 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -943,6 +943,15 @@ static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment) mark_dirty(svm->vmcb, VMCB_INTERCEPTS); } +static u64 svm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc) +{ + u64 tsc; + + tsc = svm_scale_tsc(vcpu, native_read_tsc()); + + return target_tsc - tsc; +} + static void init_vmcb(struct vcpu_svm *svm) { struct vmcb_control_area *control = &svm->vmcb->control; @@ -4194,6 +4203,7 @@ static struct kvm_x86_ops svm_x86_ops = { .set_tsc_khz = svm_set_tsc_khz, .write_tsc_offset = svm_write_tsc_offset, .adjust_tsc_offset = svm_adjust_tsc_offset, + .compute_tsc_offset = svm_compute_tsc_offset, .set_tdp_cr3 = set_tdp_cr3, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index e19c7a5473d..aabe3334d06 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1184,6 +1184,11 @@ static void vmx_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment) vmcs_write64(TSC_OFFSET, offset + adjustment); } +static u64 vmx_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc) +{ + return target_tsc - native_read_tsc(); +} + /* * Reads an msr value (of 'msr_index') into 'pdata'. * Returns 0 on success, non-0 otherwise. 
@@ -4510,6 +4515,7 @@ static struct kvm_x86_ops vmx_x86_ops = { .set_tsc_khz = vmx_set_tsc_khz, .write_tsc_offset = vmx_write_tsc_offset, .adjust_tsc_offset = vmx_adjust_tsc_offset, + .compute_tsc_offset = vmx_compute_tsc_offset, .set_tdp_cr3 = vmx_set_cr3, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index fcce29b7b6f..579ce34e790 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -977,7 +977,7 @@ static u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu) return __this_cpu_read(cpu_tsc_khz); } -static inline u64 nsec_to_cycles(u64 nsec) +static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec) { u64 ret; @@ -985,7 +985,7 @@ static inline u64 nsec_to_cycles(u64 nsec) if (kvm_tsc_changes_freq()) printk_once(KERN_WARNING "kvm: unreliable cycle conversion on adjustable rate TSC\n"); - ret = nsec * __this_cpu_read(cpu_tsc_khz); + ret = nsec * vcpu_tsc_khz(vcpu); do_div(ret, USEC_PER_SEC); return ret; } @@ -1015,7 +1015,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data) s64 sdiff; raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags); - offset = data - native_read_tsc(); + offset = kvm_x86_ops->compute_tsc_offset(vcpu, data); ns = get_kernel_ns(); elapsed = ns - kvm->arch.last_tsc_nsec; sdiff = data - kvm->arch.last_tsc_write; @@ -1031,13 +1031,13 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data) * In that case, for a reliable TSC, we can match TSC offsets, * or make a best guess using elapsed value. */ - if (sdiff < nsec_to_cycles(5ULL * NSEC_PER_SEC) && + if (sdiff < nsec_to_cycles(vcpu, 5ULL * NSEC_PER_SEC) && elapsed < 5ULL * NSEC_PER_SEC) { if (!check_tsc_unstable()) { offset = kvm->arch.last_tsc_offset; pr_debug("kvm: matched tsc offset for %llu\n", data); } else { - u64 delta = nsec_to_cycles(elapsed); + u64 delta = nsec_to_cycles(vcpu, elapsed); offset += delta; pr_debug("kvm: adjusted tsc offset by %llu\n", delta); } -- cgit v1.2.3-70-g09d2 From 92a1f12d2598f429bd8639e21d89305e787115c5 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 25 Mar 2011 09:44:51 +0100 Subject: KVM: X86: Implement userspace interface to set virtual_tsc_khz This patch implements two new vcpu ioctls to get and set the virtual_tsc_khz if the machine supports tsc-scaling. Setting the tsc-frequency is only possible before userspace creates any vcpu. Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- Documentation/kvm/api.txt | 23 +++++++++++++++++++++++ arch/x86/include/asm/kvm_host.h | 7 +++++++ arch/x86/kvm/svm.c | 20 ++++++++++++++++++++ arch/x86/kvm/x86.c | 35 +++++++++++++++++++++++++++++++++++ include/linux/kvm.h | 5 +++++ 5 files changed, 90 insertions(+) (limited to 'arch') diff --git a/Documentation/kvm/api.txt b/Documentation/kvm/api.txt index 9bef4e4cec5..1b9eaa7e885 100644 --- a/Documentation/kvm/api.txt +++ b/Documentation/kvm/api.txt @@ -1263,6 +1263,29 @@ struct kvm_assigned_msix_entry { __u16 padding[3]; }; +4.54 KVM_SET_TSC_KHZ + +Capability: KVM_CAP_TSC_CONTROL +Architectures: x86 +Type: vcpu ioctl +Parameters: virtual tsc_khz +Returns: 0 on success, -1 on error + +Specifies the tsc frequency for the virtual machine. The unit of the +frequency is KHz. + +4.55 KVM_GET_TSC_KHZ + +Capability: KVM_CAP_GET_TSC_KHZ +Architectures: x86 +Type: vcpu ioctl +Parameters: none +Returns: virtual tsc-khz on success, negative value on error + +Returns the tsc frequency of the guest. The unit of the return value is +KHz. If the host has an unstable tsc this ioctl returns -EIO as an +error. + 5. 
The kvm_run structure Application code obtains a pointer to the kvm_run structure by diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index da0a8ce3a13..bd57639fd5d 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -655,6 +655,13 @@ u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn); extern bool tdp_enabled; +/* control of guest tsc rate supported? */ +extern bool kvm_has_tsc_control; +/* minimum supported tsc_khz for guests */ +extern u32 kvm_min_guest_tsc_khz; +/* maximum supported tsc_khz for guests */ +extern u32 kvm_max_guest_tsc_khz; + enum emulation_result { EMULATE_DONE, /* no further processing */ EMULATE_DO_MMIO, /* kvm_run filled with mmio request */ diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 8c4549bef4e..a9887376243 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -64,6 +64,8 @@ MODULE_LICENSE("GPL"); #define DEBUGCTL_RESERVED_BITS (~(0x3fULL)) #define TSC_RATIO_RSVD 0xffffff0000000000ULL +#define TSC_RATIO_MIN 0x0000000000000001ULL +#define TSC_RATIO_MAX 0x000000ffffffffffULL static bool erratum_383_found __read_mostly; @@ -189,6 +191,7 @@ static int nested_svm_intercept(struct vcpu_svm *svm); static int nested_svm_vmexit(struct vcpu_svm *svm); static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, bool has_error_code, u32 error_code); +static u64 __scale_tsc(u64 ratio, u64 tsc); enum { VMCB_INTERCEPTS, /* Intercept vectors, TSC offset, @@ -798,6 +801,23 @@ static __init int svm_hardware_setup(void) if (boot_cpu_has(X86_FEATURE_FXSR_OPT)) kvm_enable_efer_bits(EFER_FFXSR); + if (boot_cpu_has(X86_FEATURE_TSCRATEMSR)) { + u64 max; + + kvm_has_tsc_control = true; + + /* + * Make sure the user can only configure tsc_khz values that + * fit into a signed integer. + * A min value is not calculated because it will always + * be 1 on all machines and a value of 0 is used to disable + * tsc-scaling for the vcpu. 
+ */ max = min(0x7fffffffULL, __scale_tsc(tsc_khz, TSC_RATIO_MAX)); + kvm_max_guest_tsc_khz = max; + } if (nested) { printk(KERN_INFO "kvm: Nested Virtualization enabled\n"); kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 579ce34e790..1d5a7f41879 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -87,6 +87,11 @@ EXPORT_SYMBOL_GPL(kvm_x86_ops); int ignore_msrs = 0; module_param_named(ignore_msrs, ignore_msrs, bool, S_IRUGO | S_IWUSR); +bool kvm_has_tsc_control; +EXPORT_SYMBOL_GPL(kvm_has_tsc_control); +u32 kvm_max_guest_tsc_khz; +EXPORT_SYMBOL_GPL(kvm_max_guest_tsc_khz); + #define KVM_NR_SHARED_MSRS 16 struct kvm_shared_msrs_global { @@ -1986,6 +1991,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_X86_ROBUST_SINGLESTEP: case KVM_CAP_XSAVE: case KVM_CAP_ASYNC_PF: + case KVM_CAP_GET_TSC_KHZ: r = 1; break; case KVM_CAP_COALESCED_MMIO: @@ -2012,6 +2018,9 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_XCRS: r = cpu_has_xsave; break; + case KVM_CAP_TSC_CONTROL: + r = kvm_has_tsc_control; + break; default: r = 0; break; @@ -3045,6 +3054,32 @@ long kvm_arch_vcpu_ioctl(struct file *filp, r = kvm_vcpu_ioctl_x86_set_xcrs(vcpu, u.xcrs); break; } + case KVM_SET_TSC_KHZ: { + u32 user_tsc_khz; + + r = -EINVAL; + if (!kvm_has_tsc_control) + break; + + user_tsc_khz = (u32)arg; + + if (user_tsc_khz >= kvm_max_guest_tsc_khz) + goto out; + + kvm_x86_ops->set_tsc_khz(vcpu, user_tsc_khz); + + r = 0; + goto out; + } + case KVM_GET_TSC_KHZ: { + r = -EIO; + if (check_tsc_unstable()) + goto out; + + r = vcpu_tsc_khz(vcpu); + + goto out; + } default: r = -EINVAL; } diff --git a/include/linux/kvm.h b/include/linux/kvm.h index ea2dc1a2e13..2f63ebeac63 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -541,6 +541,8 @@ struct kvm_ppc_pvinfo { #define KVM_CAP_PPC_GET_PVINFO 57 #define KVM_CAP_PPC_IRQ_LEVEL 58 #define KVM_CAP_ASYNC_PF 59 +#define KVM_CAP_TSC_CONTROL 60 +#define KVM_CAP_GET_TSC_KHZ 61 #ifdef KVM_CAP_IRQ_ROUTING @@ -677,6 +679,9 @@ struct kvm_clock_data { #define KVM_SET_PIT2 _IOW(KVMIO, 0xa0, struct kvm_pit_state2) /* Available with KVM_CAP_PPC_GET_PVINFO */ #define KVM_PPC_GET_PVINFO _IOW(KVMIO, 0xa1, struct kvm_ppc_pvinfo) +/* Available with KVM_CAP_TSC_CONTROL */ +#define KVM_SET_TSC_KHZ _IO(KVMIO, 0xa2) +#define KVM_GET_TSC_KHZ _IO(KVMIO, 0xa3) /* * ioctls for vcpu fds -- cgit v1.2.3-70-g09d2 From e3e9ed3d2c443fd90a04fb7ff231ad53ef087417 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 6 Apr 2011 12:30:03 +0200 Subject: KVM: SVM: Fix fault-rip on vmsave/vmload emulation When the emulation of vmload or vmsave fails because the guest passed an unsupported physical address, it gets a #GP with rip pointing to the instruction after vmsave/vmload. This is a bug, which this patch fixes. 
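A usage note on the KVM_SET_TSC_KHZ/KVM_GET_TSC_KHZ ioctls defined above: both pass the frequency as the raw ioctl argument and return value rather than through a struct. A hypothetical userspace sketch (error handling trimmed; assumes a vcpu fd obtained via KVM_CREATE_VCPU):

#include <sys/ioctl.h>
#include <linux/kvm.h>

/* returns the effective guest tsc frequency in kHz, or -1 on error */
static int set_guest_tsc_khz(int vcpu_fd, unsigned int khz)
{
	/* fails with EINVAL if the host lacks TSC scaling support
	   or khz exceeds kvm_max_guest_tsc_khz */
	if (ioctl(vcpu_fd, KVM_SET_TSC_KHZ, khz) < 0)
		return -1;

	/* reads the frequency back; fails with EIO if the host tsc
	   is unstable */
	return ioctl(vcpu_fd, KVM_GET_TSC_KHZ);
}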
Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/kvm/svm.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index a9887376243..a6bf2ad7429 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -2518,13 +2518,13 @@ static int vmload_interception(struct vcpu_svm *svm) if (nested_svm_check_permissions(svm)) return 1; - svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; - skip_emulated_instruction(&svm->vcpu); - nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page); if (!nested_vmcb) return 1; + svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; + skip_emulated_instruction(&svm->vcpu); + nested_svm_vmloadsave(nested_vmcb, svm->vmcb); nested_svm_unmap(page); @@ -2539,13 +2539,13 @@ static int vmsave_interception(struct vcpu_svm *svm) if (nested_svm_check_permissions(svm)) return 1; - svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; - skip_emulated_instruction(&svm->vcpu); - nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page); if (!nested_vmcb) return 1; + svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; + skip_emulated_instruction(&svm->vcpu); + nested_svm_vmloadsave(svm->vmcb, nested_vmcb); nested_svm_unmap(page); -- cgit v1.2.3-70-g09d2 From 3ca3ac4dae5da5af375a9e80d2316ccfa7f0c6ab Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 31 Mar 2011 16:52:26 +0200 Subject: KVM: x86 emulator: Add helpers for memory access using segmented addresses Will help later adding proper segment checks. Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 75 ++++++++++++++++++++++++++++++++++---------------- 1 file changed, 52 insertions(+), 23 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 50bffb98ca8..8c38f6ca935 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -540,6 +540,15 @@ static int emulate_nm(struct x86_emulate_ctxt *ctxt) return emulate_exception(ctxt, NM_VECTOR, 0, false); } +static int segmented_read_std(struct x86_emulate_ctxt *ctxt, + struct segmented_address addr, + void *data, + unsigned size) +{ + return ctxt->ops->read_std(linear(ctxt, addr), data, size, ctxt->vcpu, + &ctxt->exception); +} + static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops, unsigned long eip, u8 *dest) @@ -604,13 +613,11 @@ static int read_descriptor(struct x86_emulate_ctxt *ctxt, if (op_bytes == 2) op_bytes = 3; *address = 0; - rc = ops->read_std(linear(ctxt, addr), (unsigned long *)size, 2, - ctxt->vcpu, &ctxt->exception); + rc = segmented_read_std(ctxt, addr, size, 2); if (rc != X86EMUL_CONTINUE) return rc; addr.ea += 2; - rc = ops->read_std(linear(ctxt, addr), address, op_bytes, - ctxt->vcpu, &ctxt->exception); + rc = segmented_read_std(ctxt, addr, address, op_bytes); return rc; } @@ -950,6 +957,32 @@ static int read_emulated(struct x86_emulate_ctxt *ctxt, return X86EMUL_CONTINUE; } +static int segmented_read(struct x86_emulate_ctxt *ctxt, + struct segmented_address addr, + void *data, + unsigned size) +{ + return read_emulated(ctxt, ctxt->ops, linear(ctxt, addr), data, size); +} + +static int segmented_write(struct x86_emulate_ctxt *ctxt, + struct segmented_address addr, + const void *data, + unsigned size) +{ + return ctxt->ops->write_emulated(linear(ctxt, addr), data, size, + &ctxt->exception, ctxt->vcpu); +} + +static int segmented_cmpxchg(struct x86_emulate_ctxt *ctxt, + struct segmented_address addr, + const void *orig_data, const void *data, + unsigned size) +{ + return 
ctxt->ops->cmpxchg_emulated(linear(ctxt, addr), orig_data, data, + size, &ctxt->exception, ctxt->vcpu); +} + static int pio_in_emulated(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops, unsigned int size, unsigned short port, @@ -1197,20 +1230,16 @@ static inline int writeback(struct x86_emulate_ctxt *ctxt, break; case OP_MEM: if (c->lock_prefix) - rc = ops->cmpxchg_emulated( - linear(ctxt, c->dst.addr.mem), - &c->dst.orig_val, - &c->dst.val, - c->dst.bytes, - &ctxt->exception, - ctxt->vcpu); + rc = segmented_cmpxchg(ctxt, + c->dst.addr.mem, + &c->dst.orig_val, + &c->dst.val, + c->dst.bytes); else - rc = ops->write_emulated( - linear(ctxt, c->dst.addr.mem), - &c->dst.val, - c->dst.bytes, - &ctxt->exception, - ctxt->vcpu); + rc = segmented_write(ctxt, + c->dst.addr.mem, + &c->dst.val, + c->dst.bytes); if (rc != X86EMUL_CONTINUE) return rc; break; @@ -1249,7 +1278,7 @@ static int emulate_pop(struct x86_emulate_ctxt *ctxt, addr.ea = register_address(c, c->regs[VCPU_REGS_RSP]); addr.seg = VCPU_SREG_SS; - rc = read_emulated(ctxt, ops, linear(ctxt, addr), dest, len); + rc = segmented_read(ctxt, addr, dest, len); if (rc != X86EMUL_CONTINUE) return rc; @@ -3440,16 +3469,16 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt) } if ((c->src.type == OP_MEM) && !(c->d & NoAccess)) { - rc = read_emulated(ctxt, ops, linear(ctxt, c->src.addr.mem), - c->src.valptr, c->src.bytes); + rc = segmented_read(ctxt, c->src.addr.mem, + c->src.valptr, c->src.bytes); if (rc != X86EMUL_CONTINUE) goto done; c->src.orig_val64 = c->src.val64; } if (c->src2.type == OP_MEM) { - rc = read_emulated(ctxt, ops, linear(ctxt, c->src2.addr.mem), - &c->src2.val, c->src2.bytes); + rc = segmented_read(ctxt, c->src2.addr.mem, + &c->src2.val, c->src2.bytes); if (rc != X86EMUL_CONTINUE) goto done; } @@ -3460,7 +3489,7 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt) if ((c->dst.type == OP_MEM) && !(c->d & Mov)) { /* optimisation - avoid slow emulated read if Mov */ - rc = read_emulated(ctxt, ops, linear(ctxt, c->dst.addr.mem), + rc = segmented_read(ctxt, c->dst.addr.mem, &c->dst.val, c->dst.bytes); if (rc != X86EMUL_CONTINUE) goto done; -- cgit v1.2.3-70-g09d2 From 38503911b32186240301bbe81601cfabb37e752e Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 31 Mar 2011 18:48:09 +0200 Subject: KVM: x86 emulator: move invlpg emulation into a function It's going to get more complicated soon. Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 8c38f6ca935..c522b4e3dbb 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2477,6 +2477,15 @@ static int em_movdqu(struct x86_emulate_ctxt *ctxt) return X86EMUL_CONTINUE; } +static int em_invlpg(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + emulate_invlpg(ctxt->vcpu, linear(ctxt, c->src.addr.mem)); + /* Disable writeback. */ + c->dst.type = OP_NONE; + return X86EMUL_CONTINUE; +} + static bool valid_cr(int nr) { switch (nr) { @@ -3966,10 +3975,7 @@ twobyte_insn: rc = X86EMUL_PROPAGATE_FAULT; goto done; case 7: /* invlpg*/ - emulate_invlpg(ctxt->vcpu, - linear(ctxt, c->src.addr.mem)); - /* Disable writeback. 
*/ - c->dst.type = OP_NONE; + rc = em_invlpg(ctxt); break; default: goto cannot_emulate; -- cgit v1.2.3-70-g09d2 From 9fa088f4d24f045d91c37a5e55f0d2be2ef387ad Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 31 Mar 2011 18:54:30 +0200 Subject: KVM: x86 emulator: change address linearization to return an error code Preparing to add segment checks. Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 47 +++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 39 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index c522b4e3dbb..b46fa374d0f 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -489,8 +489,9 @@ static unsigned seg_override(struct x86_emulate_ctxt *ctxt, return c->seg_override; } -static ulong linear(struct x86_emulate_ctxt *ctxt, - struct segmented_address addr) +static int linearize(struct x86_emulate_ctxt *ctxt, + struct segmented_address addr, + ulong *linear) { struct decode_cache *c = &ctxt->decode; ulong la; @@ -498,7 +499,8 @@ static ulong linear(struct x86_emulate_ctxt *ctxt, la = seg_base(ctxt, ctxt->ops, addr.seg) + addr.ea; if (c->ad_bytes != 8) la &= (u32)-1; - return la; + *linear = la; + return X86EMUL_CONTINUE; } static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec, @@ -545,7 +547,13 @@ static int segmented_read_std(struct x86_emulate_ctxt *ctxt, void *data, unsigned size) { - return ctxt->ops->read_std(linear(ctxt, addr), data, size, ctxt->vcpu, + int rc; + ulong linear; + + rc = linearize(ctxt, addr, &linear); + if (rc != X86EMUL_CONTINUE) + return rc; + return ctxt->ops->read_std(linear, data, size, ctxt->vcpu, &ctxt->exception); } @@ -962,7 +970,13 @@ static int segmented_read(struct x86_emulate_ctxt *ctxt, void *data, unsigned size) { - return read_emulated(ctxt, ctxt->ops, linear(ctxt, addr), data, size); + int rc; + ulong linear; + + rc = linearize(ctxt, addr, &linear); + if (rc != X86EMUL_CONTINUE) + return rc; + return read_emulated(ctxt, ctxt->ops, linear, data, size); } static int segmented_write(struct x86_emulate_ctxt *ctxt, @@ -970,7 +984,13 @@ static int segmented_write(struct x86_emulate_ctxt *ctxt, const void *data, unsigned size) { - return ctxt->ops->write_emulated(linear(ctxt, addr), data, size, + int rc; + ulong linear; + + rc = linearize(ctxt, addr, &linear); + if (rc != X86EMUL_CONTINUE) + return rc; + return ctxt->ops->write_emulated(linear, data, size, &ctxt->exception, ctxt->vcpu); } @@ -979,7 +999,13 @@ static int segmented_cmpxchg(struct x86_emulate_ctxt *ctxt, const void *orig_data, const void *data, unsigned size) { - return ctxt->ops->cmpxchg_emulated(linear(ctxt, addr), orig_data, data, + int rc; + ulong linear; + + rc = linearize(ctxt, addr, &linear); + if (rc != X86EMUL_CONTINUE) + return rc; + return ctxt->ops->cmpxchg_emulated(linear, orig_data, data, size, &ctxt->exception, ctxt->vcpu); } @@ -2480,7 +2506,12 @@ static int em_movdqu(struct x86_emulate_ctxt *ctxt) static int em_invlpg(struct x86_emulate_ctxt *ctxt) { struct decode_cache *c = &ctxt->decode; - emulate_invlpg(ctxt->vcpu, linear(ctxt, c->src.addr.mem)); + int rc; + ulong linear; + + rc = linearize(ctxt, c->src.addr.mem, &linear); + if (rc == X86EMUL_CONTINUE) + emulate_invlpg(ctxt->vcpu, linear); /* Disable writeback. 
*/ c->dst.type = OP_NONE; return X86EMUL_CONTINUE; -- cgit v1.2.3-70-g09d2 From 83b8795a29c53a5f9f202933818128aa54c3e8d2 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 3 Apr 2011 11:31:19 +0300 Subject: KVM: x86 emulator: pass access size and read/write intent to linearize() Needed for segment read/write checks. Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index b46fa374d0f..a2d343c4c0c 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -491,6 +491,7 @@ static unsigned seg_override(struct x86_emulate_ctxt *ctxt, static int linearize(struct x86_emulate_ctxt *ctxt, struct segmented_address addr, + unsigned size, bool write, ulong *linear) { struct decode_cache *c = &ctxt->decode; @@ -550,7 +551,7 @@ static int segmented_read_std(struct x86_emulate_ctxt *ctxt, int rc; ulong linear; - rc = linearize(ctxt, addr, &linear); + rc = linearize(ctxt, addr, size, false, &linear); if (rc != X86EMUL_CONTINUE) return rc; return ctxt->ops->read_std(linear, data, size, ctxt->vcpu, @@ -973,7 +974,7 @@ static int segmented_read(struct x86_emulate_ctxt *ctxt, int rc; ulong linear; - rc = linearize(ctxt, addr, &linear); + rc = linearize(ctxt, addr, size, false, &linear); if (rc != X86EMUL_CONTINUE) return rc; return read_emulated(ctxt, ctxt->ops, linear, data, size); @@ -987,7 +988,7 @@ static int segmented_write(struct x86_emulate_ctxt *ctxt, int rc; ulong linear; - rc = linearize(ctxt, addr, &linear); + rc = linearize(ctxt, addr, size, true, &linear); if (rc != X86EMUL_CONTINUE) return rc; return ctxt->ops->write_emulated(linear, data, size, @@ -1002,7 +1003,7 @@ static int segmented_cmpxchg(struct x86_emulate_ctxt *ctxt, int rc; ulong linear; - rc = linearize(ctxt, addr, &linear); + rc = linearize(ctxt, addr, size, true, &linear); if (rc != X86EMUL_CONTINUE) return rc; return ctxt->ops->cmpxchg_emulated(linear, orig_data, data, @@ -2509,7 +2510,7 @@ static int em_invlpg(struct x86_emulate_ctxt *ctxt) int rc; ulong linear; - rc = linearize(ctxt, c->src.addr.mem, &linear); + rc = linearize(ctxt, c->src.addr.mem, 1, false, &linear); if (rc == X86EMUL_CONTINUE) emulate_invlpg(ctxt->vcpu, linear); /* Disable writeback. */ -- cgit v1.2.3-70-g09d2 From 52fd8b445f5e8572526e3f84c753079470152414 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 3 Apr 2011 12:33:12 +0300 Subject: KVM: x86 emulator: move linearize() downwards So it can call emulate_gp() without forward declarations. 
Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index a2d343c4c0c..601a9bca4b7 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -489,21 +489,6 @@ static unsigned seg_override(struct x86_emulate_ctxt *ctxt, return c->seg_override; } -static int linearize(struct x86_emulate_ctxt *ctxt, - struct segmented_address addr, - unsigned size, bool write, - ulong *linear) -{ - struct decode_cache *c = &ctxt->decode; - ulong la; - - la = seg_base(ctxt, ctxt->ops, addr.seg) + addr.ea; - if (c->ad_bytes != 8) - la &= (u32)-1; - *linear = la; - return X86EMUL_CONTINUE; -} - static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec, u32 error, bool valid) { @@ -543,6 +528,21 @@ static int emulate_nm(struct x86_emulate_ctxt *ctxt) return emulate_exception(ctxt, NM_VECTOR, 0, false); } +static int linearize(struct x86_emulate_ctxt *ctxt, + struct segmented_address addr, + unsigned size, bool write, + ulong *linear) +{ + struct decode_cache *c = &ctxt->decode; + ulong la; + + la = seg_base(ctxt, ctxt->ops, addr.seg) + addr.ea; + if (c->ad_bytes != 8) + la &= (u32)-1; + *linear = la; + return X86EMUL_CONTINUE; +} + static int segmented_read_std(struct x86_emulate_ctxt *ctxt, struct segmented_address addr, void *data, -- cgit v1.2.3-70-g09d2 From 56697687da592d0429c0c3ab80ee7e9d20a3b6e5 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 3 Apr 2011 14:08:51 +0300 Subject: KVM: x86 emulator: move desc_limit_scaled() For reuse later. Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 601a9bca4b7..793aff52a4b 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -464,6 +464,13 @@ static inline void jmp_rel(struct decode_cache *c, int rel) register_address_increment(c, &c->eip, rel); } +static u32 desc_limit_scaled(struct desc_struct *desc) +{ + u32 limit = get_desc_limit(desc); + + return desc->g ? (limit << 12) | 0xfff : limit; +} + static void set_seg_override(struct decode_cache *c, int seg) { c->has_seg_override = true; @@ -1040,13 +1047,6 @@ static int pio_in_emulated(struct x86_emulate_ctxt *ctxt, return 1; } -static u32 desc_limit_scaled(struct desc_struct *desc) -{ - u32 limit = get_desc_limit(desc); - - return desc->g ? 
(limit << 12) | 0xfff : limit; } static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops, u16 selector, struct desc_ptr *dt) -- cgit v1.2.3-70-g09d2 From 618ff15de19109af126b33d90d7eaec27e61c691 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 3 Apr 2011 12:32:09 +0300 Subject: KVM: x86 emulator: implement segment permission checks Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 793aff52a4b..2ec69bc8584 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -515,6 +515,11 @@ static int emulate_gp(struct x86_emulate_ctxt *ctxt, int err) return emulate_exception(ctxt, GP_VECTOR, err, true); } +static int emulate_ss(struct x86_emulate_ctxt *ctxt, int err) +{ + return emulate_exception(ctxt, SS_VECTOR, err, true); +} + static int emulate_ud(struct x86_emulate_ctxt *ctxt) { return emulate_exception(ctxt, UD_VECTOR, 0, false); @@ -541,13 +546,71 @@ static int linearize(struct x86_emulate_ctxt *ctxt, ulong *linear) { struct decode_cache *c = &ctxt->decode; + struct desc_struct desc; + bool usable; ulong la; + u32 lim; + unsigned cpl, rpl; la = seg_base(ctxt, ctxt->ops, addr.seg) + addr.ea; + switch (ctxt->mode) { + case X86EMUL_MODE_REAL: + break; + case X86EMUL_MODE_PROT64: + if (((signed long)la << 16) >> 16 != la) + return emulate_gp(ctxt, 0); + break; + default: + usable = ctxt->ops->get_cached_descriptor(&desc, NULL, addr.seg, + ctxt->vcpu); + if (!usable) + goto bad; + /* code segment or read-only data segment */ + if (((desc.type & 8) || !(desc.type & 2)) && write) + goto bad; + /* unreadable code segment */ + if ((desc.type & 8) && !(desc.type & 2)) + goto bad; + lim = desc_limit_scaled(&desc); + if ((desc.type & 8) || !(desc.type & 4)) { + /* expand-up segment */ + if (addr.ea > lim || (u32)(addr.ea + size - 1) > lim) + goto bad; + } else { + /* expand-down segment */ + if (addr.ea <= lim || (u32)(addr.ea + size - 1) <= lim) + goto bad; + lim = desc.d ? 0xffffffff : 0xffff; + if (addr.ea > lim || (u32)(addr.ea + size - 1) > lim) + goto bad; + } + cpl = ctxt->ops->cpl(ctxt->vcpu); + rpl = ctxt->ops->get_segment_selector(addr.seg, ctxt->vcpu) & 3; + cpl = max(cpl, rpl); + if (!(desc.type & 8)) { + /* data segment */ + if (cpl > desc.dpl) + goto bad; + } else if ((desc.type & 8) && !(desc.type & 4)) { + /* nonconforming code segment */ + if (cpl != desc.dpl) + goto bad; + } else if ((desc.type & 8) && (desc.type & 4)) { + /* conforming code segment */ + if (cpl < desc.dpl) + goto bad; + } + break; + } if (c->ad_bytes != 8) la &= (u32)-1; *linear = la; return X86EMUL_CONTINUE; +bad: + if (addr.seg == VCPU_SREG_SS) + return emulate_ss(ctxt, addr.seg); + else + return emulate_gp(ctxt, addr.seg); } static int segmented_read_std(struct x86_emulate_ctxt *ctxt, -- cgit v1.2.3-70-g09d2 From 7ae441eac521b2006c9f03c4f2a23582c07fd76d Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Thu, 31 Mar 2011 12:06:41 +0200 Subject: KVM: emulator: do not needlessly sync registers from emulator ctxt to vcpu Currently we sync registers back and forth before/after exiting to userspace for IO, but during IO the device model shouldn't need to read/write the registers, so we can just as well skip those sync points. The only exception is the broken vmware backdoor interface. 
The new code syncs register contents during IO only if the registers are read from/written to by userspace in the middle of the IO operation, and this almost never happens in practice. Signed-off-by: Gleb Natapov Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/kvm_host.h | 2 ++ arch/x86/kvm/x86.c | 47 +++++++++++++++++++++++++++++++++-------- 2 files changed, 40 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index bd57639fd5d..3e03f37f43e 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -385,6 +385,8 @@ struct kvm_vcpu_arch { /* emulate context */ struct x86_emulate_ctxt emulate_ctxt; + bool emulate_regs_need_sync_to_vcpu; + bool emulate_regs_need_sync_from_vcpu; gpa_t time; struct pvclock_vcpu_time_info hv_clock; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 1d5a7f41879..3416a347384 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4420,6 +4420,7 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu) vcpu->arch.emulate_ctxt.guest_mode = is_guest_mode(vcpu); memset(c, 0, sizeof(struct decode_cache)); memcpy(c->regs, vcpu->arch.regs, sizeof c->regs); + vcpu->arch.emulate_regs_need_sync_from_vcpu = false; } int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq) @@ -4502,6 +4503,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, { int r; struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode; + bool writeback = true; kvm_clear_exception_queue(vcpu); vcpu->arch.mmio_fault_cr2 = cr2; @@ -4542,9 +4544,12 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, return EMULATE_DONE; } - /* this is needed for vmware backdor interface to work since it + /* this is needed for vmware backdoor interface to work since it changes registers values during IO operation */ - memcpy(c->regs, vcpu->arch.regs, sizeof c->regs); + if (vcpu->arch.emulate_regs_need_sync_from_vcpu) { + vcpu->arch.emulate_regs_need_sync_from_vcpu = false; + memcpy(c->regs, vcpu->arch.regs, sizeof c->regs); + } restart: r = x86_emulate_insn(&vcpu->arch.emulate_ctxt); @@ -4565,19 +4570,28 @@ restart: } else if (vcpu->arch.pio.count) { if (!vcpu->arch.pio.in) vcpu->arch.pio.count = 0; + else + writeback = false; r = EMULATE_DO_MMIO; - } else if (vcpu->mmio_needed) + } else if (vcpu->mmio_needed) { + if (!vcpu->mmio_is_write) + writeback = false; r = EMULATE_DO_MMIO; - else if (r == EMULATION_RESTART) + } else if (r == EMULATION_RESTART) goto restart; else r = EMULATE_DONE; - toggle_interruptibility(vcpu, vcpu->arch.emulate_ctxt.interruptibility); - kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); - kvm_make_request(KVM_REQ_EVENT, vcpu); - memcpy(vcpu->arch.regs, c->regs, sizeof c->regs); - kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip); + if (writeback) { + toggle_interruptibility(vcpu, + vcpu->arch.emulate_ctxt.interruptibility); + kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); + kvm_make_request(KVM_REQ_EVENT, vcpu); + memcpy(vcpu->arch.regs, c->regs, sizeof c->regs); + vcpu->arch.emulate_regs_need_sync_to_vcpu = false; + kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip); + } else + vcpu->arch.emulate_regs_need_sync_to_vcpu = true; return r; } @@ -5587,6 +5601,18 @@ out: int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) { + if (vcpu->arch.emulate_regs_need_sync_to_vcpu) { + /* + * We are here if userspace calls get_regs() in the middle of + * instruction emulation. 
Register state needs to be copied + * back from emulation context to vcpu. Userspace shouldn't do + * that usually, but some badly designed PV devices (vmware + * backdoor interface) need this to work + */ + struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode; + memcpy(vcpu->arch.regs, c->regs, sizeof c->regs); + vcpu->arch.emulate_regs_need_sync_to_vcpu = false; + } regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX); regs->rbx = kvm_register_read(vcpu, VCPU_REGS_RBX); regs->rcx = kvm_register_read(vcpu, VCPU_REGS_RCX); @@ -5614,6 +5640,9 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) { + vcpu->arch.emulate_regs_need_sync_from_vcpu = true; + vcpu->arch.emulate_regs_need_sync_to_vcpu = false; + kvm_register_write(vcpu, VCPU_REGS_RAX, regs->rax); kvm_register_write(vcpu, VCPU_REGS_RBX, regs->rbx); kvm_register_write(vcpu, VCPU_REGS_RCX, regs->rcx); -- cgit v1.2.3-70-g09d2 From be6d05cfdf1f2ddbdc367a6433d8eac49d6bfe6f Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Wed, 13 Apr 2011 01:27:55 +0200 Subject: KVM: VMX: Ensure that vmx_create_vcpu always returns proper error In case certain allocations fail, vmx_create_vcpu may return 0 as error instead of a negative value encoded via ERR_PTR. This causes a NULL pointer dereference later on in kvm_vm_ioctl_vcpu_create. Reported-by: Sasha Levin Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/vmx.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index aabe3334d06..af520698315 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -4251,8 +4251,8 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) goto free_vcpu; vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL); + err = -ENOMEM; if (!vmx->guest_msrs) { - err = -ENOMEM; goto uninit_vcpu; } @@ -4271,7 +4271,8 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) if (err) goto free_vmcs; if (vm_need_virtualize_apic_accesses(kvm)) - if (alloc_apic_access_page(kvm) != 0) + err = alloc_apic_access_page(kvm); + if (err) goto free_vmcs; if (enable_ept) { -- cgit v1.2.3-70-g09d2 From 575e7c1417d41dd72ddf2a49965f833ce9352e92 Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Wed, 13 Apr 2011 00:24:55 +0900 Subject: KVM: x86 emulator: Disable writeback for CMP emulation This stops "CMP r/m, reg" from writing back the data into memory. Pointed out by Avi. The writeback suppression now covers CMP, CMPS, SCAS. Signed-off-by: Takuya Yoshikawa Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 2ec69bc8584..1e0e3f8156f 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -3671,6 +3671,7 @@ special_insn: break; case 0x38 ... 0x3d: cmp: /* cmp */ + c->dst.type = OP_NONE; /* Disable writeback. */ emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags); break; case 0x40 ... 0x47: /* inc r16/r32 */ @@ -3797,7 +3798,6 @@ special_insn: rc = emulate_popf(ctxt, ops, &c->dst.val, c->op_bytes); break; case 0xa6 ... 0xa7: /* cmps */ - c->dst.type = OP_NONE; /* Disable writeback. */ goto cmp; case 0xa8 ... 
0xa9: /* test ax, imm */ goto test; -- cgit v1.2.3-70-g09d2 From 4179bb02fd3e87183e5f698495dfcb80df187889 Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Wed, 13 Apr 2011 00:29:09 +0900 Subject: KVM: x86 emulator: Make emulate_push() store the value directly PUSH emulation stores the value by calling writeback() after setting the dst operand appropriately in emulate_push(). This writeback() using dst is not needed at all because we know the target is the stack. So this patch makes emulate_push() call the newly introduced segmented_write() directly. This removes many inlined writeback() calls. Signed-off-by: Takuya Yoshikawa Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 87 ++++++++++++++++++++------------------------------ 1 file changed, 34 insertions(+), 53 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 1e0e3f8156f..4f4d9bc6178 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -1345,17 +1345,19 @@ static inline int writeback(struct x86_emulate_ctxt *ctxt, return X86EMUL_CONTINUE; } -static inline void emulate_push(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops) +static int emulate_push(struct x86_emulate_ctxt *ctxt, + struct x86_emulate_ops *ops) { struct decode_cache *c = &ctxt->decode; + struct segmented_address addr; - c->dst.type = OP_MEM; - c->dst.bytes = c->op_bytes; - c->dst.val = c->src.val; register_address_increment(c, &c->regs[VCPU_REGS_RSP], -c->op_bytes); - c->dst.addr.mem.ea = register_address(c, c->regs[VCPU_REGS_RSP]); - c->dst.addr.mem.seg = VCPU_SREG_SS; + addr.ea = register_address(c, c->regs[VCPU_REGS_RSP]); + addr.seg = VCPU_SREG_SS; + + /* Disable writeback. */ + c->dst.type = OP_NONE; + return segmented_write(ctxt, addr, &c->src.val, c->op_bytes); } static int emulate_pop(struct x86_emulate_ctxt *ctxt, @@ -1417,14 +1419,14 @@ static int emulate_popf(struct x86_emulate_ctxt *ctxt, return rc; } -static void emulate_push_sreg(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops, int seg) +static int emulate_push_sreg(struct x86_emulate_ctxt *ctxt, + struct x86_emulate_ops *ops, int seg) { struct decode_cache *c = &ctxt->decode; c->src.val = ops->get_segment_selector(seg, ctxt->vcpu); - emulate_push(ctxt, ops); + return emulate_push(ctxt, ops); } static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt, @@ -1454,18 +1456,13 @@ static int emulate_pusha(struct x86_emulate_ctxt *ctxt, (reg == VCPU_REGS_RSP) ? (c->src.val = old_esp) : (c->src.val = c->regs[reg]); - emulate_push(ctxt, ops); - - rc = writeback(ctxt, ops); + rc = emulate_push(ctxt, ops); if (rc != X86EMUL_CONTINUE) return rc; ++reg; } - /* Disable writeback.
*/ - c->dst.type = OP_NONE; - return rc; } @@ -1503,27 +1500,22 @@ int emulate_int_real(struct x86_emulate_ctxt *ctxt, /* TODO: Add limit checks */ c->src.val = ctxt->eflags; - emulate_push(ctxt, ops); - rc = writeback(ctxt, ops); + rc = emulate_push(ctxt, ops); if (rc != X86EMUL_CONTINUE) return rc; ctxt->eflags &= ~(EFLG_IF | EFLG_TF | EFLG_AC); c->src.val = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu); - emulate_push(ctxt, ops); - rc = writeback(ctxt, ops); + rc = emulate_push(ctxt, ops); if (rc != X86EMUL_CONTINUE) return rc; c->src.val = c->eip; - emulate_push(ctxt, ops); - rc = writeback(ctxt, ops); + rc = emulate_push(ctxt, ops); if (rc != X86EMUL_CONTINUE) return rc; - c->dst.type = OP_NONE; - ops->get_idt(&dt, ctxt->vcpu); eip_addr = dt.address + (irq << 2); @@ -1713,6 +1705,7 @@ static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) { struct decode_cache *c = &ctxt->decode; + int rc = X86EMUL_CONTINUE; switch (c->modrm_reg) { case 0: /* inc */ @@ -1726,17 +1719,17 @@ static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt, old_eip = c->eip; c->eip = c->src.val; c->src.val = old_eip; - emulate_push(ctxt, ops); + rc = emulate_push(ctxt, ops); break; } case 4: /* jmp abs */ c->eip = c->src.val; break; case 6: /* push */ - emulate_push(ctxt, ops); + rc = emulate_push(ctxt, ops); break; } - return X86EMUL_CONTINUE; + return rc; } static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt, @@ -2380,7 +2373,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, c->op_bytes = c->ad_bytes = (next_tss_desc.type & 8) ? 4 : 2; c->lock_prefix = 0; c->src.val = (unsigned long) error_code; - emulate_push(ctxt, ops); + ret = emulate_push(ctxt, ops); } return ret; @@ -2400,11 +2393,8 @@ int emulator_task_switch(struct x86_emulate_ctxt *ctxt, rc = emulator_do_task_switch(ctxt, ops, tss_selector, reason, has_error_code, error_code); - if (rc == X86EMUL_CONTINUE) { - rc = writeback(ctxt, ops); - if (rc == X86EMUL_CONTINUE) - ctxt->eip = c->eip; - } + if (rc == X86EMUL_CONTINUE) + ctxt->eip = c->eip; return (rc == X86EMUL_UNHANDLEABLE) ? 
EMULATION_FAILED : EMULATION_OK; } @@ -2422,8 +2412,7 @@ static void string_addr_inc(struct x86_emulate_ctxt *ctxt, unsigned seg, static int em_push(struct x86_emulate_ctxt *ctxt) { - emulate_push(ctxt, ctxt->ops); - return X86EMUL_CONTINUE; + return emulate_push(ctxt, ctxt->ops); } static int em_das(struct x86_emulate_ctxt *ctxt) @@ -2483,20 +2472,12 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt) memcpy(&c->eip, c->src.valptr, c->op_bytes); c->src.val = old_cs; - emulate_push(ctxt, ctxt->ops); - rc = writeback(ctxt, ctxt->ops); + rc = emulate_push(ctxt, ctxt->ops); if (rc != X86EMUL_CONTINUE) return rc; c->src.val = old_eip; - emulate_push(ctxt, ctxt->ops); - rc = writeback(ctxt, ctxt->ops); - if (rc != X86EMUL_CONTINUE) - return rc; - - c->dst.type = OP_NONE; - - return X86EMUL_CONTINUE; + return emulate_push(ctxt, ctxt->ops); } static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt) @@ -3625,7 +3606,7 @@ special_insn: emulate_2op_SrcV("add", c->src, c->dst, ctxt->eflags); break; case 0x06: /* push es */ - emulate_push_sreg(ctxt, ops, VCPU_SREG_ES); + rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_ES); break; case 0x07: /* pop es */ rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_ES); @@ -3635,14 +3616,14 @@ special_insn: emulate_2op_SrcV("or", c->src, c->dst, ctxt->eflags); break; case 0x0e: /* push cs */ - emulate_push_sreg(ctxt, ops, VCPU_SREG_CS); + rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_CS); break; case 0x10 ... 0x15: adc: /* adc */ emulate_2op_SrcV("adc", c->src, c->dst, ctxt->eflags); break; case 0x16: /* push ss */ - emulate_push_sreg(ctxt, ops, VCPU_SREG_SS); + rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_SS); break; case 0x17: /* pop ss */ rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_SS); @@ -3652,7 +3633,7 @@ special_insn: emulate_2op_SrcV("sbb", c->src, c->dst, ctxt->eflags); break; case 0x1e: /* push ds */ - emulate_push_sreg(ctxt, ops, VCPU_SREG_DS); + rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_DS); break; case 0x1f: /* pop ds */ rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_DS); @@ -3789,7 +3770,7 @@ special_insn: break; case 0x9c: /* pushf */ c->src.val = (unsigned long) ctxt->eflags; - emulate_push(ctxt, ops); + rc = emulate_push(ctxt, ops); break; case 0x9d: /* popf */ c->dst.type = OP_REG; @@ -3864,7 +3845,7 @@ special_insn: long int rel = c->src.val; c->src.val = (unsigned long) c->eip; jmp_rel(c, rel); - emulate_push(ctxt, ops); + rc = emulate_push(ctxt, ops); break; } case 0xe9: /* jmp rel */ @@ -4157,7 +4138,7 @@ twobyte_insn: c->dst.val = test_cc(c->b, ctxt->eflags); break; case 0xa0: /* push fs */ - emulate_push_sreg(ctxt, ops, VCPU_SREG_FS); + rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_FS); break; case 0xa1: /* pop fs */ rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_FS); @@ -4174,7 +4155,7 @@ twobyte_insn: emulate_2op_cl("shld", c->src2, c->src, c->dst, ctxt->eflags); break; case 0xa8: /* push gs */ - emulate_push_sreg(ctxt, ops, VCPU_SREG_GS); + rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_GS); break; case 0xa9: /* pop gs */ rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_GS); -- cgit v1.2.3-70-g09d2 From 4487b3b48d8fa3f6a5dd4155c9e34d5e998ad7fe Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Wed, 13 Apr 2011 00:31:23 +0900 Subject: KVM: x86 emulator: Use em_push() instead of emulate_push() em_push() is a simple wrapper of emulate_push(). So this patch replaces emulate_push() with em_push() and removes the unnecessary former. In addition, the unused ops arguments are removed from emulate_pusha() and emulate_grp45(). 
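For illustration, a minimal stand-alone sketch of the push semantics that em_push() centralizes, not the kernel code itself (stack_sketch and push_sketch are invented names, and the flat byte array stands in for the SS segment that the real code reaches through segmented_write(), so that faults come back as return codes):

    #include <stdint.h>
    #include <string.h>

    struct stack_sketch {
            uint8_t ss[4096];       /* stand-in for the SS segment */
            uint32_t rsp;
    };

    static int push_sketch(struct stack_sketch *s, uint64_t val, unsigned op_bytes)
    {
            s->rsp -= op_bytes;     /* RSP is adjusted first, as in em_push() */
            if (s->rsp > sizeof(s->ss) - op_bytes)
                    return -1;      /* stand-in for a propagated #SS/#PF */
            /* copies the low op_bytes of val, assuming a little-endian host */
            memcpy(&s->ss[s->rsp], &val, op_bytes);
            return 0;               /* the X86EMUL_CONTINUE case */
    }

Because the write result is the return value, every caller can simply propagate it, which is what lets the series delete the inlined writeback() calls.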
Signed-off-by: Takuya Yoshikawa Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 42 +++++++++++++++++------------------------- 1 file changed, 17 insertions(+), 25 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 4f4d9bc6178..cb2efa46379 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -1345,8 +1345,7 @@ static inline int writeback(struct x86_emulate_ctxt *ctxt, return X86EMUL_CONTINUE; } -static int emulate_push(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops) +static int em_push(struct x86_emulate_ctxt *ctxt) { struct decode_cache *c = &ctxt->decode; struct segmented_address addr; @@ -1426,7 +1425,7 @@ static int emulate_push_sreg(struct x86_emulate_ctxt *ctxt, c->src.val = ops->get_segment_selector(seg, ctxt->vcpu); - return emulate_push(ctxt, ops); + return em_push(ctxt); } static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt, @@ -1444,8 +1443,7 @@ static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt, return rc; } -static int emulate_pusha(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops) +static int emulate_pusha(struct x86_emulate_ctxt *ctxt) { struct decode_cache *c = &ctxt->decode; unsigned long old_esp = c->regs[VCPU_REGS_RSP]; @@ -1456,7 +1454,7 @@ static int emulate_pusha(struct x86_emulate_ctxt *ctxt, (reg == VCPU_REGS_RSP) ? (c->src.val = old_esp) : (c->src.val = c->regs[reg]); - rc = emulate_push(ctxt, ops); + rc = em_push(ctxt); if (rc != X86EMUL_CONTINUE) return rc; @@ -1500,19 +1498,19 @@ int emulate_int_real(struct x86_emulate_ctxt *ctxt, /* TODO: Add limit checks */ c->src.val = ctxt->eflags; - rc = emulate_push(ctxt, ops); + rc = em_push(ctxt); if (rc != X86EMUL_CONTINUE) return rc; ctxt->eflags &= ~(EFLG_IF | EFLG_TF | EFLG_AC); c->src.val = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu); - rc = emulate_push(ctxt, ops); + rc = em_push(ctxt); if (rc != X86EMUL_CONTINUE) return rc; c->src.val = c->eip; - rc = emulate_push(ctxt, ops); + rc = em_push(ctxt); if (rc != X86EMUL_CONTINUE) return rc; @@ -1701,8 +1699,7 @@ static inline int emulate_grp3(struct x86_emulate_ctxt *ctxt, return X86EMUL_CONTINUE; } -static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops) +static int emulate_grp45(struct x86_emulate_ctxt *ctxt) { struct decode_cache *c = &ctxt->decode; int rc = X86EMUL_CONTINUE; @@ -1719,14 +1716,14 @@ static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt, old_eip = c->eip; c->eip = c->src.val; c->src.val = old_eip; - rc = emulate_push(ctxt, ops); + rc = em_push(ctxt); break; } case 4: /* jmp abs */ c->eip = c->src.val; break; case 6: /* push */ - rc = emulate_push(ctxt, ops); + rc = em_push(ctxt); break; } return rc; @@ -2373,7 +2370,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, c->op_bytes = c->ad_bytes = (next_tss_desc.type & 8) ? 
4 : 2; c->lock_prefix = 0; c->src.val = (unsigned long) error_code; - ret = emulate_push(ctxt, ops); + ret = em_push(ctxt); } return ret; @@ -2410,11 +2407,6 @@ static void string_addr_inc(struct x86_emulate_ctxt *ctxt, unsigned seg, op->addr.mem.seg = seg; } -static int em_push(struct x86_emulate_ctxt *ctxt) -{ - return emulate_push(ctxt, ctxt->ops); -} - static int em_das(struct x86_emulate_ctxt *ctxt) { struct decode_cache *c = &ctxt->decode; @@ -2472,12 +2464,12 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt) memcpy(&c->eip, c->src.valptr, c->op_bytes); c->src.val = old_cs; - rc = emulate_push(ctxt, ctxt->ops); + rc = em_push(ctxt); if (rc != X86EMUL_CONTINUE) return rc; c->src.val = old_eip; - return emulate_push(ctxt, ctxt->ops); + return em_push(ctxt); } static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt) @@ -3666,7 +3658,7 @@ special_insn: rc = emulate_pop(ctxt, ops, &c->dst.val, c->op_bytes); break; case 0x60: /* pusha */ - rc = emulate_pusha(ctxt, ops); + rc = emulate_pusha(ctxt); break; case 0x61: /* popa */ rc = emulate_popa(ctxt, ops); @@ -3770,7 +3762,7 @@ special_insn: break; case 0x9c: /* pushf */ c->src.val = (unsigned long) ctxt->eflags; - rc = emulate_push(ctxt, ops); + rc = em_push(ctxt); break; case 0x9d: /* popf */ c->dst.type = OP_REG; @@ -3845,7 +3837,7 @@ special_insn: long int rel = c->src.val; c->src.val = (unsigned long) c->eip; jmp_rel(c, rel); - rc = emulate_push(ctxt, ops); + rc = em_push(ctxt); break; } case 0xe9: /* jmp rel */ @@ -3923,7 +3915,7 @@ special_insn: break; case 0xfe: /* Grp4 */ grp45: - rc = emulate_grp45(ctxt, ops); + rc = emulate_grp45(ctxt); break; case 0xff: /* Grp5 */ if (c->modrm_reg == 5) -- cgit v1.2.3-70-g09d2 From 71f9833bb1cba9939245f3e57388d87d69f8f399 Mon Sep 17 00:00:00 2001 From: "Serge E. Hallyn" Date: Wed, 13 Apr 2011 09:12:54 -0500 Subject: KVM: fix push of wrong eip when doing softint When doing a soft int, we need to bump eip before pushing it to the stack. Otherwise we'll do the int a second time. [apw@canonical.com: merged eip update as per Jan's recommendation.] Signed-off-by: Serge E. 
Hallyn Signed-off-by: Andy Whitcroft Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 12 +++++++++--- arch/x86/kvm/x86.c | 5 +++-- arch/x86/kvm/x86.h | 2 +- 3 files changed, 13 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index af520698315..3f6e9bff016 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1063,7 +1063,10 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, } if (vmx->rmode.vm86_active) { - if (kvm_inject_realmode_interrupt(vcpu, nr) != EMULATE_DONE) + int inc_eip = 0; + if (kvm_exception_is_soft(nr)) + inc_eip = vcpu->arch.event_exit_inst_len; + if (kvm_inject_realmode_interrupt(vcpu, nr, inc_eip) != EMULATE_DONE) kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); return; } @@ -2942,7 +2945,10 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu) ++vcpu->stat.irq_injections; if (vmx->rmode.vm86_active) { - if (kvm_inject_realmode_interrupt(vcpu, irq) != EMULATE_DONE) + int inc_eip = 0; + if (vcpu->arch.interrupt.soft) + inc_eip = vcpu->arch.event_exit_inst_len; + if (kvm_inject_realmode_interrupt(vcpu, irq, inc_eip) != EMULATE_DONE) kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); return; } @@ -2977,7 +2983,7 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu) ++vcpu->stat.nmi_injections; vmx->nmi_known_unmasked = false; if (vmx->rmode.vm86_active) { - if (kvm_inject_realmode_interrupt(vcpu, NMI_VECTOR) != EMULATE_DONE) + if (kvm_inject_realmode_interrupt(vcpu, NMI_VECTOR, 0) != EMULATE_DONE) kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); return; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3416a347384..b05e3fcce29 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4423,7 +4423,7 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu) vcpu->arch.emulate_regs_need_sync_from_vcpu = false; } -int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq) +int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip) { struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode; int ret; @@ -4432,7 +4432,8 @@ int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq) vcpu->arch.emulate_ctxt.decode.op_bytes = 2; vcpu->arch.emulate_ctxt.decode.ad_bytes = 2; - vcpu->arch.emulate_ctxt.decode.eip = vcpu->arch.emulate_ctxt.eip; + vcpu->arch.emulate_ctxt.decode.eip = vcpu->arch.emulate_ctxt.eip + + inc_eip; ret = emulate_int_real(&vcpu->arch.emulate_ctxt, &emulate_ops, irq); if (ret != X86EMUL_CONTINUE) diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index c600da830ce..e407ed3df81 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -77,7 +77,7 @@ static inline u32 bit(int bitno) void kvm_before_handle_nmi(struct kvm_vcpu *vcpu); void kvm_after_handle_nmi(struct kvm_vcpu *vcpu); -int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq); +int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip); void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data); -- cgit v1.2.3-70-g09d2 From 1e015968dfb9289934ad66a67fa38a93f0adf4f8 Mon Sep 17 00:00:00 2001 From: Duan Jiong Date: Mon, 11 Apr 2011 12:56:01 +0800 Subject: KVM: remove useless function declarations from file arch/x86/kvm/irq.h Just remove the useless function declarations kvm_pic_clear_isr_ack() and pit_has_pending_timer(). Signed-off-by: Duan Jiong Signed-off-by: Avi Kivity --- arch/x86/kvm/irq.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index ba910d14941..53e2d084bff 100644 --- a/arch/x86/kvm/irq.h +++ 
b/arch/x86/kvm/irq.h @@ -75,7 +75,6 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm); void kvm_destroy_pic(struct kvm *kvm); int kvm_pic_read_irq(struct kvm *kvm); void kvm_pic_update_irq(struct kvm_pic *s); -void kvm_pic_clear_isr_ack(struct kvm *kvm); static inline struct kvm_pic *pic_irqchip(struct kvm *kvm) { @@ -100,7 +99,6 @@ void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu); void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu); void __kvm_migrate_timers(struct kvm_vcpu *vcpu); -int pit_has_pending_timer(struct kvm_vcpu *vcpu); int apic_has_pending_timer(struct kvm_vcpu *vcpu); #endif -- cgit v1.2.3-70-g09d2 From 49704f26586ca87fcab4fe9323fff8db41e78910 Mon Sep 17 00:00:00 2001 From: Duan Jiong Date: Mon, 11 Apr 2011 12:44:06 +0800 Subject: KVM: remove useless function declaration kvm_inject_pit_timer_irqs() Just remove the useless function declaration kvm_inject_pit_timer_irqs() from arch/x86/kvm/i8254.h. Signed-off-by: Duan Jiong Signed-off-by: Avi Kivity --- arch/x86/kvm/i8254.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h index b681a9f78c5..51a97426e79 100644 --- a/arch/x86/kvm/i8254.h +++ b/arch/x86/kvm/i8254.h @@ -50,7 +50,6 @@ struct kvm_pit { #define KVM_MAX_PIT_INTR_INTERVAL HZ / 100 #define KVM_PIT_CHANNEL_MASK 0x3 -void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu); void kvm_pit_load_count(struct kvm *kvm, int channel, u32 val, int hpet_legacy_start); struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags); void kvm_free_pit(struct kvm *kvm); -- cgit v1.2.3-70-g09d2 From 0521e4c0bc73aa86152ee4e4bd03724c8a9e1d6b Mon Sep 17 00:00:00 2001 From: Nelson Elhage Date: Wed, 13 Apr 2011 11:44:13 -0400 Subject: KVM: x86 emulator: Handle wraparound in (cs_base + offset) when fetching insns Currently, setting a large (i.e. negative) base address for %cs does not work on a 64-bit host. The "JOS" teaching operating system, used by MIT and other universities, relies on such segments while bootstrapping its way to full virtual memory management. Signed-off-by: Nelson Elhage Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index cb2efa46379..4c5ff22d101 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -637,9 +637,12 @@ static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt, int size, cur_size; if (eip == fc->end) { + unsigned long linear = eip + ctxt->cs_base; + if (ctxt->mode != X86EMUL_MODE_PROT64) + linear &= (u32)-1; cur_size = fc->end - fc->start; size = min(15UL - cur_size, PAGE_SIZE - offset_in_page(eip)); - rc = ops->fetch(ctxt->cs_base + eip, fc->data + cur_size, + rc = ops->fetch(linear, fc->data + cur_size, size, ctxt->vcpu, &ctxt->exception); if (rc != X86EMUL_CONTINUE) return rc; -- cgit v1.2.3-70-g09d2 From 977b2d03e42e9ea9355d4baddb464810579719bd Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 18 Apr 2011 11:42:52 +0200 Subject: KVM: SVM: Fix nested sel_cr0 intercept path with decode-assists This patch fixes a bug in the nested-svm path when decode-assists is available on the machine. After a selective-cr0 intercept is detected the rip is advanced unconditionally. This causes the l1-guest to continue running with an l2-rip. This bug was found with the sel_cr0 unit-test on decode-assists capable hardware.
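The rule being restored can be sketched stand-alone (illustrative names only, not the SVM code: vcpu_sketch, l1_claims_cr0 and insn_len are invented): when the nested intercept claims the CR0 write, the handler must return before the instruction pointer is advanced, since the queued #VMEXIT resumes L1, whose rip must not be derived from L2's instruction stream.

    #include <stdbool.h>

    struct vcpu_sketch {
            unsigned long rip;
            unsigned long cr0;
            bool l1_claims_cr0;     /* L1's selective-CR0 intercept fires */
    };

    static int cr0_write_sketch(struct vcpu_sketch *v, unsigned long val,
                                unsigned insn_len)
    {
            if (v->l1_claims_cr0)
                    return 1;       /* #VMEXIT to L1: leave rip untouched */
            v->cr0 = val;           /* normal path: perform the write... */
            v->rip += insn_len;     /* ...then skip the emulated instruction */
            return 1;
    }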
Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/kvm/svm.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index a6bf2ad7429..de4bba99160 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -2799,6 +2799,9 @@ static int cr_interception(struct vcpu_svm *svm) case 0: if (!check_selective_cr0_intercepted(svm, val)) err = kvm_set_cr0(&svm->vcpu, val); + else + return 1; + + break; case 3: err = kvm_set_cr3(&svm->vcpu, val); -- cgit v1.2.3-70-g09d2 From 7c4c0f4fd5c3e82234c0ab61c7e7ffdb8f3af07b Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 18 Apr 2011 11:42:53 +0200 Subject: KVM: X86: Update last_guest_tsc in vcpu_put The last_guest_tsc is used in vcpu_load to adjust the tsc_offset since tsc-scaling was merged. So the last_guest_tsc needs to be updated in vcpu_put instead of the last_host_tsc. This is fixed with this patch. Reported-by: Jan Kiszka Tested-by: Jan Kiszka Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 1 - arch/x86/kvm/x86.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 3e03f37f43e..e50bffcf3cc 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -393,7 +393,6 @@ struct kvm_vcpu_arch { unsigned int hw_tsc_khz; unsigned int time_offset; struct page *time_page; - u64 last_host_tsc; u64 last_guest_tsc; u64 last_kernel_ns; u64 last_tsc_nsec; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index b05e3fcce29..6aa137701cd 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2146,7 +2146,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { kvm_x86_ops->vcpu_put(vcpu); kvm_put_guest_fpu(vcpu); - vcpu->arch.last_host_tsc = native_read_tsc(); + kvm_get_msr(vcpu, MSR_IA32_TSC, &vcpu->arch.last_guest_tsc); } static int is_efer_nx(void) -- cgit v1.2.3-70-g09d2 From 3d9b938eefb7d91a1ae13e425931bd5ac103b762 Mon Sep 17 00:00:00 2001 From: Nelson Elhage Date: Mon, 18 Apr 2011 12:05:53 -0400 Subject: KVM: emulator: Use linearize() when fetching instructions Since segments need to be handled slightly differently when fetching instructions, we add a __linearize helper that accepts a new 'fetch' boolean. [avi: fix oops caused by wrong segmented_address initialization order] Signed-off-by: Nelson Elhage Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_emulate.h | 1 - arch/x86/kvm/emulate.c | 26 ++++++++++++++++++-------- 2 files changed, 18 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 081844860a3..9b760c8f257 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -265,7 +265,6 @@ struct x86_emulate_ctxt { unsigned long eip; /* eip before instruction emulation */ /* Emulated execution mode, represented by an X86EMUL_MODE value.
*/ int mode; - u32 cs_base; /* interruptibility state, as a result of execution of STI or MOV SS */ int interruptibility; diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 4c5ff22d101..e1f77de9540 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -540,9 +540,9 @@ static int emulate_nm(struct x86_emulate_ctxt *ctxt) return emulate_exception(ctxt, NM_VECTOR, 0, false); } -static int linearize(struct x86_emulate_ctxt *ctxt, +static int __linearize(struct x86_emulate_ctxt *ctxt, struct segmented_address addr, - unsigned size, bool write, + unsigned size, bool write, bool fetch, ulong *linear) { struct decode_cache *c = &ctxt->decode; @@ -569,7 +569,7 @@ static int linearize(struct x86_emulate_ctxt *ctxt, if (((desc.type & 8) || !(desc.type & 2)) && write) goto bad; /* unreadable code segment */ - if ((desc.type & 8) && !(desc.type & 2)) + if (!fetch && (desc.type & 8) && !(desc.type & 2)) goto bad; lim = desc_limit_scaled(&desc); if ((desc.type & 8) || !(desc.type & 4)) { @@ -602,7 +602,7 @@ static int linearize(struct x86_emulate_ctxt *ctxt, } break; } - if (c->ad_bytes != 8) + if (fetch ? ctxt->mode != X86EMUL_MODE_PROT64 : c->ad_bytes != 8) la &= (u32)-1; *linear = la; return X86EMUL_CONTINUE; @@ -613,6 +613,15 @@ bad: return emulate_gp(ctxt, addr.seg); } +static int linearize(struct x86_emulate_ctxt *ctxt, + struct segmented_address addr, + unsigned size, bool write, + ulong *linear) +{ + return __linearize(ctxt, addr, size, write, false, linear); +} + + static int segmented_read_std(struct x86_emulate_ctxt *ctxt, struct segmented_address addr, void *data, @@ -637,11 +646,13 @@ static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt, int size, cur_size; if (eip == fc->end) { - unsigned long linear = eip + ctxt->cs_base; - if (ctxt->mode != X86EMUL_MODE_PROT64) - linear &= (u32)-1; + unsigned long linear; + struct segmented_address addr = { .seg=VCPU_SREG_CS, .ea=eip}; cur_size = fc->end - fc->start; size = min(15UL - cur_size, PAGE_SIZE - offset_in_page(eip)); + rc = __linearize(ctxt, addr, size, false, true, &linear); + if (rc != X86EMUL_CONTINUE) + return rc; rc = ops->fetch(linear, fc->data + cur_size, size, ctxt->vcpu, &ctxt->exception); if (rc != X86EMUL_CONTINUE) @@ -3127,7 +3138,6 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) c->fetch.end = c->fetch.start + insn_len; if (insn_len > 0) memcpy(c->fetch.data, insn, insn_len); - ctxt->cs_base = seg_base(ctxt, ops, VCPU_SREG_CS); switch (mode) { case X86EMUL_MODE_REAL: -- cgit v1.2.3-70-g09d2 From 7295261cdd42e6d41666df38d1b613cdd9e95f46 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 20 Apr 2011 13:12:27 +0300 Subject: KVM: x86 emulator: whitespace cleanups Clean up lines longer than 80 columns. No code changes. 
Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 96 ++++++++++++++++++++++++++++---------------------- 1 file changed, 54 insertions(+), 42 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index e1f77de9540..4a5b61ff0ae 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -262,42 +262,42 @@ struct gprefix { "w", "r", _LO32, "r", "", "r") /* Instruction has three operands and one operand is stored in ECX register */ -#define __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, _suffix, _type) \ - do { \ - unsigned long _tmp; \ - _type _clv = (_cl).val; \ - _type _srcv = (_src).val; \ - _type _dstv = (_dst).val; \ - \ - __asm__ __volatile__ ( \ - _PRE_EFLAGS("0", "5", "2") \ - _op _suffix " %4,%1 \n" \ - _POST_EFLAGS("0", "5", "2") \ - : "=m" (_eflags), "+r" (_dstv), "=&r" (_tmp) \ - : "c" (_clv) , "r" (_srcv), "i" (EFLAGS_MASK) \ - ); \ - \ - (_cl).val = (unsigned long) _clv; \ - (_src).val = (unsigned long) _srcv; \ - (_dst).val = (unsigned long) _dstv; \ +#define __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, _suffix, _type) \ + do { \ + unsigned long _tmp; \ + _type _clv = (_cl).val; \ + _type _srcv = (_src).val; \ + _type _dstv = (_dst).val; \ + \ + __asm__ __volatile__ ( \ + _PRE_EFLAGS("0", "5", "2") \ + _op _suffix " %4,%1 \n" \ + _POST_EFLAGS("0", "5", "2") \ + : "=m" (_eflags), "+r" (_dstv), "=&r" (_tmp) \ + : "c" (_clv) , "r" (_srcv), "i" (EFLAGS_MASK) \ + ); \ + \ + (_cl).val = (unsigned long) _clv; \ + (_src).val = (unsigned long) _srcv; \ + (_dst).val = (unsigned long) _dstv; \ } while (0) -#define emulate_2op_cl(_op, _cl, _src, _dst, _eflags) \ - do { \ - switch ((_dst).bytes) { \ - case 2: \ - __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \ - "w", unsigned short); \ - break; \ - case 4: \ - __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \ - "l", unsigned int); \ - break; \ - case 8: \ - ON64(__emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \ - "q", unsigned long)); \ - break; \ - } \ +#define emulate_2op_cl(_op, _cl, _src, _dst, _eflags) \ + do { \ + switch ((_dst).bytes) { \ + case 2: \ + __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \ + "w", unsigned short); \ + break; \ + case 4: \ + __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \ + "l", unsigned int); \ + break; \ + case 8: \ + ON64(__emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \ + "q", unsigned long)); \ + break; \ + } \ } while (0) #define __emulate_1op(_op, _dst, _eflags, _suffix) \ @@ -360,13 +360,25 @@ struct gprefix { } while (0) /* instruction has only one source operand, destination is implicit (e.g. 
mul, div, imul, idiv) */ -#define emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags) \ - do { \ - switch((_src).bytes) { \ - case 1: __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags, "b"); break; \ - case 2: __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags, "w"); break; \ - case 4: __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags, "l"); break; \ - case 8: ON64(__emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags, "q")); break; \ +#define emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags) \ + do { \ + switch((_src).bytes) { \ + case 1: \ + __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, \ + _eflags, "b"); \ + break; \ + case 2: \ + __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, \ + _eflags, "w"); \ + break; \ + case 4: \ + __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, \ + _eflags, "l"); \ + break; \ + case 8: \ + ON64(__emulate_1op_rax_rdx(_op, _src, _rax, _rdx, \ + _eflags, "q")); \ + break; \ } \ } while (0) @@ -402,7 +414,7 @@ struct gprefix { (_type)_x; \ }) -#define insn_fetch_arr(_arr, _size, _eip) \ +#define insn_fetch_arr(_arr, _size, _eip) \ ({ rc = do_insn_fetch(ctxt, ops, (_eip), _arr, (_size)); \ if (rc != X86EMUL_CONTINUE) \ goto done; \ -- cgit v1.2.3-70-g09d2 From 0f65dd70a442ff498da10cec0a599fbd9d2d6f9e Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 20 Apr 2011 13:37:53 +0300 Subject: KVM: x86 emulator: drop vcpu argument from memory read/write callbacks Making the emulator caller agnostic. Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_emulate.h | 34 +++++++++++------------- arch/x86/kvm/emulate.c | 54 ++++++++++++++++++-------------------- arch/x86/kvm/x86.c | 54 ++++++++++++++++++++++++-------------- 3 files changed, 75 insertions(+), 67 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 9b760c8f257..b4d846708a4 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -92,8 +92,9 @@ struct x86_emulate_ops { * @val: [OUT] Value read from memory, zero-extended to 'u_long'. * @bytes: [IN ] Number of bytes to read from memory. */ - int (*read_std)(unsigned long addr, void *val, - unsigned int bytes, struct kvm_vcpu *vcpu, + int (*read_std)(struct x86_emulate_ctxt *ctxt, + unsigned long addr, void *val, + unsigned int bytes, struct x86_exception *fault); /* @@ -103,8 +104,8 @@ struct x86_emulate_ops { * @val: [OUT] Value write to memory, zero-extended to 'u_long'. * @bytes: [IN ] Number of bytes to write to memory. */ - int (*write_std)(unsigned long addr, void *val, - unsigned int bytes, struct kvm_vcpu *vcpu, + int (*write_std)(struct x86_emulate_ctxt *ctxt, + unsigned long addr, void *val, unsigned int bytes, struct x86_exception *fault); /* * fetch: Read bytes of standard (non-emulated/special) memory. @@ -113,8 +114,8 @@ struct x86_emulate_ops { * @val: [OUT] Value read from memory, zero-extended to 'u_long'. * @bytes: [IN ] Number of bytes to read from memory. */ - int (*fetch)(unsigned long addr, void *val, - unsigned int bytes, struct kvm_vcpu *vcpu, + int (*fetch)(struct x86_emulate_ctxt *ctxt, + unsigned long addr, void *val, unsigned int bytes, struct x86_exception *fault); /* @@ -123,11 +124,9 @@ struct x86_emulate_ops { * @val: [OUT] Value read from memory, zero-extended to 'u_long'. * @bytes: [IN ] Number of bytes to read from memory. 
*/ - int (*read_emulated)(unsigned long addr, - void *val, - unsigned int bytes, - struct x86_exception *fault, - struct kvm_vcpu *vcpu); + int (*read_emulated)(struct x86_emulate_ctxt *ctxt, + unsigned long addr, void *val, unsigned int bytes, + struct x86_exception *fault); /* * write_emulated: Write bytes to emulated/special memory area. @@ -136,11 +135,10 @@ struct x86_emulate_ops { * required). * @bytes: [IN ] Number of bytes to write to memory. */ - int (*write_emulated)(unsigned long addr, - const void *val, + int (*write_emulated)(struct x86_emulate_ctxt *ctxt, + unsigned long addr, const void *val, unsigned int bytes, - struct x86_exception *fault, - struct kvm_vcpu *vcpu); + struct x86_exception *fault); /* * cmpxchg_emulated: Emulate an atomic (LOCKed) CMPXCHG operation on an @@ -150,12 +148,12 @@ struct x86_emulate_ops { * @new: [IN ] Value to write to @addr. * @bytes: [IN ] Number of bytes to access using CMPXCHG. */ - int (*cmpxchg_emulated)(unsigned long addr, + int (*cmpxchg_emulated)(struct x86_emulate_ctxt *ctxt, + unsigned long addr, const void *old, const void *new, unsigned int bytes, - struct x86_exception *fault, - struct kvm_vcpu *vcpu); + struct x86_exception *fault); int (*pio_in_emulated)(int size, unsigned short port, void *val, unsigned int count, struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 4a5b61ff0ae..ff64b17df77 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -645,8 +645,7 @@ static int segmented_read_std(struct x86_emulate_ctxt *ctxt, rc = linearize(ctxt, addr, size, false, &linear); if (rc != X86EMUL_CONTINUE) return rc; - return ctxt->ops->read_std(linear, data, size, ctxt->vcpu, - &ctxt->exception); + return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception); } static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt, @@ -665,8 +664,8 @@ static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt, rc = __linearize(ctxt, addr, size, false, true, &linear); if (rc != X86EMUL_CONTINUE) return rc; - rc = ops->fetch(linear, fc->data + cur_size, - size, ctxt->vcpu, &ctxt->exception); + rc = ops->fetch(ctxt, linear, fc->data + cur_size, + size, &ctxt->exception); if (rc != X86EMUL_CONTINUE) return rc; fc->end += size; @@ -1047,8 +1046,8 @@ static int read_emulated(struct x86_emulate_ctxt *ctxt, if (mc->pos < mc->end) goto read_cached; - rc = ops->read_emulated(addr, mc->data + mc->end, n, - &ctxt->exception, ctxt->vcpu); + rc = ops->read_emulated(ctxt, addr, mc->data + mc->end, n, + &ctxt->exception); if (rc != X86EMUL_CONTINUE) return rc; mc->end += n; @@ -1087,8 +1086,8 @@ static int segmented_write(struct x86_emulate_ctxt *ctxt, rc = linearize(ctxt, addr, size, true, &linear); if (rc != X86EMUL_CONTINUE) return rc; - return ctxt->ops->write_emulated(linear, data, size, - &ctxt->exception, ctxt->vcpu); + return ctxt->ops->write_emulated(ctxt, linear, data, size, + &ctxt->exception); } static int segmented_cmpxchg(struct x86_emulate_ctxt *ctxt, @@ -1102,8 +1101,8 @@ static int segmented_cmpxchg(struct x86_emulate_ctxt *ctxt, rc = linearize(ctxt, addr, size, true, &linear); if (rc != X86EMUL_CONTINUE) return rc; - return ctxt->ops->cmpxchg_emulated(linear, orig_data, data, - size, &ctxt->exception, ctxt->vcpu); + return ctxt->ops->cmpxchg_emulated(ctxt, linear, orig_data, data, + size, &ctxt->exception); } static int pio_in_emulated(struct x86_emulate_ctxt *ctxt, @@ -1168,8 +1167,7 @@ static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt, if (dt.size < index * 8 + 7) 
return emulate_gp(ctxt, selector & 0xfffc); addr = dt.address + index * 8; - ret = ops->read_std(addr, desc, sizeof *desc, ctxt->vcpu, - &ctxt->exception); + ret = ops->read_std(ctxt, addr, desc, sizeof *desc, &ctxt->exception); return ret; } @@ -1190,8 +1188,7 @@ static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt, return emulate_gp(ctxt, selector & 0xfffc); addr = dt.address + index * 8; - ret = ops->write_std(addr, desc, sizeof *desc, ctxt->vcpu, - &ctxt->exception); + ret = ops->write_std(ctxt, addr, desc, sizeof *desc, &ctxt->exception); return ret; } @@ -1545,11 +1542,11 @@ int emulate_int_real(struct x86_emulate_ctxt *ctxt, eip_addr = dt.address + (irq << 2); cs_addr = dt.address + (irq << 2) + 2; - rc = ops->read_std(cs_addr, &cs, 2, ctxt->vcpu, &ctxt->exception); + rc = ops->read_std(ctxt, cs_addr, &cs, 2, &ctxt->exception); if (rc != X86EMUL_CONTINUE) return rc; - rc = ops->read_std(eip_addr, &eip, 2, ctxt->vcpu, &ctxt->exception); + rc = ops->read_std(ctxt, eip_addr, &eip, 2, &ctxt->exception); if (rc != X86EMUL_CONTINUE) return rc; @@ -2036,13 +2033,12 @@ static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt, #ifdef CONFIG_X86_64 base |= ((u64)base3) << 32; #endif - r = ops->read_std(base + 102, &io_bitmap_ptr, 2, ctxt->vcpu, NULL); + r = ops->read_std(ctxt, base + 102, &io_bitmap_ptr, 2, NULL); if (r != X86EMUL_CONTINUE) return false; if (io_bitmap_ptr + port/8 > desc_limit_scaled(&tr_seg)) return false; - r = ops->read_std(base + io_bitmap_ptr + port/8, &perm, 2, ctxt->vcpu, - NULL); + r = ops->read_std(ctxt, base + io_bitmap_ptr + port/8, &perm, 2, NULL); if (r != X86EMUL_CONTINUE) return false; if ((perm >> bit_idx) & mask) @@ -2150,7 +2146,7 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt, int ret; u32 new_tss_base = get_desc_base(new_desc); - ret = ops->read_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, + ret = ops->read_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg, &ctxt->exception); if (ret != X86EMUL_CONTINUE) /* FIXME: need to provide precise fault address */ @@ -2158,13 +2154,13 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt, save_state_to_tss16(ctxt, ops, &tss_seg); - ret = ops->write_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, + ret = ops->write_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg, &ctxt->exception); if (ret != X86EMUL_CONTINUE) /* FIXME: need to provide precise fault address */ return ret; - ret = ops->read_std(new_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, + ret = ops->read_std(ctxt, new_tss_base, &tss_seg, sizeof tss_seg, &ctxt->exception); if (ret != X86EMUL_CONTINUE) /* FIXME: need to provide precise fault address */ @@ -2173,10 +2169,10 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt, if (old_tss_sel != 0xffff) { tss_seg.prev_task_link = old_tss_sel; - ret = ops->write_std(new_tss_base, + ret = ops->write_std(ctxt, new_tss_base, &tss_seg.prev_task_link, sizeof tss_seg.prev_task_link, - ctxt->vcpu, &ctxt->exception); + &ctxt->exception); if (ret != X86EMUL_CONTINUE) /* FIXME: need to provide precise fault address */ return ret; @@ -2282,7 +2278,7 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt, int ret; u32 new_tss_base = get_desc_base(new_desc); - ret = ops->read_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, + ret = ops->read_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg, &ctxt->exception); if (ret != X86EMUL_CONTINUE) /* FIXME: need to provide precise fault address */ @@ -2290,13 +2286,13 @@ static int task_switch_32(struct 
x86_emulate_ctxt *ctxt, save_state_to_tss32(ctxt, ops, &tss_seg); - ret = ops->write_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, + ret = ops->write_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg, &ctxt->exception); if (ret != X86EMUL_CONTINUE) /* FIXME: need to provide precise fault address */ return ret; - ret = ops->read_std(new_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, + ret = ops->read_std(ctxt, new_tss_base, &tss_seg, sizeof tss_seg, &ctxt->exception); if (ret != X86EMUL_CONTINUE) /* FIXME: need to provide precise fault address */ @@ -2305,10 +2301,10 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt, if (old_tss_sel != 0xffff) { tss_seg.prev_task_link = old_tss_sel; - ret = ops->write_std(new_tss_base, + ret = ops->write_std(ctxt, new_tss_base, &tss_seg.prev_task_link, sizeof tss_seg.prev_task_link, - ctxt->vcpu, &ctxt->exception); + &ctxt->exception); if (ret != X86EMUL_CONTINUE) /* FIXME: need to provide precise fault address */ return ret; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6aa137701cd..274652ae6d5 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -63,6 +63,9 @@ #define KVM_MAX_MCE_BANKS 32 #define KVM_MCE_CAP_SUPPORTED (MCG_CTL_P | MCG_SER_P) +#define emul_to_vcpu(ctxt) \ + container_of(ctxt, struct kvm_vcpu, arch.emulate_ctxt) + /* EFER defaults: * - enable syscall per default because its emulated by KVM * - enable LME and LMA per default on 64 bit KVM @@ -3760,37 +3763,43 @@ out: } /* used for instruction fetching */ -static int kvm_fetch_guest_virt(gva_t addr, void *val, unsigned int bytes, - struct kvm_vcpu *vcpu, +static int kvm_fetch_guest_virt(struct x86_emulate_ctxt *ctxt, + gva_t addr, void *val, unsigned int bytes, struct x86_exception *exception) { + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; + return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access | PFERR_FETCH_MASK, exception); } -static int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes, - struct kvm_vcpu *vcpu, +static int kvm_read_guest_virt(struct x86_emulate_ctxt *ctxt, + gva_t addr, void *val, unsigned int bytes, struct x86_exception *exception) { + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? 
PFERR_USER_MASK : 0; + return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access, exception); } -static int kvm_read_guest_virt_system(gva_t addr, void *val, unsigned int bytes, - struct kvm_vcpu *vcpu, +static int kvm_read_guest_virt_system(struct x86_emulate_ctxt *ctxt, + gva_t addr, void *val, unsigned int bytes, struct x86_exception *exception) { + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, exception); } -static int kvm_write_guest_virt_system(gva_t addr, void *val, +static int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt, + gva_t addr, void *val, unsigned int bytes, - struct kvm_vcpu *vcpu, struct x86_exception *exception) { + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); void *data = val; int r = X86EMUL_CONTINUE; @@ -3818,12 +3827,13 @@ out: return r; } -static int emulator_read_emulated(unsigned long addr, +static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt, + unsigned long addr, void *val, unsigned int bytes, - struct x86_exception *exception, - struct kvm_vcpu *vcpu) + struct x86_exception *exception) { + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); gpa_t gpa; int handled; @@ -3844,7 +3854,7 @@ static int emulator_read_emulated(unsigned long addr, if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) goto mmio; - if (kvm_read_guest_virt(addr, val, bytes, vcpu, exception) + if (kvm_read_guest_virt(ctxt, addr, val, bytes, exception) == X86EMUL_CONTINUE) return X86EMUL_CONTINUE; @@ -3933,12 +3943,14 @@ mmio: return X86EMUL_CONTINUE; } -int emulator_write_emulated(unsigned long addr, +int emulator_write_emulated(struct x86_emulate_ctxt *ctxt, + unsigned long addr, const void *val, unsigned int bytes, - struct x86_exception *exception, - struct kvm_vcpu *vcpu) + struct x86_exception *exception) { + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); + /* Crossing a page boundary? */ if (((addr + bytes - 1) ^ addr) & PAGE_MASK) { int rc, now; @@ -3966,13 +3978,14 @@ int emulator_write_emulated(unsigned long addr, (cmpxchg64((u64 *)(ptr), *(u64 *)(old), *(u64 *)(new)) == *(u64 *)(old)) #endif -static int emulator_cmpxchg_emulated(unsigned long addr, +static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt, + unsigned long addr, const void *old, const void *new, unsigned int bytes, - struct x86_exception *exception, - struct kvm_vcpu *vcpu) + struct x86_exception *exception) { + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); gpa_t gpa; struct page *page; char *kaddr; @@ -4028,7 +4041,7 @@ static int emulator_cmpxchg_emulated(unsigned long addr, emul_write: printk_once(KERN_WARNING "kvm: emulating exchange as write\n"); - return emulator_write_emulated(addr, new, bytes, exception, vcpu); + return emulator_write_emulated(ctxt, addr, new, bytes, exception); } static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) @@ -5009,7 +5022,8 @@ int kvm_fix_hypercall(struct kvm_vcpu *vcpu) kvm_x86_ops->patch_hypercall(vcpu, instruction); - return emulator_write_emulated(rip, instruction, 3, NULL, vcpu); + return emulator_write_emulated(&vcpu->arch.emulate_ctxt, + rip, instruction, 3, NULL); } void realmode_lgdt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) -- cgit v1.2.3-70-g09d2 From ca1d4a9e772bde0a0b8cda61ee9fdca29f80f361 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 20 Apr 2011 13:37:53 +0300 Subject: KVM: x86 emulator: drop vcpu argument from pio callbacks Making the emulator caller agnostic. 
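What makes the conversion possible is that the emulation context is embedded in the vcpu, so a callback that receives only the context pointer can recover the enclosing vcpu with container_of()-style pointer arithmetic, which is all emul_to_vcpu() does. A minimal stand-alone sketch of the pattern (vcpu_sketch and emu_ctxt are invented reduced types, not the kernel's):

    #include <stddef.h>

    #define container_of_sketch(ptr, type, member) \
            ((type *)((char *)(ptr) - offsetof(type, member)))

    struct emu_ctxt { unsigned long eip; };

    struct vcpu_sketch {
            int id;
            struct emu_ctxt emulate_ctxt;   /* embedded, as in kvm_vcpu_arch */
    };

    static int callback_sketch(struct emu_ctxt *ctxt)
    {
            /* recover the vcpu without it appearing in the signature */
            struct vcpu_sketch *v =
                    container_of_sketch(ctxt, struct vcpu_sketch, emulate_ctxt);
            return v->id;
    }

Calling callback_sketch(&v.emulate_ctxt) on a local struct vcpu_sketch v returns v.id, even though the callback never names the vcpu type.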
Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_emulate.h | 10 ++++++---- arch/x86/kvm/emulate.c | 6 +++--- arch/x86/kvm/x86.c | 18 ++++++++++++------ 3 files changed, 21 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index b4d846708a4..1348bdf14a4 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -155,11 +155,13 @@ struct x86_emulate_ops { unsigned int bytes, struct x86_exception *fault); - int (*pio_in_emulated)(int size, unsigned short port, void *val, - unsigned int count, struct kvm_vcpu *vcpu); + int (*pio_in_emulated)(struct x86_emulate_ctxt *ctxt, + int size, unsigned short port, void *val, + unsigned int count); - int (*pio_out_emulated)(int size, unsigned short port, const void *val, - unsigned int count, struct kvm_vcpu *vcpu); + int (*pio_out_emulated)(struct x86_emulate_ctxt *ctxt, + int size, unsigned short port, const void *val, + unsigned int count); bool (*get_cached_descriptor)(struct desc_struct *desc, u32 *base3, int seg, struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index ff64b17df77..8af08a16f4d 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -1125,7 +1125,7 @@ static int pio_in_emulated(struct x86_emulate_ctxt *ctxt, if (n == 0) n = 1; rc->pos = rc->end = 0; - if (!ops->pio_in_emulated(size, port, rc->data, n, ctxt->vcpu)) + if (!ops->pio_in_emulated(ctxt, size, port, rc->data, n)) return 0; rc->end = n * size; } @@ -3892,8 +3892,8 @@ special_insn: case 0xef: /* out dx,(e/r)ax */ c->dst.val = c->regs[VCPU_REGS_RDX]; do_io_out: - ops->pio_out_emulated(c->src.bytes, c->dst.val, - &c->src.val, 1, ctxt->vcpu); + ops->pio_out_emulated(ctxt, c->src.bytes, c->dst.val, + &c->src.val, 1); c->dst.type = OP_NONE; /* Disable writeback. 
*/ break; case 0xf4: /* hlt */ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 274652ae6d5..e9040a9b25c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4060,9 +4060,12 @@ static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) } -static int emulator_pio_in_emulated(int size, unsigned short port, void *val, - unsigned int count, struct kvm_vcpu *vcpu) +static int emulator_pio_in_emulated(struct x86_emulate_ctxt *ctxt, + int size, unsigned short port, void *val, + unsigned int count) { + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); + if (vcpu->arch.pio.count) goto data_avail; @@ -4090,10 +4093,12 @@ static int emulator_pio_in_emulated(int size, unsigned short port, void *val, return 0; } -static int emulator_pio_out_emulated(int size, unsigned short port, - const void *val, unsigned int count, - struct kvm_vcpu *vcpu) +static int emulator_pio_out_emulated(struct x86_emulate_ctxt *ctxt, + int size, unsigned short port, + const void *val, unsigned int count) { + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); + trace_kvm_pio(1, port, size, count); vcpu->arch.pio.port = port; @@ -4614,7 +4619,8 @@ EXPORT_SYMBOL_GPL(x86_emulate_instruction); int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port) { unsigned long val = kvm_register_read(vcpu, VCPU_REGS_RAX); - int ret = emulator_pio_out_emulated(size, port, &val, 1, vcpu); + int ret = emulator_pio_out_emulated(&vcpu->arch.emulate_ctxt, + size, port, &val, 1); /* do not return to emulator after return from userspace */ vcpu->arch.pio.count = 0; return ret; -- cgit v1.2.3-70-g09d2 From 4bff1e86ad286d4b3a54902540abeeaf95e64db3 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 20 Apr 2011 13:37:53 +0300 Subject: KVM: x86 emulator: drop vcpu argument from segment/gdt/idt callbacks Making the emulator caller agnostic. 
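After the conversion the callback table converges on one shape: every hook takes the context as its first and only identifying argument. A reduced sketch of that shape for orientation (emu_ctxt, desc_ptr_sketch and emu_ops_sketch are invented stand-ins, not the real x86_emulate_ops):

    struct emu_ctxt;        /* opaque to the emulator core */

    struct desc_ptr_sketch {
            unsigned short size;
            unsigned long address;
    };

    struct emu_ops_sketch {
            void (*get_gdt)(struct emu_ctxt *ctxt, struct desc_ptr_sketch *dt);
            void (*get_idt)(struct emu_ctxt *ctxt, struct desc_ptr_sketch *dt);
            unsigned short (*get_segment_selector)(struct emu_ctxt *ctxt, int seg);
            void (*set_segment_selector)(struct emu_ctxt *ctxt,
                                         unsigned short sel, int seg);
            unsigned long (*get_cached_segment_base)(struct emu_ctxt *ctxt,
                                                     int seg);
    };

Each backend fills the table with functions that recover their private state from ctxt, so the emulator core never needs to know about the vcpu type.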
[Takuya Yoshikawa: fix typo leading to LDT failures] Signed-off-by: Takuya Yoshikawa Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_emulate.h | 22 +++++--- arch/x86/kvm/emulate.c | 112 ++++++++++++++++++------------------- arch/x86/kvm/x86.c | 39 +++++++------ 3 files changed, 90 insertions(+), 83 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 1348bdf14a4..656046a1bd5 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -163,15 +163,19 @@ struct x86_emulate_ops { int size, unsigned short port, const void *val, unsigned int count); - bool (*get_cached_descriptor)(struct desc_struct *desc, u32 *base3, - int seg, struct kvm_vcpu *vcpu); - void (*set_cached_descriptor)(struct desc_struct *desc, u32 base3, - int seg, struct kvm_vcpu *vcpu); - u16 (*get_segment_selector)(int seg, struct kvm_vcpu *vcpu); - void (*set_segment_selector)(u16 sel, int seg, struct kvm_vcpu *vcpu); - unsigned long (*get_cached_segment_base)(int seg, struct kvm_vcpu *vcpu); - void (*get_gdt)(struct desc_ptr *dt, struct kvm_vcpu *vcpu); - void (*get_idt)(struct desc_ptr *dt, struct kvm_vcpu *vcpu); + bool (*get_cached_descriptor)(struct x86_emulate_ctxt *ctxt, + struct desc_struct *desc, u32 *base3, + int seg); + void (*set_cached_descriptor)(struct x86_emulate_ctxt *ctxt, + struct desc_struct *desc, u32 base3, + int seg); + u16 (*get_segment_selector)(struct x86_emulate_ctxt *ctxt, int seg); + void (*set_segment_selector)(struct x86_emulate_ctxt *ctxt, + u16 sel, int seg); + unsigned long (*get_cached_segment_base)(struct x86_emulate_ctxt *ctxt, + int seg); + void (*get_gdt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt); + void (*get_idt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt); ulong (*get_cr)(int cr, struct kvm_vcpu *vcpu); int (*set_cr)(int cr, ulong val, struct kvm_vcpu *vcpu); int (*cpl)(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 8af08a16f4d..9602889f0f7 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -495,7 +495,7 @@ static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS) return 0; - return ops->get_cached_segment_base(seg, ctxt->vcpu); + return ops->get_cached_segment_base(ctxt, seg); } static unsigned seg_override(struct x86_emulate_ctxt *ctxt, @@ -573,8 +573,8 @@ static int __linearize(struct x86_emulate_ctxt *ctxt, return emulate_gp(ctxt, 0); break; default: - usable = ctxt->ops->get_cached_descriptor(&desc, NULL, addr.seg, - ctxt->vcpu); + usable = ctxt->ops->get_cached_descriptor(ctxt, &desc, NULL, + addr.seg); if (!usable) goto bad; /* code segment or read-only data segment */ @@ -597,7 +597,7 @@ static int __linearize(struct x86_emulate_ctxt *ctxt, goto bad; } cpl = ctxt->ops->cpl(ctxt->vcpu); - rpl = ctxt->ops->get_segment_selector(addr.seg, ctxt->vcpu) & 3; + rpl = ctxt->ops->get_segment_selector(ctxt, addr.seg) & 3; cpl = max(cpl, rpl); if (!(desc.type & 8)) { /* data segment */ @@ -1142,14 +1142,14 @@ static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt, if (selector & 1 << 2) { struct desc_struct desc; memset (dt, 0, sizeof *dt); - if (!ops->get_cached_descriptor(&desc, NULL, VCPU_SREG_LDTR, - ctxt->vcpu)) + if (!ops->get_cached_descriptor(ctxt, &desc, NULL, + VCPU_SREG_LDTR)) return; dt->size = desc_limit_scaled(&desc); /* what if limit > 65535? 
*/ dt->address = get_desc_base(&desc); } else - ops->get_gdt(dt, ctxt->vcpu); + ops->get_gdt(ctxt, dt); } /* allowed just for 8 bytes segments */ @@ -1304,8 +1304,8 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, return ret; } load: - ops->set_segment_selector(selector, seg, ctxt->vcpu); - ops->set_cached_descriptor(&seg_desc, 0, seg, ctxt->vcpu); + ops->set_segment_selector(ctxt, selector, seg); + ops->set_cached_descriptor(ctxt, &seg_desc, 0, seg); return X86EMUL_CONTINUE; exception: emulate_exception(ctxt, err_vec, err_code, true); @@ -1446,7 +1446,7 @@ static int emulate_push_sreg(struct x86_emulate_ctxt *ctxt, { struct decode_cache *c = &ctxt->decode; - c->src.val = ops->get_segment_selector(seg, ctxt->vcpu); + c->src.val = ops->get_segment_selector(ctxt, seg); return em_push(ctxt); } @@ -1527,7 +1527,7 @@ int emulate_int_real(struct x86_emulate_ctxt *ctxt, ctxt->eflags &= ~(EFLG_IF | EFLG_TF | EFLG_AC); - c->src.val = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu); + c->src.val = ops->get_segment_selector(ctxt, VCPU_SREG_CS); rc = em_push(ctxt); if (rc != X86EMUL_CONTINUE) return rc; @@ -1537,7 +1537,7 @@ int emulate_int_real(struct x86_emulate_ctxt *ctxt, if (rc != X86EMUL_CONTINUE) return rc; - ops->get_idt(&dt, ctxt->vcpu); + ops->get_idt(ctxt, &dt); eip_addr = dt.address + (irq << 2); cs_addr = dt.address + (irq << 2) + 2; @@ -1814,7 +1814,7 @@ setup_syscalls_segments(struct x86_emulate_ctxt *ctxt, struct desc_struct *ss) { memset(cs, 0, sizeof(struct desc_struct)); - ops->get_cached_descriptor(cs, NULL, VCPU_SREG_CS, ctxt->vcpu); + ops->get_cached_descriptor(ctxt, cs, NULL, VCPU_SREG_CS); memset(ss, 0, sizeof(struct desc_struct)); cs->l = 0; /* will be adjusted later */ @@ -1861,10 +1861,10 @@ emulate_syscall(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) cs.d = 0; cs.l = 1; } - ops->set_cached_descriptor(&cs, 0, VCPU_SREG_CS, ctxt->vcpu); - ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu); - ops->set_cached_descriptor(&ss, 0, VCPU_SREG_SS, ctxt->vcpu); - ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu); + ops->set_cached_descriptor(ctxt, &cs, 0, VCPU_SREG_CS); + ops->set_segment_selector(ctxt, cs_sel, VCPU_SREG_CS); + ops->set_cached_descriptor(ctxt, &ss, 0, VCPU_SREG_SS); + ops->set_segment_selector(ctxt, ss_sel, VCPU_SREG_SS); c->regs[VCPU_REGS_RCX] = c->eip; if (is_long_mode(ctxt->vcpu)) { @@ -1933,10 +1933,10 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) cs.l = 1; } - ops->set_cached_descriptor(&cs, 0, VCPU_SREG_CS, ctxt->vcpu); - ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu); - ops->set_cached_descriptor(&ss, 0, VCPU_SREG_SS, ctxt->vcpu); - ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu); + ops->set_cached_descriptor(ctxt, &cs, 0, VCPU_SREG_CS); + ops->set_segment_selector(ctxt, cs_sel, VCPU_SREG_CS); + ops->set_cached_descriptor(ctxt, &ss, 0, VCPU_SREG_SS); + ops->set_segment_selector(ctxt, ss_sel, VCPU_SREG_SS); ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_EIP, &msr_data); c->eip = msr_data; @@ -1990,10 +1990,10 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) cs_sel |= SELECTOR_RPL_MASK; ss_sel |= SELECTOR_RPL_MASK; - ops->set_cached_descriptor(&cs, 0, VCPU_SREG_CS, ctxt->vcpu); - ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu); - ops->set_cached_descriptor(&ss, 0, VCPU_SREG_SS, ctxt->vcpu); - ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu); + ops->set_cached_descriptor(ctxt, &cs, 0, VCPU_SREG_CS); + 
ops->set_segment_selector(ctxt, cs_sel, VCPU_SREG_CS); + ops->set_cached_descriptor(ctxt, &ss, 0, VCPU_SREG_SS); + ops->set_segment_selector(ctxt, ss_sel, VCPU_SREG_SS); c->eip = c->regs[VCPU_REGS_RDX]; c->regs[VCPU_REGS_RSP] = c->regs[VCPU_REGS_RCX]; @@ -2024,7 +2024,7 @@ static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt, unsigned mask = (1 << len) - 1; unsigned long base; - ops->get_cached_descriptor(&tr_seg, &base3, VCPU_SREG_TR, ctxt->vcpu); + ops->get_cached_descriptor(ctxt, &tr_seg, &base3, VCPU_SREG_TR); if (!tr_seg.p) return false; if (desc_limit_scaled(&tr_seg) < 103) @@ -2079,11 +2079,11 @@ static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt, tss->si = c->regs[VCPU_REGS_RSI]; tss->di = c->regs[VCPU_REGS_RDI]; - tss->es = ops->get_segment_selector(VCPU_SREG_ES, ctxt->vcpu); - tss->cs = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu); - tss->ss = ops->get_segment_selector(VCPU_SREG_SS, ctxt->vcpu); - tss->ds = ops->get_segment_selector(VCPU_SREG_DS, ctxt->vcpu); - tss->ldt = ops->get_segment_selector(VCPU_SREG_LDTR, ctxt->vcpu); + tss->es = ops->get_segment_selector(ctxt, VCPU_SREG_ES); + tss->cs = ops->get_segment_selector(ctxt, VCPU_SREG_CS); + tss->ss = ops->get_segment_selector(ctxt, VCPU_SREG_SS); + tss->ds = ops->get_segment_selector(ctxt, VCPU_SREG_DS); + tss->ldt = ops->get_segment_selector(ctxt, VCPU_SREG_LDTR); } static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt, @@ -2108,11 +2108,11 @@ static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt, * SDM says that segment selectors are loaded before segment * descriptors */ - ops->set_segment_selector(tss->ldt, VCPU_SREG_LDTR, ctxt->vcpu); - ops->set_segment_selector(tss->es, VCPU_SREG_ES, ctxt->vcpu); - ops->set_segment_selector(tss->cs, VCPU_SREG_CS, ctxt->vcpu); - ops->set_segment_selector(tss->ss, VCPU_SREG_SS, ctxt->vcpu); - ops->set_segment_selector(tss->ds, VCPU_SREG_DS, ctxt->vcpu); + ops->set_segment_selector(ctxt, tss->ldt, VCPU_SREG_LDTR); + ops->set_segment_selector(ctxt, tss->es, VCPU_SREG_ES); + ops->set_segment_selector(ctxt, tss->cs, VCPU_SREG_CS); + ops->set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS); + ops->set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS); /* * Now load segment descriptors. 
If fault happenes at this stage @@ -2199,13 +2199,13 @@ static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt, tss->esi = c->regs[VCPU_REGS_RSI]; tss->edi = c->regs[VCPU_REGS_RDI]; - tss->es = ops->get_segment_selector(VCPU_SREG_ES, ctxt->vcpu); - tss->cs = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu); - tss->ss = ops->get_segment_selector(VCPU_SREG_SS, ctxt->vcpu); - tss->ds = ops->get_segment_selector(VCPU_SREG_DS, ctxt->vcpu); - tss->fs = ops->get_segment_selector(VCPU_SREG_FS, ctxt->vcpu); - tss->gs = ops->get_segment_selector(VCPU_SREG_GS, ctxt->vcpu); - tss->ldt_selector = ops->get_segment_selector(VCPU_SREG_LDTR, ctxt->vcpu); + tss->es = ops->get_segment_selector(ctxt, VCPU_SREG_ES); + tss->cs = ops->get_segment_selector(ctxt, VCPU_SREG_CS); + tss->ss = ops->get_segment_selector(ctxt, VCPU_SREG_SS); + tss->ds = ops->get_segment_selector(ctxt, VCPU_SREG_DS); + tss->fs = ops->get_segment_selector(ctxt, VCPU_SREG_FS); + tss->gs = ops->get_segment_selector(ctxt, VCPU_SREG_GS); + tss->ldt_selector = ops->get_segment_selector(ctxt, VCPU_SREG_LDTR); } static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt, @@ -2232,13 +2232,13 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt, * SDM says that segment selectors are loaded before segment * descriptors */ - ops->set_segment_selector(tss->ldt_selector, VCPU_SREG_LDTR, ctxt->vcpu); - ops->set_segment_selector(tss->es, VCPU_SREG_ES, ctxt->vcpu); - ops->set_segment_selector(tss->cs, VCPU_SREG_CS, ctxt->vcpu); - ops->set_segment_selector(tss->ss, VCPU_SREG_SS, ctxt->vcpu); - ops->set_segment_selector(tss->ds, VCPU_SREG_DS, ctxt->vcpu); - ops->set_segment_selector(tss->fs, VCPU_SREG_FS, ctxt->vcpu); - ops->set_segment_selector(tss->gs, VCPU_SREG_GS, ctxt->vcpu); + ops->set_segment_selector(ctxt, tss->ldt_selector, VCPU_SREG_LDTR); + ops->set_segment_selector(ctxt, tss->es, VCPU_SREG_ES); + ops->set_segment_selector(ctxt, tss->cs, VCPU_SREG_CS); + ops->set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS); + ops->set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS); + ops->set_segment_selector(ctxt, tss->fs, VCPU_SREG_FS); + ops->set_segment_selector(ctxt, tss->gs, VCPU_SREG_GS); /* * Now load segment descriptors. If fault happenes at this stage @@ -2320,9 +2320,9 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, { struct desc_struct curr_tss_desc, next_tss_desc; int ret; - u16 old_tss_sel = ops->get_segment_selector(VCPU_SREG_TR, ctxt->vcpu); + u16 old_tss_sel = ops->get_segment_selector(ctxt, VCPU_SREG_TR); ulong old_tss_base = - ops->get_cached_segment_base(VCPU_SREG_TR, ctxt->vcpu); + ops->get_cached_segment_base(ctxt, VCPU_SREG_TR); u32 desc_limit; /* FIXME: old_tss_base == ~0 ? 
*/ @@ -2383,8 +2383,8 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, } ops->set_cr(0, ops->get_cr(0, ctxt->vcpu) | X86_CR0_TS, ctxt->vcpu); - ops->set_cached_descriptor(&next_tss_desc, 0, VCPU_SREG_TR, ctxt->vcpu); - ops->set_segment_selector(tss_selector, VCPU_SREG_TR, ctxt->vcpu); + ops->set_cached_descriptor(ctxt, &next_tss_desc, 0, VCPU_SREG_TR); + ops->set_segment_selector(ctxt, tss_selector, VCPU_SREG_TR); if (has_error_code) { struct decode_cache *c = &ctxt->decode; @@ -2475,7 +2475,7 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt) ulong old_eip; int rc; - old_cs = ctxt->ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu); + old_cs = ctxt->ops->get_segment_selector(ctxt, VCPU_SREG_CS); old_eip = c->eip; memcpy(&sel, c->src.valptr + c->op_bytes, 2); @@ -3743,7 +3743,7 @@ special_insn: rc = emulate_ud(ctxt); goto done; } - c->dst.val = ops->get_segment_selector(c->modrm_reg, ctxt->vcpu); + c->dst.val = ops->get_segment_selector(ctxt, c->modrm_reg); break; case 0x8d: /* lea r16/r32, m */ c->dst.val = c->src.addr.mem.ea; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e9040a9b25c..6a7fbf671b2 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4237,28 +4237,29 @@ static int emulator_get_cpl(struct kvm_vcpu *vcpu) return kvm_x86_ops->get_cpl(vcpu); } -static void emulator_get_gdt(struct desc_ptr *dt, struct kvm_vcpu *vcpu) +static void emulator_get_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt) { - kvm_x86_ops->get_gdt(vcpu, dt); + kvm_x86_ops->get_gdt(emul_to_vcpu(ctxt), dt); } -static void emulator_get_idt(struct desc_ptr *dt, struct kvm_vcpu *vcpu) +static void emulator_get_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt) { - kvm_x86_ops->get_idt(vcpu, dt); + kvm_x86_ops->get_idt(emul_to_vcpu(ctxt), dt); } -static unsigned long emulator_get_cached_segment_base(int seg, - struct kvm_vcpu *vcpu) +static unsigned long emulator_get_cached_segment_base( + struct x86_emulate_ctxt *ctxt, int seg) { - return get_segment_base(vcpu, seg); + return get_segment_base(emul_to_vcpu(ctxt), seg); } -static bool emulator_get_cached_descriptor(struct desc_struct *desc, u32 *base3, - int seg, struct kvm_vcpu *vcpu) +static bool emulator_get_cached_descriptor(struct x86_emulate_ctxt *ctxt, + struct desc_struct *desc, u32 *base3, + int seg) { struct kvm_segment var; - kvm_get_segment(vcpu, &var, seg); + kvm_get_segment(emul_to_vcpu(ctxt), &var, seg); if (var.unusable) return false; @@ -4283,9 +4284,11 @@ static bool emulator_get_cached_descriptor(struct desc_struct *desc, u32 *base3, return true; } -static void emulator_set_cached_descriptor(struct desc_struct *desc, u32 base3, - int seg, struct kvm_vcpu *vcpu) +static void emulator_set_cached_descriptor(struct x86_emulate_ctxt *ctxt, + struct desc_struct *desc, u32 base3, + int seg) { + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); struct kvm_segment var; /* needed to preserve selector */ @@ -4314,22 +4317,22 @@ static void emulator_set_cached_descriptor(struct desc_struct *desc, u32 base3, return; } -static u16 emulator_get_segment_selector(int seg, struct kvm_vcpu *vcpu) +static u16 emulator_get_segment_selector(struct x86_emulate_ctxt *ctxt, int seg) { struct kvm_segment kvm_seg; - kvm_get_segment(vcpu, &kvm_seg, seg); + kvm_get_segment(emul_to_vcpu(ctxt), &kvm_seg, seg); return kvm_seg.selector; } -static void emulator_set_segment_selector(u16 sel, int seg, - struct kvm_vcpu *vcpu) +static void emulator_set_segment_selector(struct x86_emulate_ctxt *ctxt, + u16 sel, int seg) { struct 
kvm_segment kvm_seg; - kvm_get_segment(vcpu, &kvm_seg, seg); + kvm_get_segment(emul_to_vcpu(ctxt), &kvm_seg, seg); kvm_seg.selector = sel; - kvm_set_segment(vcpu, &kvm_seg, seg); + kvm_set_segment(emul_to_vcpu(ctxt), &kvm_seg, seg); } static void emulator_get_fpu(struct x86_emulate_ctxt *ctxt) -- cgit v1.2.3-70-g09d2 From 717746e382e58f075642403eaac26bce0640b2c5 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 20 Apr 2011 13:37:53 +0300 Subject: KVM: x86 emulator: drop vcpu argument from cr/dr/cpl/msr callbacks Making the emulator caller agnostic. Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_emulate.h | 14 +++---- arch/x86/kvm/emulate.c | 84 +++++++++++++++++++------------------- arch/x86/kvm/x86.c | 34 ++++++++++----- 3 files changed, 73 insertions(+), 59 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 656046a1bd5..2c02e753ab8 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -176,13 +176,13 @@ struct x86_emulate_ops { int seg); void (*get_gdt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt); void (*get_idt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt); - ulong (*get_cr)(int cr, struct kvm_vcpu *vcpu); - int (*set_cr)(int cr, ulong val, struct kvm_vcpu *vcpu); - int (*cpl)(struct kvm_vcpu *vcpu); - int (*get_dr)(int dr, unsigned long *dest, struct kvm_vcpu *vcpu); - int (*set_dr)(int dr, unsigned long value, struct kvm_vcpu *vcpu); - int (*set_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); - int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata); + ulong (*get_cr)(struct x86_emulate_ctxt *ctxt, int cr); + int (*set_cr)(struct x86_emulate_ctxt *ctxt, int cr, ulong val); + int (*cpl)(struct x86_emulate_ctxt *ctxt); + int (*get_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong *dest); + int (*set_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong value); + int (*set_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 data); + int (*get_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 *pdata); void (*get_fpu)(struct x86_emulate_ctxt *ctxt); /* disables preempt */ void (*put_fpu)(struct x86_emulate_ctxt *ctxt); /* reenables preempt */ int (*intercept)(struct kvm_vcpu *vcpu, diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 9602889f0f7..33ad16b7db2 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -596,7 +596,7 @@ static int __linearize(struct x86_emulate_ctxt *ctxt, if (addr.ea > lim || (u32)(addr.ea + size - 1) > lim) goto bad; } - cpl = ctxt->ops->cpl(ctxt->vcpu); + cpl = ctxt->ops->cpl(ctxt); rpl = ctxt->ops->get_segment_selector(ctxt, addr.seg) & 3; cpl = max(cpl, rpl); if (!(desc.type & 8)) { @@ -1248,7 +1248,7 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, rpl = selector & 3; dpl = seg_desc.dpl; - cpl = ops->cpl(ctxt->vcpu); + cpl = ops->cpl(ctxt); switch (seg) { case VCPU_SREG_SS: @@ -1407,7 +1407,7 @@ static int emulate_popf(struct x86_emulate_ctxt *ctxt, int rc; unsigned long val, change_mask; int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; - int cpl = ops->cpl(ctxt->vcpu); + int cpl = ops->cpl(ctxt); rc = emulate_pop(ctxt, ops, &val, len); if (rc != X86EMUL_CONTINUE) @@ -1852,7 +1852,7 @@ emulate_syscall(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) setup_syscalls_segments(ctxt, ops, &cs, &ss); - ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data); + ops->get_msr(ctxt, MSR_STAR, &msr_data); msr_data >>= 32; cs_sel = (u16)(msr_data & 0xfffc); 
ss_sel = (u16)(msr_data + 8); @@ -1871,17 +1871,17 @@ emulate_syscall(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) #ifdef CONFIG_X86_64 c->regs[VCPU_REGS_R11] = ctxt->eflags & ~EFLG_RF; - ops->get_msr(ctxt->vcpu, + ops->get_msr(ctxt, ctxt->mode == X86EMUL_MODE_PROT64 ? MSR_LSTAR : MSR_CSTAR, &msr_data); c->eip = msr_data; - ops->get_msr(ctxt->vcpu, MSR_SYSCALL_MASK, &msr_data); + ops->get_msr(ctxt, MSR_SYSCALL_MASK, &msr_data); ctxt->eflags &= ~(msr_data | EFLG_RF); #endif } else { /* legacy mode */ - ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data); + ops->get_msr(ctxt, MSR_STAR, &msr_data); c->eip = (u32)msr_data; ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF); @@ -1910,7 +1910,7 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) setup_syscalls_segments(ctxt, ops, &cs, &ss); - ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data); + ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data); switch (ctxt->mode) { case X86EMUL_MODE_PROT32: if ((msr_data & 0xfffc) == 0x0) @@ -1938,10 +1938,10 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) ops->set_cached_descriptor(ctxt, &ss, 0, VCPU_SREG_SS); ops->set_segment_selector(ctxt, ss_sel, VCPU_SREG_SS); - ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_EIP, &msr_data); + ops->get_msr(ctxt, MSR_IA32_SYSENTER_EIP, &msr_data); c->eip = msr_data; - ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_ESP, &msr_data); + ops->get_msr(ctxt, MSR_IA32_SYSENTER_ESP, &msr_data); c->regs[VCPU_REGS_RSP] = msr_data; return X86EMUL_CONTINUE; @@ -1970,7 +1970,7 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) cs.dpl = 3; ss.dpl = 3; - ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data); + ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data); switch (usermode) { case X86EMUL_MODE_PROT32: cs_sel = (u16)(msr_data + 16); @@ -2010,7 +2010,7 @@ static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt, if (ctxt->mode == X86EMUL_MODE_VM86) return true; iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; - return ops->cpl(ctxt->vcpu) > iopl; + return ops->cpl(ctxt) > iopl; } static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt, @@ -2187,7 +2187,7 @@ static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt, { struct decode_cache *c = &ctxt->decode; - tss->cr3 = ops->get_cr(3, ctxt->vcpu); + tss->cr3 = ops->get_cr(ctxt, 3); tss->eip = c->eip; tss->eflags = ctxt->eflags; tss->eax = c->regs[VCPU_REGS_RAX]; @@ -2215,7 +2215,7 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt, struct decode_cache *c = &ctxt->decode; int ret; - if (ops->set_cr(3, tss->cr3, ctxt->vcpu)) + if (ops->set_cr(ctxt, 3, tss->cr3)) return emulate_gp(ctxt, 0); c->eip = tss->eip; ctxt->eflags = tss->eflags | 2; @@ -2338,7 +2338,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, if (reason != TASK_SWITCH_IRET) { if ((tss_selector & 3) > next_tss_desc.dpl || - ops->cpl(ctxt->vcpu) > next_tss_desc.dpl) + ops->cpl(ctxt) > next_tss_desc.dpl) return emulate_gp(ctxt, 0); } @@ -2382,7 +2382,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, &next_tss_desc); } - ops->set_cr(0, ops->get_cr(0, ctxt->vcpu) | X86_CR0_TS, ctxt->vcpu); + ops->set_cr(ctxt, 0, ops->get_cr(ctxt, 0) | X86_CR0_TS); ops->set_cached_descriptor(ctxt, &next_tss_desc, 0, VCPU_SREG_TR); ops->set_segment_selector(ctxt, tss_selector, VCPU_SREG_TR); @@ -2542,7 +2542,7 @@ static int em_rdtsc(struct x86_emulate_ctxt *ctxt) struct decode_cache *c = &ctxt->decode; u64 tsc = 0; 
- ctxt->ops->get_msr(ctxt->vcpu, MSR_IA32_TSC, &tsc); + ctxt->ops->get_msr(ctxt, MSR_IA32_TSC, &tsc); c->regs[VCPU_REGS_RAX] = (u32)tsc; c->regs[VCPU_REGS_RDX] = tsc >> 32; return X86EMUL_CONTINUE; @@ -2625,8 +2625,8 @@ static int check_cr_write(struct x86_emulate_ctxt *ctxt) ((new_val & X86_CR0_NW) && !(new_val & X86_CR0_CD))) return emulate_gp(ctxt, 0); - cr4 = ctxt->ops->get_cr(4, ctxt->vcpu); - ctxt->ops->get_msr(ctxt->vcpu, MSR_EFER, &efer); + cr4 = ctxt->ops->get_cr(ctxt, 4); + ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); if ((new_val & X86_CR0_PG) && (efer & EFER_LME) && !(cr4 & X86_CR4_PAE)) @@ -2652,8 +2652,8 @@ static int check_cr_write(struct x86_emulate_ctxt *ctxt) case 4: { u64 cr4, efer; - cr4 = ctxt->ops->get_cr(4, ctxt->vcpu); - ctxt->ops->get_msr(ctxt->vcpu, MSR_EFER, &efer); + cr4 = ctxt->ops->get_cr(ctxt, 4); + ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); if ((efer & EFER_LMA) && !(new_val & X86_CR4_PAE)) return emulate_gp(ctxt, 0); @@ -2669,7 +2669,7 @@ static int check_dr7_gd(struct x86_emulate_ctxt *ctxt) { unsigned long dr7; - ctxt->ops->get_dr(7, &dr7, ctxt->vcpu); + ctxt->ops->get_dr(ctxt, 7, &dr7); /* Check if DR7.Global_Enable is set */ return dr7 & (1 << 13); @@ -2684,7 +2684,7 @@ static int check_dr_read(struct x86_emulate_ctxt *ctxt) if (dr > 7) return emulate_ud(ctxt); - cr4 = ctxt->ops->get_cr(4, ctxt->vcpu); + cr4 = ctxt->ops->get_cr(ctxt, 4); if ((cr4 & X86_CR4_DE) && (dr == 4 || dr == 5)) return emulate_ud(ctxt); @@ -2710,7 +2710,7 @@ static int check_svme(struct x86_emulate_ctxt *ctxt) { u64 efer; - ctxt->ops->get_msr(ctxt->vcpu, MSR_EFER, &efer); + ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); if (!(efer & EFER_SVME)) return emulate_ud(ctxt); @@ -2731,9 +2731,9 @@ static int check_svme_pa(struct x86_emulate_ctxt *ctxt) static int check_rdtsc(struct x86_emulate_ctxt *ctxt) { - u64 cr4 = ctxt->ops->get_cr(4, ctxt->vcpu); + u64 cr4 = ctxt->ops->get_cr(ctxt, 4); - if (cr4 & X86_CR4_TSD && ctxt->ops->cpl(ctxt->vcpu)) + if (cr4 & X86_CR4_TSD && ctxt->ops->cpl(ctxt)) return emulate_ud(ctxt); return X86EMUL_CONTINUE; @@ -2741,10 +2741,10 @@ static int check_rdtsc(struct x86_emulate_ctxt *ctxt) static int check_rdpmc(struct x86_emulate_ctxt *ctxt) { - u64 cr4 = ctxt->ops->get_cr(4, ctxt->vcpu); + u64 cr4 = ctxt->ops->get_cr(ctxt, 4); u64 rcx = kvm_register_read(ctxt->vcpu, VCPU_REGS_RCX); - if ((!(cr4 & X86_CR4_PCE) && ctxt->ops->cpl(ctxt->vcpu)) || + if ((!(cr4 & X86_CR4_PCE) && ctxt->ops->cpl(ctxt)) || (rcx > 3)) return emulate_gp(ctxt, 0); @@ -3514,13 +3514,13 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt) } if ((c->d & Sse) - && ((ops->get_cr(0, ctxt->vcpu) & X86_CR0_EM) - || !(ops->get_cr(4, ctxt->vcpu) & X86_CR4_OSFXSR))) { + && ((ops->get_cr(ctxt, 0) & X86_CR0_EM) + || !(ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR))) { rc = emulate_ud(ctxt); goto done; } - if ((c->d & Sse) && (ops->get_cr(0, ctxt->vcpu) & X86_CR0_TS)) { + if ((c->d & Sse) && (ops->get_cr(ctxt, 0) & X86_CR0_TS)) { rc = emulate_nm(ctxt); goto done; } @@ -3533,7 +3533,7 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt) } /* Privileged instruction can be executed only in CPL=0 */ - if ((c->d & Priv) && ops->cpl(ctxt->vcpu)) { + if ((c->d & Priv) && ops->cpl(ctxt)) { rc = emulate_gp(ctxt, 0); goto done; } @@ -4052,11 +4052,11 @@ twobyte_insn: break; case 4: /* smsw */ c->dst.bytes = 2; - c->dst.val = ops->get_cr(0, ctxt->vcpu); + c->dst.val = ops->get_cr(ctxt, 0); break; case 6: /* lmsw */ - ops->set_cr(0, (ops->get_cr(0, ctxt->vcpu) & ~0x0eul) | - (c->src.val & 0x0f), ctxt->vcpu); + ops->set_cr(ctxt, 
0, (ops->get_cr(ctxt, 0) & ~0x0eul) | + (c->src.val & 0x0f)); c->dst.type = OP_NONE; break; case 5: /* not defined */ @@ -4084,13 +4084,13 @@ twobyte_insn: case 0x18: /* Grp16 (prefetch/nop) */ break; case 0x20: /* mov cr, reg */ - c->dst.val = ops->get_cr(c->modrm_reg, ctxt->vcpu); + c->dst.val = ops->get_cr(ctxt, c->modrm_reg); break; case 0x21: /* mov from dr to reg */ - ops->get_dr(c->modrm_reg, &c->dst.val, ctxt->vcpu); + ops->get_dr(ctxt, c->modrm_reg, &c->dst.val); break; case 0x22: /* mov reg, cr */ - if (ops->set_cr(c->modrm_reg, c->src.val, ctxt->vcpu)) { + if (ops->set_cr(ctxt, c->modrm_reg, c->src.val)) { emulate_gp(ctxt, 0); rc = X86EMUL_PROPAGATE_FAULT; goto done; @@ -4098,9 +4098,9 @@ twobyte_insn: c->dst.type = OP_NONE; break; case 0x23: /* mov from reg to dr */ - if (ops->set_dr(c->modrm_reg, c->src.val & + if (ops->set_dr(ctxt, c->modrm_reg, c->src.val & ((ctxt->mode == X86EMUL_MODE_PROT64) ? - ~0ULL : ~0U), ctxt->vcpu) < 0) { + ~0ULL : ~0U)) < 0) { /* #UD condition is already handled by the code above */ emulate_gp(ctxt, 0); rc = X86EMUL_PROPAGATE_FAULT; @@ -4113,7 +4113,7 @@ twobyte_insn: /* wrmsr */ msr_data = (u32)c->regs[VCPU_REGS_RAX] | ((u64)c->regs[VCPU_REGS_RDX] << 32); - if (ops->set_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], msr_data)) { + if (ops->set_msr(ctxt, c->regs[VCPU_REGS_RCX], msr_data)) { emulate_gp(ctxt, 0); rc = X86EMUL_PROPAGATE_FAULT; goto done; @@ -4122,7 +4122,7 @@ twobyte_insn: break; case 0x32: /* rdmsr */ - if (ops->get_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], &msr_data)) { + if (ops->get_msr(ctxt, c->regs[VCPU_REGS_RCX], &msr_data)) { emulate_gp(ctxt, 0); rc = X86EMUL_PROPAGATE_FAULT; goto done; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6a7fbf671b2..16373a5bfd0 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4160,15 +4160,15 @@ int emulate_clts(struct kvm_vcpu *vcpu) return X86EMUL_CONTINUE; } -int emulator_get_dr(int dr, unsigned long *dest, struct kvm_vcpu *vcpu) +int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest) { - return _kvm_get_dr(vcpu, dr, dest); + return _kvm_get_dr(emul_to_vcpu(ctxt), dr, dest); } -int emulator_set_dr(int dr, unsigned long value, struct kvm_vcpu *vcpu) +int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value) { - return __kvm_set_dr(vcpu, dr, value); + return __kvm_set_dr(emul_to_vcpu(ctxt), dr, value); } static u64 mk_cr_64(u64 curr_cr, u32 new_val) @@ -4176,8 +4176,9 @@ static u64 mk_cr_64(u64 curr_cr, u32 new_val) return (curr_cr & ~((1ULL << 32) - 1)) | new_val; } -static unsigned long emulator_get_cr(int cr, struct kvm_vcpu *vcpu) +static unsigned long emulator_get_cr(struct x86_emulate_ctxt *ctxt, int cr) { + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); unsigned long value; switch (cr) { @@ -4204,8 +4205,9 @@ static unsigned long emulator_get_cr(int cr, struct kvm_vcpu *vcpu) return value; } -static int emulator_set_cr(int cr, unsigned long val, struct kvm_vcpu *vcpu) +static int emulator_set_cr(struct x86_emulate_ctxt *ctxt, int cr, ulong val) { + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); int res = 0; switch (cr) { @@ -4232,9 +4234,9 @@ static int emulator_set_cr(int cr, unsigned long val, struct kvm_vcpu *vcpu) return res; } -static int emulator_get_cpl(struct kvm_vcpu *vcpu) +static int emulator_get_cpl(struct x86_emulate_ctxt *ctxt) { - return kvm_x86_ops->get_cpl(vcpu); + return kvm_x86_ops->get_cpl(emul_to_vcpu(ctxt)); } static void emulator_get_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt) @@ -4335,6 +4337,18 @@ 
static void emulator_set_segment_selector(struct x86_emulate_ctxt *ctxt, kvm_set_segment(emul_to_vcpu(ctxt), &kvm_seg, seg); } +static int emulator_get_msr(struct x86_emulate_ctxt *ctxt, + u32 msr_index, u64 *pdata) +{ + return kvm_get_msr(emul_to_vcpu(ctxt), msr_index, pdata); +} + +static int emulator_set_msr(struct x86_emulate_ctxt *ctxt, + u32 msr_index, u64 data) +{ + return kvm_set_msr(emul_to_vcpu(ctxt), msr_index, data); +} + static void emulator_get_fpu(struct x86_emulate_ctxt *ctxt) { preempt_disable(); @@ -4379,8 +4393,8 @@ static struct x86_emulate_ops emulate_ops = { .cpl = emulator_get_cpl, .get_dr = emulator_get_dr, .set_dr = emulator_set_dr, - .set_msr = kvm_set_msr, - .get_msr = kvm_get_msr, + .set_msr = emulator_set_msr, + .get_msr = emulator_get_msr, .get_fpu = emulator_get_fpu, .put_fpu = emulator_put_fpu, .intercept = emulator_intercept, -- cgit v1.2.3-70-g09d2 From 2953538ebbd95b145bd3629126fe5af61b88be11 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 20 Apr 2011 13:37:53 +0300 Subject: KVM: x86 emulator: drop vcpu argument from intercept callback Making the emulator caller agnostic. Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_emulate.h | 2 +- arch/x86/kvm/emulate.c | 2 +- arch/x86/kvm/x86.c | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 2c02e753ab8..e2b082aa320 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -185,7 +185,7 @@ struct x86_emulate_ops { int (*get_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 *pdata); void (*get_fpu)(struct x86_emulate_ctxt *ctxt); /* disables preempt */ void (*put_fpu)(struct x86_emulate_ctxt *ctxt); /* reenables preempt */ - int (*intercept)(struct kvm_vcpu *vcpu, + int (*intercept)(struct x86_emulate_ctxt *ctxt, struct x86_instruction_info *info, enum x86_intercept_stage stage); }; diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 33ad16b7db2..acb9fcc283e 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -438,7 +438,7 @@ static int emulator_check_intercept(struct x86_emulate_ctxt *ctxt, .next_rip = ctxt->eip, }; - return ctxt->ops->intercept(ctxt->vcpu, &info, stage); + return ctxt->ops->intercept(ctxt, &info, stage); } static inline unsigned long ad_mask(struct decode_cache *c) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 16373a5bfd0..4f7248ea6ca 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4365,11 +4365,11 @@ static void emulator_put_fpu(struct x86_emulate_ctxt *ctxt) preempt_enable(); } -static int emulator_intercept(struct kvm_vcpu *vcpu, +static int emulator_intercept(struct x86_emulate_ctxt *ctxt, struct x86_instruction_info *info, enum x86_intercept_stage stage) { - return kvm_x86_ops->check_intercept(vcpu, info, stage); + return kvm_x86_ops->check_intercept(emul_to_vcpu(ctxt), info, stage); } static struct x86_emulate_ops emulate_ops = { -- cgit v1.2.3-70-g09d2 From fe870ab9ce1c3e64c6d6b6ee3fe53d0d029f1044 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 20 Apr 2011 15:01:23 +0300 Subject: KVM: x86 emulator: avoid using ctxt->vcpu in check_perm() callbacks Unneeded for register access. 
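The register values these permission checks need are already captured in the emulator's decode cache, so the read can stay inside the emulator. A minimal sketch of the substitution (before/after shapes, with rax as in the hunks below):

	u64 rax;

	rax = kvm_register_read(ctxt->vcpu, VCPU_REGS_RAX); /* before: via the vcpu */
	rax = ctxt->decode.regs[VCPU_REGS_RAX];             /* after: decode cache  */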
Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index acb9fcc283e..0ff7d4bd1bb 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2720,7 +2720,7 @@ static int check_svme(struct x86_emulate_ctxt *ctxt) static int check_svme_pa(struct x86_emulate_ctxt *ctxt) { - u64 rax = kvm_register_read(ctxt->vcpu, VCPU_REGS_RAX); + u64 rax = ctxt->decode.regs[VCPU_REGS_RAX]; /* Valid physical address? */ if (rax & 0xffff000000000000) @@ -2742,7 +2742,7 @@ static int check_rdtsc(struct x86_emulate_ctxt *ctxt) static int check_rdpmc(struct x86_emulate_ctxt *ctxt) { u64 cr4 = ctxt->ops->get_cr(ctxt, 4); - u64 rcx = kvm_register_read(ctxt->vcpu, VCPU_REGS_RCX); + u64 rcx = ctxt->decode.regs[VCPU_REGS_RCX]; if ((!(cr4 & X86_CR4_PCE) && ctxt->ops->cpl(ctxt)) || (rcx > 3)) -- cgit v1.2.3-70-g09d2 From 1ac9d0cfb07e8ac3b5007d8279c5bd56e124250c Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 20 Apr 2011 15:12:00 +0300 Subject: KVM: x86 emulator: add and use new callbacks set_idt(), set_gdt() Replacing direct calls to realmode_lgdt(), realmode_lidt(). Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_emulate.h | 2 ++ arch/x86/include/asm/kvm_host.h | 3 --- arch/x86/kvm/emulate.c | 14 +++++++------- arch/x86/kvm/x86.c | 26 ++++++++++++-------------- 4 files changed, 21 insertions(+), 24 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index e2b082aa320..4d1546aa610 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -176,6 +176,8 @@ struct x86_emulate_ops { int seg); void (*get_gdt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt); void (*get_idt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt); + void (*set_gdt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt); + void (*set_idt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt); ulong (*get_cr)(struct x86_emulate_ctxt *ctxt, int cr); int (*set_cr)(struct x86_emulate_ctxt *ctxt, int cr, ulong val); int (*cpl)(struct x86_emulate_ctxt *ctxt); diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index e50bffcf3cc..a8616ca8320 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -681,9 +681,6 @@ static inline int emulate_instruction(struct kvm_vcpu *vcpu, return x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0); } -void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); -void realmode_lidt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); - void kvm_enable_efer_bits(u64); int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *data); int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 0ff7d4bd1bb..57e0e291b38 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -3494,6 +3494,7 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt) int rc = X86EMUL_CONTINUE; int saved_dst_type = c->dst.type; int irq; /* Used for int 3, int, and into */ + struct desc_ptr desc_ptr; ctxt->decode.mem_read.pos = 0; @@ -4005,9 +4006,6 @@ twobyte_insn: switch (c->b) { case 0x01: /* lgdt, lidt, lmsw */ switch (c->modrm_reg) { - u16 size; - unsigned long address; - case 0: /* vmcall */ if (c->modrm_mod != 3 || c->modrm_rm != 1) goto cannot_emulate; @@ -4023,10 +4021,11 @@ twobyte_insn: break; case 2: /* lgdt */ rc = 
read_descriptor(ctxt, ops, c->src.addr.mem, - &size, &address, c->op_bytes); + &desc_ptr.size, &desc_ptr.address, + c->op_bytes); if (rc != X86EMUL_CONTINUE) goto done; - realmode_lgdt(ctxt->vcpu, size, address); + ctxt->ops->set_gdt(ctxt, &desc_ptr); /* Disable writeback. */ c->dst.type = OP_NONE; break; @@ -4041,11 +4040,12 @@ twobyte_insn: } } else { rc = read_descriptor(ctxt, ops, c->src.addr.mem, - &size, &address, + &desc_ptr.size, + &desc_ptr.address, c->op_bytes); if (rc != X86EMUL_CONTINUE) goto done; - realmode_lidt(ctxt->vcpu, size, address); + ctxt->ops->set_idt(ctxt, &desc_ptr); } /* Disable writeback. */ c->dst.type = OP_NONE; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 4f7248ea6ca..7cd3a3b491d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4249,6 +4249,16 @@ static void emulator_get_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt) kvm_x86_ops->get_idt(emul_to_vcpu(ctxt), dt); } +static void emulator_set_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt) +{ + kvm_x86_ops->set_gdt(emul_to_vcpu(ctxt), dt); +} + +static void emulator_set_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt) +{ + kvm_x86_ops->set_idt(emul_to_vcpu(ctxt), dt); +} + static unsigned long emulator_get_cached_segment_base( struct x86_emulate_ctxt *ctxt, int seg) { @@ -4388,6 +4398,8 @@ static struct x86_emulate_ops emulate_ops = { .get_cached_segment_base = emulator_get_cached_segment_base, .get_gdt = emulator_get_gdt, .get_idt = emulator_get_idt, + .set_gdt = emulator_set_gdt, + .set_idt = emulator_set_idt, .get_cr = emulator_get_cr, .set_cr = emulator_set_cr, .cpl = emulator_get_cpl, @@ -5049,20 +5061,6 @@ int kvm_fix_hypercall(struct kvm_vcpu *vcpu) rip, instruction, 3, NULL); } -void realmode_lgdt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) -{ - struct desc_ptr dt = { limit, base }; - - kvm_x86_ops->set_gdt(vcpu, &dt); -} - -void realmode_lidt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) -{ - struct desc_ptr dt = { limit, base }; - - kvm_x86_ops->set_idt(vcpu, &dt); -} - static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i) { struct kvm_cpuid_entry2 *e = &vcpu->arch.cpuid_entries[i]; -- cgit v1.2.3-70-g09d2 From c2ad2bb3ef870067ecfc9ccdcf465feb51f2b6a5 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 20 Apr 2011 15:21:35 +0300 Subject: KVM: x86 emulator: drop use of is_long_mode() Requires ctxt->vcpu, which is to be abolished. Replace with open calls to get_msr(). 
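is_long_mode() boils down to a test of EFER.LMA, and EFER is already reachable through the ops table. A sketch of the open-coded pattern the hunks below apply:

	u64 efer = 0;

	ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
	if (efer & EFER_LMA) {
		/* long-mode-only handling */
	}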
Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 57e0e291b38..be1532f4b8b 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -1844,12 +1844,14 @@ emulate_syscall(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) struct desc_struct cs, ss; u64 msr_data; u16 cs_sel, ss_sel; + u64 efer = 0; /* syscall is not available in real mode */ if (ctxt->mode == X86EMUL_MODE_REAL || ctxt->mode == X86EMUL_MODE_VM86) return emulate_ud(ctxt); + ops->get_msr(ctxt, MSR_EFER, &efer); setup_syscalls_segments(ctxt, ops, &cs, &ss); ops->get_msr(ctxt, MSR_STAR, &msr_data); @@ -1857,7 +1859,7 @@ emulate_syscall(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) cs_sel = (u16)(msr_data & 0xfffc); ss_sel = (u16)(msr_data + 8); - if (is_long_mode(ctxt->vcpu)) { + if (efer & EFER_LMA) { cs.d = 0; cs.l = 1; } @@ -1867,7 +1869,7 @@ emulate_syscall(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) ops->set_segment_selector(ctxt, ss_sel, VCPU_SREG_SS); c->regs[VCPU_REGS_RCX] = c->eip; - if (is_long_mode(ctxt->vcpu)) { + if (efer & EFER_LMA) { #ifdef CONFIG_X86_64 c->regs[VCPU_REGS_R11] = ctxt->eflags & ~EFLG_RF; @@ -1897,7 +1899,9 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) struct desc_struct cs, ss; u64 msr_data; u16 cs_sel, ss_sel; + u64 efer = 0; + ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); /* inject #GP if in real mode */ if (ctxt->mode == X86EMUL_MODE_REAL) return emulate_gp(ctxt, 0); @@ -1927,8 +1931,7 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) cs_sel &= ~SELECTOR_RPL_MASK; ss_sel = cs_sel + 8; ss_sel &= ~SELECTOR_RPL_MASK; - if (ctxt->mode == X86EMUL_MODE_PROT64 - || is_long_mode(ctxt->vcpu)) { + if (ctxt->mode == X86EMUL_MODE_PROT64 || (efer & EFER_LMA)) { cs.d = 0; cs.l = 1; } @@ -2603,6 +2606,7 @@ static int check_cr_write(struct x86_emulate_ctxt *ctxt) struct decode_cache *c = &ctxt->decode; u64 new_val = c->src.val64; int cr = c->modrm_reg; + u64 efer = 0; static u64 cr_reserved_bits[] = { 0xffffffff00000000ULL, @@ -2620,7 +2624,7 @@ static int check_cr_write(struct x86_emulate_ctxt *ctxt) switch (cr) { case 0: { - u64 cr4, efer; + u64 cr4; if (((new_val & X86_CR0_PG) && !(new_val & X86_CR0_PE)) || ((new_val & X86_CR0_NW) && !(new_val & X86_CR0_CD))) return emulate_gp(ctxt, 0); @@ -2637,7 +2641,8 @@ static int check_cr_write(struct x86_emulate_ctxt *ctxt) case 3: { u64 rsvd = 0; - if (is_long_mode(ctxt->vcpu)) + ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); + if (efer & EFER_LMA) rsvd = CR3_L_MODE_RESERVED_BITS; else if (is_pae(ctxt->vcpu)) rsvd = CR3_PAE_RESERVED_BITS; @@ -2650,7 +2655,7 @@ static int check_cr_write(struct x86_emulate_ctxt *ctxt) break; } case 4: { - u64 cr4, efer; + u64 cr4; cr4 = ctxt->ops->get_cr(ctxt, 4); ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); -- cgit v1.2.3-70-g09d2 From fd72c4192220d0086fb24356ac6ff9c3b1e067d9 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 20 Apr 2011 15:24:32 +0300 Subject: KVM: x86 emulator: Replace calls to is_pae() and is_paging with ->get_cr() Avoid use of ctxt->vcpu. 
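Both predicates reduce to a single control-register bit, which ->get_cr() can supply without touching the vcpu. Roughly:

	bool pae    = ctxt->ops->get_cr(ctxt, 4) & X86_CR4_PAE; /* is_pae()    */
	bool paging = ctxt->ops->get_cr(ctxt, 0) & X86_CR0_PG;  /* is_paging() */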
Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index be1532f4b8b..6a512532866 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2644,9 +2644,9 @@ static int check_cr_write(struct x86_emulate_ctxt *ctxt) ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); if (efer & EFER_LMA) rsvd = CR3_L_MODE_RESERVED_BITS; - else if (is_pae(ctxt->vcpu)) + else if (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_PAE) rsvd = CR3_PAE_RESERVED_BITS; - else if (is_paging(ctxt->vcpu)) + else if (ctxt->ops->get_cr(ctxt, 0) & X86_CR0_PG) rsvd = CR3_NONPAE_RESERVED_BITS; if (new_val & rsvd) -- cgit v1.2.3-70-g09d2 From 2d04a05bd7e93c13f13a82ac40de4065a99d069b Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 20 Apr 2011 15:32:49 +0300 Subject: KVM: x86 emulator: emulate CLTS internally Avoid using ctxt->vcpu; we can do everything with ->get_cr() and ->set_cr(). A side effect is that we no longer activate the fpu on emulated CLTS; but that should be very rare. Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 1 - arch/x86/kvm/emulate.c | 12 +++++++++++- arch/x86/kvm/x86.c | 7 ------- 3 files changed, 11 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index a8616ca8320..9c3567e0f73 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -691,7 +691,6 @@ int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port); void kvm_emulate_cpuid(struct kvm_vcpu *vcpu); int kvm_emulate_halt(struct kvm_vcpu *vcpu); int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address); -int emulate_clts(struct kvm_vcpu *vcpu); int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu); void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 6a512532866..2b903a32609 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2579,6 +2579,16 @@ static int em_invlpg(struct x86_emulate_ctxt *ctxt) return X86EMUL_CONTINUE; } +static int em_clts(struct x86_emulate_ctxt *ctxt) +{ + ulong cr0; + + cr0 = ctxt->ops->get_cr(ctxt, 0); + cr0 &= ~X86_CR0_TS; + ctxt->ops->set_cr(ctxt, 0, cr0); + return X86EMUL_CONTINUE; +} + static bool valid_cr(int nr) { switch (nr) { @@ -4079,7 +4089,7 @@ twobyte_insn: rc = emulate_syscall(ctxt, ops); break; case 0x06: - emulate_clts(ctxt->vcpu); + rc = em_clts(ctxt); break; case 0x09: /* wbinvd */ kvm_emulate_wbinvd(ctxt->vcpu); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 7cd3a3b491d..a9e83862feb 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4153,13 +4153,6 @@ int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd); -int emulate_clts(struct kvm_vcpu *vcpu) -{ - kvm_x86_ops->set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS)); - kvm_x86_ops->fpu_activate(vcpu); - return X86EMUL_CONTINUE; -} - int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest) { return _kvm_get_dr(emul_to_vcpu(ctxt), dr, dest); -- cgit v1.2.3-70-g09d2 From 3cb16fe78ce91991a876c74fc5dc99419b737b7a Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 20 Apr 2011 15:38:44 +0300 Subject: KVM: x86 emulator: make emulate_invlpg() an emulator callback Removing direct calls to KVM. 
Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_emulate.h | 1 + arch/x86/include/asm/kvm_host.h | 1 - arch/x86/kvm/emulate.c | 2 +- arch/x86/kvm/x86.c | 6 +++--- 4 files changed, 5 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 4d1546aa610..f8907694370 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -154,6 +154,7 @@ struct x86_emulate_ops { const void *new, unsigned int bytes, struct x86_exception *fault); + void (*invlpg)(struct x86_emulate_ctxt *ctxt, ulong addr); int (*pio_in_emulated)(struct x86_emulate_ctxt *ctxt, int size, unsigned short port, void *val, diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 9c3567e0f73..d957d0d0656 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -690,7 +690,6 @@ struct x86_emulate_ctxt; int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port); void kvm_emulate_cpuid(struct kvm_vcpu *vcpu); int kvm_emulate_halt(struct kvm_vcpu *vcpu); -int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address); int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu); void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 2b903a32609..5d774e91388 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2573,7 +2573,7 @@ static int em_invlpg(struct x86_emulate_ctxt *ctxt) rc = linearize(ctxt, c->src.addr.mem, 1, false, &linear); if (rc == X86EMUL_CONTINUE) - emulate_invlpg(ctxt->vcpu, linear); + ctxt->ops->invlpg(ctxt, linear); /* Disable writeback. */ c->dst.type = OP_NONE; return X86EMUL_CONTINUE; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a9e83862feb..8af49b3df67 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4128,10 +4128,9 @@ static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg) return kvm_x86_ops->get_segment_base(vcpu, seg); } -int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address) +static void emulator_invlpg(struct x86_emulate_ctxt *ctxt, ulong address) { - kvm_mmu_invlpg(vcpu, address); - return X86EMUL_CONTINUE; + kvm_mmu_invlpg(emul_to_vcpu(ctxt), address); } int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu) @@ -4382,6 +4381,7 @@ static struct x86_emulate_ops emulate_ops = { .read_emulated = emulator_read_emulated, .write_emulated = emulator_write_emulated, .cmpxchg_emulated = emulator_cmpxchg_emulated, + .invlpg = emulator_invlpg, .pio_in_emulated = emulator_pio_in_emulated, .pio_out_emulated = emulator_pio_out_emulated, .get_cached_descriptor = emulator_get_cached_descriptor, -- cgit v1.2.3-70-g09d2 From 6c3287f7c5050076b554145f11bdba058de287d1 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 20 Apr 2011 15:43:05 +0300 Subject: KVM: x86 emulator: add new ->halt() callback Instead of reaching into vcpu internals. 
Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_emulate.h | 1 + arch/x86/kvm/emulate.c | 2 +- arch/x86/kvm/x86.c | 6 ++++++ 3 files changed, 8 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index f8907694370..d30f1e9b754 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -186,6 +186,7 @@ struct x86_emulate_ops { int (*set_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong value); int (*set_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 data); int (*get_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 *pdata); + void (*halt)(struct x86_emulate_ctxt *ctxt); void (*get_fpu)(struct x86_emulate_ctxt *ctxt); /* disables preempt */ void (*put_fpu)(struct x86_emulate_ctxt *ctxt); /* reenables preempt */ int (*intercept)(struct x86_emulate_ctxt *ctxt, diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 5d774e91388..210df51b76a 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -3913,7 +3913,7 @@ special_insn: c->dst.type = OP_NONE; /* Disable writeback. */ break; case 0xf4: /* hlt */ - ctxt->vcpu->arch.halt_request = 1; + ctxt->ops->halt(ctxt); break; case 0xf5: /* cmc */ /* complement carry flag from eflags reg */ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 8af49b3df67..2246cf1a4ee 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4351,6 +4351,11 @@ static int emulator_set_msr(struct x86_emulate_ctxt *ctxt, return kvm_set_msr(emul_to_vcpu(ctxt), msr_index, data); } +static void emulator_halt(struct x86_emulate_ctxt *ctxt) +{ + emul_to_vcpu(ctxt)->arch.halt_request = 1; +} + static void emulator_get_fpu(struct x86_emulate_ctxt *ctxt) { preempt_disable(); @@ -4400,6 +4405,7 @@ static struct x86_emulate_ops emulate_ops = { .set_dr = emulator_set_dr, .set_msr = emulator_set_msr, .get_msr = emulator_get_msr, + .halt = emulator_halt, .get_fpu = emulator_get_fpu, .put_fpu = emulator_put_fpu, .intercept = emulator_intercept, -- cgit v1.2.3-70-g09d2 From d6aa10003b0cded5a538af0d198460e89dc2d6d2 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 20 Apr 2011 15:47:13 +0300 Subject: KVM: x86 emulator: add ->fix_hypercall() callback Artificial, but needed to remove direct calls to KVM. 
Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_emulate.h | 1 + arch/x86/include/asm/kvm_host.h | 2 -- arch/x86/kvm/emulate.c | 4 ++-- arch/x86/kvm/x86.c | 6 +++++- 4 files changed, 8 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index d30f1e9b754..d30840ddd2f 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -187,6 +187,7 @@ struct x86_emulate_ops { int (*set_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 data); int (*get_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 *pdata); void (*halt)(struct x86_emulate_ctxt *ctxt); + int (*fix_hypercall)(struct x86_emulate_ctxt *ctxt); void (*get_fpu)(struct x86_emulate_ctxt *ctxt); /* disables preempt */ void (*put_fpu)(struct x86_emulate_ctxt *ctxt); /* reenables preempt */ int (*intercept)(struct x86_emulate_ctxt *ctxt, diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index d957d0d0656..6cfc1ab2cdd 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -752,8 +752,6 @@ gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); -int kvm_fix_hypercall(struct kvm_vcpu *vcpu); - int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code, void *insn, int insn_len); void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva); diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 210df51b76a..64e7373d3b2 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -4025,7 +4025,7 @@ twobyte_insn: if (c->modrm_mod != 3 || c->modrm_rm != 1) goto cannot_emulate; - rc = kvm_fix_hypercall(ctxt->vcpu); + rc = ctxt->ops->fix_hypercall(ctxt); if (rc != X86EMUL_CONTINUE) goto done; @@ -4048,7 +4048,7 @@ twobyte_insn: if (c->modrm_mod == 3) { switch (c->modrm_rm) { case 1: - rc = kvm_fix_hypercall(ctxt->vcpu); + rc = ctxt->ops->fix_hypercall(ctxt); break; default: goto cannot_emulate; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 2246cf1a4ee..4a2b40e2502 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -152,6 +152,8 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { u64 __read_mostly host_xcr0; +int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt); + static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu) { int i; @@ -4406,6 +4408,7 @@ static struct x86_emulate_ops emulate_ops = { .set_msr = emulator_set_msr, .get_msr = emulator_get_msr, .halt = emulator_halt, + .fix_hypercall = emulator_fix_hypercall, .get_fpu = emulator_get_fpu, .put_fpu = emulator_put_fpu, .intercept = emulator_intercept, @@ -5042,8 +5045,9 @@ out: } EXPORT_SYMBOL_GPL(kvm_emulate_hypercall); -int kvm_fix_hypercall(struct kvm_vcpu *vcpu) +int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt) { + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); char instruction[3]; unsigned long rip = kvm_rip_read(vcpu); -- cgit v1.2.3-70-g09d2 From bcaf5cc543bdb8f61fc3ce09944e0ecde2966595 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 20 Apr 2011 15:53:23 +0300 Subject: KVM: x86 emulator: add new ->wbinvd() callback Instead of calling kvm_emulate_wbinvd() directly. 
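Like the other wrappers in this series, the new callback recovers the vcpu with emul_to_vcpu(). That helper's definition is not part of these patches; assuming the emulation context is embedded in the vcpu (as vcpu->arch.emulate_ctxt elsewhere in x86.c suggests), it is presumably a container_of() conversion along these lines (a sketch, not the verbatim source):

	static inline struct kvm_vcpu *emul_to_vcpu(struct x86_emulate_ctxt *ctxt)
	{
		return container_of(ctxt, struct kvm_vcpu, arch.emulate_ctxt);
	}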
Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_emulate.h | 1 + arch/x86/kvm/emulate.c | 2 +- arch/x86/kvm/x86.c | 6 ++++++ 3 files changed, 8 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index d30840ddd2f..51341d6b2f3 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -187,6 +187,7 @@ struct x86_emulate_ops { int (*set_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 data); int (*get_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 *pdata); void (*halt)(struct x86_emulate_ctxt *ctxt); + void (*wbinvd)(struct x86_emulate_ctxt *ctxt); int (*fix_hypercall)(struct x86_emulate_ctxt *ctxt); void (*get_fpu)(struct x86_emulate_ctxt *ctxt); /* disables preempt */ void (*put_fpu)(struct x86_emulate_ctxt *ctxt); /* reenables preempt */ diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 64e7373d3b2..522bc35d290 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -4092,7 +4092,7 @@ twobyte_insn: rc = em_clts(ctxt); break; case 0x09: /* wbinvd */ - kvm_emulate_wbinvd(ctxt->vcpu); + ctxt->ops->wbinvd(ctxt); break; case 0x08: /* invd */ case 0x0d: /* GrpP (prefetch) */ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 4a2b40e2502..5d853d540f9 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4154,6 +4154,11 @@ int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd); +static void emulator_wbinvd(struct x86_emulate_ctxt *ctxt) +{ + kvm_emulate_wbinvd(emul_to_vcpu(ctxt)); +} + int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest) { return _kvm_get_dr(emul_to_vcpu(ctxt), dr, dest); @@ -4408,6 +4413,7 @@ static struct x86_emulate_ops emulate_ops = { .set_msr = emulator_set_msr, .get_msr = emulator_get_msr, .halt = emulator_halt, + .wbinvd = emulator_wbinvd, .fix_hypercall = emulator_fix_hypercall, .get_fpu = emulator_get_fpu, .put_fpu = emulator_put_fpu, -- cgit v1.2.3-70-g09d2 From 5197b808a7f459f9c7436573c7785ff3c1324c08 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 20 Apr 2011 15:55:40 +0300 Subject: KVM: Avoid using x86_emulate_ctxt.vcpu We can use container_of() instead. Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 5d853d540f9..65a5b0c545a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4366,7 +4366,7 @@ static void emulator_halt(struct x86_emulate_ctxt *ctxt) static void emulator_get_fpu(struct x86_emulate_ctxt *ctxt) { preempt_disable(); - kvm_load_guest_fpu(ctxt->vcpu); + kvm_load_guest_fpu(emul_to_vcpu(ctxt)); /* * CR0.TS may reference the host fpu state, not the guest fpu state, * so it may be clear at this point. -- cgit v1.2.3-70-g09d2 From 13db70eca62c5bbb2cbbf6b23dadb94065d363d1 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 20 Apr 2011 15:56:20 +0300 Subject: KVM: x86 emulator: drop x86_emulate_ctxt::vcpu No longer used. Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_emulate.h | 2 -- arch/x86/kvm/x86.c | 1 - 2 files changed, 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 51341d6b2f3..127ea3e1717 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -269,8 +269,6 @@ struct x86_emulate_ctxt { struct x86_emulate_ops *ops; /* Register state before/after emulation. 
*/ - struct kvm_vcpu *vcpu; - unsigned long eflags; unsigned long eip; /* eip before instruction emulation */ /* Emulated execution mode, represented by an X86EMUL_MODE value. */ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 65a5b0c545a..a831d5d8ca1 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4463,7 +4463,6 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu) kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); - vcpu->arch.emulate_ctxt.vcpu = vcpu; vcpu->arch.emulate_ctxt.eflags = kvm_get_rflags(vcpu); vcpu->arch.emulate_ctxt.eip = kvm_rip_read(vcpu); vcpu->arch.emulate_ctxt.mode = -- cgit v1.2.3-70-g09d2 From a78484c60e35555d6e0e5b1eb83d4913621c59fb Mon Sep 17 00:00:00 2001 From: "Roedel, Joerg" Date: Wed, 20 Apr 2011 15:33:16 +0200 Subject: KVM: MMU: Make cmpxchg_gpte aware of nesting too This patch makes the cmpxchg_gpte() function aware of the difference between l1-gfns and l2-gfns when nested virtualization is in use. This fixes a potential data-corruption problem in the l1-guest and makes the code work correctly (at least as correctly as the hardware which is emulated in this code) again. Cc: stable@kernel.org Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/kvm/paging_tmpl.h | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 74f8567d57a..1b6899088f9 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -78,15 +78,21 @@ static gfn_t gpte_to_gfn_lvl(pt_element_t gpte, int lvl) return (gpte & PT_LVL_ADDR_MASK(lvl)) >> PAGE_SHIFT; } -static bool FNAME(cmpxchg_gpte)(struct kvm *kvm, +static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, gfn_t table_gfn, unsigned index, pt_element_t orig_pte, pt_element_t new_pte) { pt_element_t ret; pt_element_t *table; struct page *page; + gpa_t gpa; - page = gfn_to_page(kvm, table_gfn); + gpa = mmu->translate_gpa(vcpu, table_gfn << PAGE_SHIFT, + PFERR_USER_MASK|PFERR_WRITE_MASK); + if (gpa == UNMAPPED_GVA) + return -EFAULT; + + page = gfn_to_page(vcpu->kvm, gpa_to_gfn(gpa)); table = kmap_atomic(page, KM_USER0); ret = CMPXCHG(&table[index], orig_pte, new_pte); @@ -192,11 +198,17 @@ walk: #endif if (!eperm && !rsvd_fault && !(pte & PT_ACCESSED_MASK)) { + int ret; trace_kvm_mmu_set_accessed_bit(table_gfn, index, sizeof(pte)); - if (FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn, - index, pte, pte|PT_ACCESSED_MASK)) + ret = FNAME(cmpxchg_gpte)(vcpu, mmu, table_gfn, + index, pte, pte|PT_ACCESSED_MASK); + if (ret < 0) { + present = false; + break; + } else if (ret) goto walk; + mark_page_dirty(vcpu->kvm, table_gfn); pte |= PT_ACCESSED_MASK; } @@ -245,13 +257,17 @@ walk: goto error; if (write_fault && !is_dirty_gpte(pte)) { - bool ret; + int ret; trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte)); - ret = FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn, index, pte, + ret = FNAME(cmpxchg_gpte)(vcpu, mmu, table_gfn, index, pte, pte|PT_DIRTY_MASK); - if (ret) + if (ret < 0) { + present = false; + goto error; + } else if (ret) goto walk; + mark_page_dirty(vcpu->kvm, table_gfn); pte |= PT_DIRTY_MASK; walker->ptes[walker->level - 1] = pte; -- cgit v1.2.3-70-g09d2 From cfb223753c3750b28103136ad4d51d1a3ae4d64b Mon Sep 17 00:00:00 2001 From: Clemens Noss Date: Thu, 21 Apr 2011 21:16:05 +0200 Subject: KVM: x86 emulator: avoid calling wbinvd() macro Commit 0b56652e33c72092956c651ab6ceb9f0ad081153 fails to build: CC [M] arch/x86/kvm/emulate.o
arch/x86/kvm/emulate.c: In function 'x86_emulate_insn': arch/x86/kvm/emulate.c:4095:25: error: macro "wbinvd" passed 1 arguments, but takes just 0 arch/x86/kvm/emulate.c:4095:3: warning: statement with no effect make[2]: *** [arch/x86/kvm/emulate.o] Error 1 make[1]: *** [arch/x86/kvm] Error 2 make: *** [arch/x86] Error 2 Work around this for now. Signed-off-by: Clemens Noss Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 522bc35d290..ccb8b383bea 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -4092,7 +4092,7 @@ twobyte_insn: rc = em_clts(ctxt); break; case 0x09: /* wbinvd */ - ctxt->ops->wbinvd(ctxt); + (ctxt->ops->wbinvd)(ctxt); break; case 0x08: /* invd */ case 0x0d: /* GrpP (prefetch) */ -- cgit v1.2.3-70-g09d2 From d42244499f171a499aa6748bc52304bb40e68ecc Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 21 Apr 2011 09:09:22 -0700 Subject: KVM: x86 emulator: fix const value warning on i386 in svm insn RAX check arch/x86/kvm/emulate.c:2598: warning: integer constant is too large for 'long' type Signed-off-by: Randy Dunlap Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index ccb8b383bea..77a5f54f151 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2738,7 +2738,7 @@ static int check_svme_pa(struct x86_emulate_ctxt *ctxt) u64 rax = ctxt->decode.regs[VCPU_REGS_RAX]; /* Valid physical address? */ - if (rax & 0xffff000000000000) + if (rax & 0xffff000000000000ULL) return emulate_gp(ctxt, 0); return check_svme(ctxt); -- cgit v1.2.3-70-g09d2 From 26d05cc7403dfffbc5afd66a1292adfb7566e3ff Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 21 Apr 2011 12:07:59 +0300 Subject: KVM: x86 emulator: move 0F 01 sub-opcodes into their own functions Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 146 +++++++++++++++++++++++++++++++++---------------- 1 file changed, 99 insertions(+), 47 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 77a5f54f151..8e1d0c8196b 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2589,6 +2589,95 @@ static int em_clts(struct x86_emulate_ctxt *ctxt) return X86EMUL_CONTINUE; } +static int em_vmcall(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + int rc; + + if (c->modrm_mod != 3 || c->modrm_rm != 1) + return X86EMUL_UNHANDLEABLE; + + rc = ctxt->ops->fix_hypercall(ctxt); + if (rc != X86EMUL_CONTINUE) + return rc; + + /* Let the processor re-execute the fixed hypercall */ + c->eip = ctxt->eip; + /* Disable writeback. */ + c->dst.type = OP_NONE; + return X86EMUL_CONTINUE; +} + +static int em_lgdt(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + struct desc_ptr desc_ptr; + int rc; + + rc = read_descriptor(ctxt, ctxt->ops, c->src.addr.mem, + &desc_ptr.size, &desc_ptr.address, + c->op_bytes); + if (rc != X86EMUL_CONTINUE) + return rc; + ctxt->ops->set_gdt(ctxt, &desc_ptr); + /* Disable writeback. */ + c->dst.type = OP_NONE; + return X86EMUL_CONTINUE; +} + +static int em_svm(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + int rc; + + switch (c->modrm_rm) { + case 1: + rc = ctxt->ops->fix_hypercall(ctxt); + break; + default: + return X86EMUL_UNHANDLEABLE; + } + /* Disable writeback. 
*/ + c->dst.type = OP_NONE; + return rc; +} + +static int em_lidt(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + struct desc_ptr desc_ptr; + int rc; + + rc = read_descriptor(ctxt, ctxt->ops, c->src.addr.mem, + &desc_ptr.size, + &desc_ptr.address, + c->op_bytes); + if (rc != X86EMUL_CONTINUE) + return rc; + ctxt->ops->set_idt(ctxt, &desc_ptr); + /* Disable writeback. */ + c->dst.type = OP_NONE; + return X86EMUL_CONTINUE; +} + +static int em_smsw(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + + c->dst.bytes = 2; + c->dst.val = ctxt->ops->get_cr(ctxt, 0); + return X86EMUL_CONTINUE; +} + +static int em_lmsw(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + ctxt->ops->set_cr(ctxt, 0, (ctxt->ops->get_cr(ctxt, 0) & ~0x0eul) + | (c->src.val & 0x0f)); + c->dst.type = OP_NONE; + return X86EMUL_CONTINUE; +} + static bool valid_cr(int nr) { switch (nr) { @@ -3509,7 +3598,6 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt) int rc = X86EMUL_CONTINUE; int saved_dst_type = c->dst.type; int irq; /* Used for int 3, int, and into */ - struct desc_ptr desc_ptr; ctxt->decode.mem_read.pos = 0; @@ -4022,62 +4110,26 @@ twobyte_insn: case 0x01: /* lgdt, lidt, lmsw */ switch (c->modrm_reg) { case 0: /* vmcall */ - if (c->modrm_mod != 3 || c->modrm_rm != 1) - goto cannot_emulate; - - rc = ctxt->ops->fix_hypercall(ctxt); - if (rc != X86EMUL_CONTINUE) - goto done; - - /* Let the processor re-execute the fixed hypercall */ - c->eip = ctxt->eip; - /* Disable writeback. */ - c->dst.type = OP_NONE; + rc = em_vmcall(ctxt); break; case 2: /* lgdt */ - rc = read_descriptor(ctxt, ops, c->src.addr.mem, - &desc_ptr.size, &desc_ptr.address, - c->op_bytes); - if (rc != X86EMUL_CONTINUE) - goto done; - ctxt->ops->set_gdt(ctxt, &desc_ptr); - /* Disable writeback. */ - c->dst.type = OP_NONE; + rc = em_lgdt(ctxt); break; case 3: /* lidt/vmmcall */ - if (c->modrm_mod == 3) { - switch (c->modrm_rm) { - case 1: - rc = ctxt->ops->fix_hypercall(ctxt); - break; - default: - goto cannot_emulate; - } - } else { - rc = read_descriptor(ctxt, ops, c->src.addr.mem, - &desc_ptr.size, - &desc_ptr.address, - c->op_bytes); - if (rc != X86EMUL_CONTINUE) - goto done; - ctxt->ops->set_idt(ctxt, &desc_ptr); - } - /* Disable writeback. */ - c->dst.type = OP_NONE; + if (c->modrm_mod == 3) + return em_svm(ctxt); + else + return em_lidt(ctxt); break; case 4: /* smsw */ - c->dst.bytes = 2; - c->dst.val = ops->get_cr(ctxt, 0); + rc = em_smsw(ctxt); break; case 6: /* lmsw */ - ops->set_cr(ctxt, 0, (ops->get_cr(ctxt, 0) & ~0x0eul) | - (c->src.val & 0x0f)); - c->dst.type = OP_NONE; + rc = em_lmsw(ctxt); break; case 5: /* not defined */ - emulate_ud(ctxt); - rc = X86EMUL_PROPAGATE_FAULT; - goto done; + rc = emulate_ud(ctxt); + break; case 7: /* invlpg*/ rc = em_invlpg(ctxt); break; -- cgit v1.2.3-70-g09d2 From 68152d88122b24fad0f5910f74efcd19120a19a8 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 21 Apr 2011 12:17:13 +0300 Subject: KVM: x86 emulator: Don't force #UD for 0F 01 /5 While it isn't defined, no need to force a #UD. If it becomes defined in the future this can cause weird problems for the guest.
Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 8e1d0c8196b..2132fab188b 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -4127,9 +4127,6 @@ twobyte_insn: case 6: /* lmsw */ rc = em_lmsw(ctxt); break; - case 5: /* not defined */ - rc = emulate_ud(ctxt); - break; case 7: /* invlpg*/ rc = em_invlpg(ctxt); break; -- cgit v1.2.3-70-g09d2 From 5ef39c71d8398115245a5974b488f8703ba3a6b0 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 21 Apr 2011 12:21:50 +0300 Subject: KVM: x86 emulator: Use opcode::execute for 0F 01 opcode Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 56 ++++++++++++-------------------------------------- 1 file changed, 13 insertions(+), 43 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 2132fab188b..252f28348cf 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2625,18 +2625,13 @@ static int em_lgdt(struct x86_emulate_ctxt *ctxt) return X86EMUL_CONTINUE; } -static int em_svm(struct x86_emulate_ctxt *ctxt) +static int em_vmmcall(struct x86_emulate_ctxt *ctxt) { struct decode_cache *c = &ctxt->decode; int rc; - switch (c->modrm_rm) { - case 1: - rc = ctxt->ops->fix_hypercall(ctxt); - break; - default: - return X86EMUL_UNHANDLEABLE; - } + rc = ctxt->ops->fix_hypercall(ctxt); + /* Disable writeback. */ c->dst.type = OP_NONE; return rc; @@ -2909,7 +2904,7 @@ static struct opcode group7_rm1[] = { static struct opcode group7_rm3[] = { DIP(SrcNone | ModRM | Prot | Priv, vmrun, check_svme_pa), - DI(SrcNone | ModRM | Prot | VendorSpecific, vmmcall), + II(SrcNone | ModRM | Prot | VendorSpecific, em_vmmcall, vmmcall), DIP(SrcNone | ModRM | Prot | Priv, vmload, check_svme_pa), DIP(SrcNone | ModRM | Prot | Priv, vmsave, check_svme_pa), DIP(SrcNone | ModRM | Prot | Priv, stgi, check_svme), @@ -2961,15 +2956,17 @@ static struct opcode group6[] = { static struct group_dual group7 = { { DI(ModRM | Mov | DstMem | Priv, sgdt), DI(ModRM | Mov | DstMem | Priv, sidt), - DI(ModRM | SrcMem | Priv, lgdt), DI(ModRM | SrcMem | Priv, lidt), - DI(SrcNone | ModRM | DstMem | Mov, smsw), N, - DI(SrcMem16 | ModRM | Mov | Priv, lmsw), - DI(SrcMem | ModRM | ByteOp | Priv | NoAccess, invlpg), + II(ModRM | SrcMem | Priv, em_lgdt, lgdt), + II(ModRM | SrcMem | Priv, em_lidt, lidt), + II(SrcNone | ModRM | DstMem | Mov, em_smsw, smsw), N, + II(SrcMem16 | ModRM | Mov | Priv, em_lmsw, lmsw), + II(SrcMem | ModRM | ByteOp | Priv | NoAccess, em_invlpg, invlpg), }, { - D(SrcNone | ModRM | Priv | VendorSpecific), EXT(0, group7_rm1), + I(SrcNone | ModRM | Priv | VendorSpecific, em_vmcall), + EXT(0, group7_rm1), N, EXT(0, group7_rm3), - DI(SrcNone | ModRM | DstMem | Mov, smsw), N, - DI(SrcMem16 | ModRM | Mov | Priv, lmsw), EXT(0, group7_rm7), + II(SrcNone | ModRM | DstMem | Mov, em_smsw, smsw), N, + II(SrcMem16 | ModRM | Mov | Priv, em_lmsw, lmsw), EXT(0, group7_rm7), } }; static struct opcode group8[] = { @@ -4107,33 +4104,6 @@ done: twobyte_insn: switch (c->b) { - case 0x01: /* lgdt, lidt, lmsw */ - switch (c->modrm_reg) { - case 0: /* vmcall */ - rc = em_vmcall(ctxt); - break; - case 2: /* lgdt */ - rc = em_lgdt(ctxt); - break; - case 3: /* lidt/vmmcall */ - if (c->modrm_mod == 3) - return em_svm(ctxt); - else - return em_lidt(ctxt); - break; - case 4: /* smsw */ - rc = em_smsw(ctxt); - break; - case 6: /* lmsw */ - rc = em_lmsw(ctxt); - break; - case 7: /* invlpg*/ - rc = em_invlpg(ctxt); - 
break; - default: - goto cannot_emulate; - } - break; case 0x05: /* syscall */ rc = emulate_syscall(ctxt, ops); break; -- cgit v1.2.3-70-g09d2 From 40e19b519caeb93def89c45082d776fccfb96dbb Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 21 Apr 2011 12:35:41 +0300 Subject: KVM: SVM: Get rid of x86_intercept_map::valid By reserving 0 as an invalid x86_intercept_stage, we no longer need to store a valid flag in x86_intercept_map. Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_emulate.h | 1 + arch/x86/kvm/svm.c | 12 ++++-------- 2 files changed, 5 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 127ea3e1717..28114f581fa 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -304,6 +304,7 @@ struct x86_emulate_ctxt { X86EMUL_MODE_PROT64) enum x86_intercept_stage { + X86_ICTP_NONE = 0, /* Allow zero-init to not match anything */ X86_ICPT_PRE_EXCEPT, X86_ICPT_POST_EXCEPT, X86_ICPT_POST_MEMACCESS, diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index de4bba99160..9cff0368e1f 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3959,19 +3959,15 @@ static void svm_fpu_deactivate(struct kvm_vcpu *vcpu) } #define PRE_EX(exit) { .exit_code = (exit), \ - .stage = X86_ICPT_PRE_EXCEPT, \ - .valid = true } + .stage = X86_ICPT_PRE_EXCEPT, } #define POST_EX(exit) { .exit_code = (exit), \ - .stage = X86_ICPT_POST_EXCEPT, \ - .valid = true } + .stage = X86_ICPT_POST_EXCEPT, } #define POST_MEM(exit) { .exit_code = (exit), \ - .stage = X86_ICPT_POST_MEMACCESS, \ - .valid = true } + .stage = X86_ICPT_POST_MEMACCESS, } static struct __x86_intercept { u32 exit_code; enum x86_intercept_stage stage; - bool valid; } x86_intercept_map[] = { [x86_intercept_cr_read] = POST_EX(SVM_EXIT_READ_CR0), [x86_intercept_cr_write] = POST_EX(SVM_EXIT_WRITE_CR0), @@ -4039,7 +4035,7 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu, icpt_info = x86_intercept_map[info->intercept]; - if (!icpt_info.valid || stage != icpt_info.stage) + if (stage != icpt_info.stage) goto out; switch (icpt_info.exit_code) { -- cgit v1.2.3-70-g09d2 From 6e2ca7d1802bf8ed9908435e34daa116662e7790 Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Fri, 22 Apr 2011 00:34:44 +0900 Subject: KVM: MMU: Optimize guest page table walk This patch optimizes the guest page table walk by using get_user() instead of copy_from_user(). With this patch applied, paging64_walk_addr_generic() has become about 0.5us to 1.0us faster on my Phenom II machine with NPT on. 
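A minimal sketch of why this helps (simplified, not the exact kernel code): copy_from_user() is a generic, variable-length routine reached through an out-of-line call, while get_user() expands inline to a single size-matched access with exception fixup, so the per-level PTE read becomes much cheaper:

	/* generic path: function call, length handling, extra checks */
	if (copy_from_user(&pte, ptep_user, sizeof(pte)))
		return -EFAULT;

	/* specialized path: inlines to one 8-byte access plus fixup */
	if (get_user(pte, ptep_user))
		return -EFAULT;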
Signed-off-by: Takuya Yoshikawa Signed-off-by: Avi Kivity --- arch/x86/kvm/paging_tmpl.h | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 1b6899088f9..a32a1c80914 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -123,6 +123,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker, gva_t addr, u32 access) { pt_element_t pte; + pt_element_t __user *ptep_user; gfn_t table_gfn; unsigned index, pt_access, uninitialized_var(pte_access); gpa_t pte_gpa; @@ -158,6 +159,9 @@ walk: pt_access = ACC_ALL; for (;;) { + gfn_t real_gfn; + unsigned long host_addr; + index = PT_INDEX(addr, walker->level); table_gfn = gpte_to_gfn(pte); @@ -166,9 +170,22 @@ walk: walker->table_gfn[walker->level - 1] = table_gfn; walker->pte_gpa[walker->level - 1] = pte_gpa; - if (kvm_read_guest_page_mmu(vcpu, mmu, table_gfn, &pte, - offset, sizeof(pte), - PFERR_USER_MASK|PFERR_WRITE_MASK)) { + real_gfn = mmu->translate_gpa(vcpu, gfn_to_gpa(table_gfn), + PFERR_USER_MASK|PFERR_WRITE_MASK); + if (real_gfn == UNMAPPED_GVA) { + present = false; + break; + } + real_gfn = gpa_to_gfn(real_gfn); + + host_addr = gfn_to_hva(vcpu->kvm, real_gfn); + if (kvm_is_error_hva(host_addr)) { + present = false; + break; + } + + ptep_user = (pt_element_t __user *)((void *)host_addr + offset); + if (get_user(pte, ptep_user)) { present = false; break; } -- cgit v1.2.3-70-g09d2 From d67fc27ae2bdc2d2fb6c8ec4238a12b502b95cc7 Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Sat, 23 Apr 2011 18:48:02 +0900 Subject: KVM: x86 emulator: Use opcode::execute for Group 1, CMPS and SCAS The following instructions are changed to use opcode::execute. Group 1 (80-83) ADD (00-05), OR (08-0D), ADC (10-15), SBB (18-1D), AND (20-25), SUB (28-2D), XOR (30-35), CMP (38-3D) CMPS (A6-A7), SCAS (AE-AF) The last two do the same as CMP in the emulator, so em_cmp() is used. 
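As a toy model of the opcode::execute mechanism used by these conversions (simplified; the real struct opcode carries more fields, such as the intercept and permission-check hooks), each table entry registers a callback and the common loop dispatches through it instead of a giant switch:

	struct opcode {
		u32 flags;
		int (*execute)(struct x86_emulate_ctxt *ctxt);
	};

	static struct opcode opcode_table[256] = {
		[0x00] = { .flags = ByteOp | DstMem | SrcReg | ModRM | Lock,
			   .execute = em_add },
		/* ... */
	};

	/* in x86_emulate_insn(), roughly: */
	if (c->execute) {
		rc = c->execute(ctxt);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		goto writeback;
	}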
Signed-off-by: Takuya Yoshikawa Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 161 +++++++++++++++++++++++++++---------------------- 1 file changed, 89 insertions(+), 72 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 252f28348cf..8784916abf7 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2512,6 +2512,72 @@ static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt) return X86EMUL_CONTINUE; } +static int em_add(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + + emulate_2op_SrcV("add", c->src, c->dst, ctxt->eflags); + return X86EMUL_CONTINUE; +} + +static int em_or(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + + emulate_2op_SrcV("or", c->src, c->dst, ctxt->eflags); + return X86EMUL_CONTINUE; +} + +static int em_adc(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + + emulate_2op_SrcV("adc", c->src, c->dst, ctxt->eflags); + return X86EMUL_CONTINUE; +} + +static int em_sbb(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + + emulate_2op_SrcV("sbb", c->src, c->dst, ctxt->eflags); + return X86EMUL_CONTINUE; +} + +static int em_and(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + + emulate_2op_SrcV("and", c->src, c->dst, ctxt->eflags); + return X86EMUL_CONTINUE; +} + +static int em_sub(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + + emulate_2op_SrcV("sub", c->src, c->dst, ctxt->eflags); + return X86EMUL_CONTINUE; +} + +static int em_xor(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + + emulate_2op_SrcV("xor", c->src, c->dst, ctxt->eflags); + return X86EMUL_CONTINUE; +} + +static int em_cmp(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + + emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags); + /* Disable writeback. 
*/ + c->dst.type = OP_NONE; + return X86EMUL_CONTINUE; +} + static int em_imul(struct x86_emulate_ctxt *ctxt) { struct decode_cache *c = &ctxt->decode; @@ -2892,9 +2958,9 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt) #define D2bvIP(_f, _i, _p) DIP((_f) | ByteOp, _i, _p), DIP(_f, _i, _p) #define I2bv(_f, _e) I((_f) | ByteOp, _e), I(_f, _e) -#define D6ALU(_f) D2bv((_f) | DstMem | SrcReg | ModRM), \ - D2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock), \ - D2bv(((_f) & ~Lock) | DstAcc | SrcImm) +#define I6ALU(_f, _e) I2bv((_f) | DstMem | SrcReg | ModRM, _e), \ + I2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e), \ + I2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e) static struct opcode group7_rm1[] = { DI(SrcNone | ModRM | Priv, monitor), @@ -2918,8 +2984,16 @@ static struct opcode group7_rm7[] = { DIP(SrcNone | ModRM, rdtscp, check_rdtsc), N, N, N, N, N, N, }; + static struct opcode group1[] = { - X7(D(Lock)), N + I(Lock, em_add), + I(Lock, em_or), + I(Lock, em_adc), + I(Lock, em_sbb), + I(Lock, em_and), + I(Lock, em_sub), + I(Lock, em_xor), + I(0, em_cmp), }; static struct opcode group1A[] = { @@ -2991,25 +3065,25 @@ static struct gprefix pfx_0f_6f_0f_7f = { static struct opcode opcode_table[256] = { /* 0x00 - 0x07 */ - D6ALU(Lock), + I6ALU(Lock, em_add), D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64), /* 0x08 - 0x0F */ - D6ALU(Lock), + I6ALU(Lock, em_or), D(ImplicitOps | Stack | No64), N, /* 0x10 - 0x17 */ - D6ALU(Lock), + I6ALU(Lock, em_adc), D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64), /* 0x18 - 0x1F */ - D6ALU(Lock), + I6ALU(Lock, em_sbb), D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64), /* 0x20 - 0x27 */ - D6ALU(Lock), N, N, + I6ALU(Lock, em_and), N, N, /* 0x28 - 0x2F */ - D6ALU(Lock), N, I(ByteOp | DstAcc | No64, em_das), + I6ALU(Lock, em_sub), N, I(ByteOp | DstAcc | No64, em_das), /* 0x30 - 0x37 */ - D6ALU(Lock), N, N, + I6ALU(Lock, em_xor), N, N, /* 0x38 - 0x3F */ - D6ALU(0), N, N, + I6ALU(0, em_cmp), N, N, /* 0x40 - 0x4F */ X16(D(DstReg)), /* 0x50 - 0x57 */ @@ -3050,12 +3124,12 @@ static struct opcode opcode_table[256] = { I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov), I2bv(DstMem | SrcAcc | Mov | MemAbs, em_mov), I2bv(SrcSI | DstDI | Mov | String, em_mov), - D2bv(SrcSI | DstDI | String), + I2bv(SrcSI | DstDI | String, em_cmp), /* 0xA8 - 0xAF */ D2bv(DstAcc | SrcImm), I2bv(SrcAcc | DstDI | Mov | String, em_mov), I2bv(SrcSI | DstAcc | Mov | String, em_mov), - D2bv(SrcAcc | DstDI | String), + I2bv(SrcAcc | DstDI | String, em_cmp), /* 0xB0 - 0xB7 */ X8(I(ByteOp | DstReg | SrcImm | Mov, em_mov)), /* 0xB8 - 0xBF */ @@ -3179,7 +3253,7 @@ static struct opcode twobyte_table[256] = { #undef D2bv #undef D2bvIP #undef I2bv -#undef D6ALU +#undef I6ALU static unsigned imm_size(struct decode_cache *c) { @@ -3715,60 +3789,27 @@ special_insn: goto twobyte_insn; switch (c->b) { - case 0x00 ... 0x05: - add: /* add */ - emulate_2op_SrcV("add", c->src, c->dst, ctxt->eflags); - break; case 0x06: /* push es */ rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_ES); break; case 0x07: /* pop es */ rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_ES); break; - case 0x08 ... 0x0d: - or: /* or */ - emulate_2op_SrcV("or", c->src, c->dst, ctxt->eflags); - break; case 0x0e: /* push cs */ rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_CS); break; - case 0x10 ... 
0x15: - adc: /* adc */ - emulate_2op_SrcV("adc", c->src, c->dst, ctxt->eflags); - break; case 0x16: /* push ss */ rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_SS); break; case 0x17: /* pop ss */ rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_SS); break; - case 0x18 ... 0x1d: - sbb: /* sbb */ - emulate_2op_SrcV("sbb", c->src, c->dst, ctxt->eflags); - break; case 0x1e: /* push ds */ rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_DS); break; case 0x1f: /* pop ds */ rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_DS); break; - case 0x20 ... 0x25: - and: /* and */ - emulate_2op_SrcV("and", c->src, c->dst, ctxt->eflags); - break; - case 0x28 ... 0x2d: - sub: /* sub */ - emulate_2op_SrcV("sub", c->src, c->dst, ctxt->eflags); - break; - case 0x30 ... 0x35: - xor: /* xor */ - emulate_2op_SrcV("xor", c->src, c->dst, ctxt->eflags); - break; - case 0x38 ... 0x3d: - cmp: /* cmp */ - c->dst.type = OP_NONE; /* Disable writeback. */ - emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags); - break; case 0x40 ... 0x47: /* inc r16/r32 */ emulate_1op("inc", c->dst, ctxt->eflags); break; @@ -3803,26 +3844,6 @@ special_insn: if (test_cc(c->b, ctxt->eflags)) jmp_rel(c, c->src.val); break; - case 0x80 ... 0x83: /* Grp1 */ - switch (c->modrm_reg) { - case 0: - goto add; - case 1: - goto or; - case 2: - goto adc; - case 3: - goto sbb; - case 4: - goto and; - case 5: - goto sub; - case 6: - goto xor; - case 7: - goto cmp; - } - break; case 0x84 ... 0x85: test: emulate_2op_SrcV("test", c->src, c->dst, ctxt->eflags); @@ -3892,12 +3913,8 @@ special_insn: c->dst.bytes = c->op_bytes; rc = emulate_popf(ctxt, ops, &c->dst.val, c->op_bytes); break; - case 0xa6 ... 0xa7: /* cmps */ - goto cmp; case 0xa8 ... 0xa9: /* test ax, imm */ goto test; - case 0xae ... 0xaf: /* scas */ - goto cmp; case 0xc0 ... 0xc1: emulate_grp2(ctxt); break; -- cgit v1.2.3-70-g09d2 From c54fe504693204fa672b10a57c3d82a8c41e0b4e Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Sat, 23 Apr 2011 18:49:40 +0900 Subject: KVM: x86 emulator: Use opcode::execute for POP reg (58-5F) In addition, the RET emulation is changed to call em_pop() to remove the pop_instruction label. Signed-off-by: Takuya Yoshikawa Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 8784916abf7..9f491bfb00f 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -1400,6 +1400,13 @@ static int emulate_pop(struct x86_emulate_ctxt *ctxt, return rc; } +static int em_pop(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + + return emulate_pop(ctxt, ctxt->ops, &c->dst.val, c->op_bytes); +} + static int emulate_popf(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops, void *dest, int len) @@ -3089,7 +3096,7 @@ static struct opcode opcode_table[256] = { /* 0x50 - 0x57 */ X8(I(SrcReg | Stack, em_push)), /* 0x58 - 0x5F */ - X8(D(DstReg | Stack)), + X8(I(DstReg | Stack, em_pop)), /* 0x60 - 0x67 */ D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64), N, D(DstReg | SrcMem32 | ModRM | Mov) /* movsxd (x86/64) */ , @@ -3816,10 +3823,6 @@ special_insn: case 0x48 ... 0x4f: /* dec r16/r32 */ emulate_1op("dec", c->dst, ctxt->eflags); break; - case 0x58 ... 
0x5f: /* pop reg */ - pop_instruction: - rc = emulate_pop(ctxt, ops, &c->dst.val, c->op_bytes); - break; case 0x60: /* pusha */ rc = emulate_pusha(ctxt); break; @@ -3922,7 +3925,8 @@ special_insn: c->dst.type = OP_REG; c->dst.addr.reg = &c->eip; c->dst.bytes = c->op_bytes; - goto pop_instruction; + rc = em_pop(ctxt); + break; case 0xc4: /* les */ rc = emulate_load_segment(ctxt, ops, VCPU_SREG_ES); break; -- cgit v1.2.3-70-g09d2 From b96a7fad020b42eb4a564f8a2fb41827a83c4375 Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Sat, 23 Apr 2011 18:51:07 +0900 Subject: KVM: x86 emulator: Use opcode::execute for PUSHA/POPA (60/61) For this, emulate_pusha/popa() are converted to em_pusha/popa(). Signed-off-by: Takuya Yoshikawa Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 9f491bfb00f..b7c6e43e4f2 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -1473,7 +1473,7 @@ static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt, return rc; } -static int emulate_pusha(struct x86_emulate_ctxt *ctxt) +static int em_pusha(struct x86_emulate_ctxt *ctxt) { struct decode_cache *c = &ctxt->decode; unsigned long old_esp = c->regs[VCPU_REGS_RSP]; @@ -1494,8 +1494,7 @@ static int emulate_pusha(struct x86_emulate_ctxt *ctxt) return rc; } -static int emulate_popa(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops) +static int em_popa(struct x86_emulate_ctxt *ctxt) { struct decode_cache *c = &ctxt->decode; int rc = X86EMUL_CONTINUE; @@ -1508,7 +1507,7 @@ static int emulate_popa(struct x86_emulate_ctxt *ctxt, --reg; } - rc = emulate_pop(ctxt, ops, &c->regs[reg], c->op_bytes); + rc = emulate_pop(ctxt, ctxt->ops, &c->regs[reg], c->op_bytes); if (rc != X86EMUL_CONTINUE) break; --reg; @@ -3098,7 +3097,8 @@ static struct opcode opcode_table[256] = { /* 0x58 - 0x5F */ X8(I(DstReg | Stack, em_pop)), /* 0x60 - 0x67 */ - D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64), + I(ImplicitOps | Stack | No64, em_pusha), + I(ImplicitOps | Stack | No64, em_popa), N, D(DstReg | SrcMem32 | ModRM | Mov) /* movsxd (x86/64) */ , N, N, N, N, /* 0x68 - 0x6F */ @@ -3823,12 +3823,6 @@ special_insn: case 0x48 ... 0x4f: /* dec r16/r32 */ emulate_1op("dec", c->dst, ctxt->eflags); break; - case 0x60: /* pusha */ - rc = emulate_pusha(ctxt); - break; - case 0x61: /* popa */ - rc = emulate_popa(ctxt, ops); - break; case 0x63: /* movsxd */ if (ctxt->mode != X86EMUL_MODE_PROT64) goto cannot_emulate; -- cgit v1.2.3-70-g09d2 From 62aaa2f05abd59598f132e6ebad86318291b5be0 Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Sat, 23 Apr 2011 18:52:56 +0900 Subject: KVM: x86 emulator: Use opcode::execute for PUSHF/POPF (9C/9D) For this, em_pushf/popf() are introduced. 
Signed-off-by: Takuya Yoshikawa Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index b7c6e43e4f2..c1d9116cf3a 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -1448,6 +1448,16 @@ static int emulate_popf(struct x86_emulate_ctxt *ctxt, return rc; } +static int em_popf(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + + c->dst.type = OP_REG; + c->dst.addr.reg = &ctxt->eflags; + c->dst.bytes = c->op_bytes; + return emulate_popf(ctxt, ctxt->ops, &c->dst.val, c->op_bytes); +} + static int emulate_push_sreg(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops, int seg) { @@ -1494,6 +1504,14 @@ static int em_pusha(struct x86_emulate_ctxt *ctxt) return rc; } +static int em_pushf(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + + c->src.val = (unsigned long)ctxt->eflags; + return em_push(ctxt); +} + static int em_popa(struct x86_emulate_ctxt *ctxt) { struct decode_cache *c = &ctxt->decode; @@ -3126,7 +3144,8 @@ static struct opcode opcode_table[256] = { /* 0x98 - 0x9F */ D(DstAcc | SrcNone), I(ImplicitOps | SrcAcc, em_cwd), I(SrcImmFAddr | No64, em_call_far), N, - DI(ImplicitOps | Stack, pushf), DI(ImplicitOps | Stack, popf), N, N, + II(ImplicitOps | Stack, em_pushf, pushf), + II(ImplicitOps | Stack, em_popf, popf), N, N, /* 0xA0 - 0xA7 */ I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov), I2bv(DstMem | SrcAcc | Mov | MemAbs, em_mov), @@ -3900,16 +3919,6 @@ special_insn: case 8: c->dst.val = (s32)c->dst.val; break; } break; - case 0x9c: /* pushf */ - c->src.val = (unsigned long) ctxt->eflags; - rc = em_push(ctxt); - break; - case 0x9d: /* popf */ - c->dst.type = OP_REG; - c->dst.addr.reg = &ctxt->eflags; - c->dst.bytes = c->op_bytes; - rc = emulate_popf(ctxt, ops, &c->dst.val, c->op_bytes); - break; case 0xa8 ... 0xa9: /* test ax, imm */ goto test; case 0xc0 ... 0xc1: -- cgit v1.2.3-70-g09d2 From 781e0743af3c5ba356d55bc60df59f2dded1e938 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 24 Apr 2011 12:25:50 +0300 Subject: KVM: MMU: Add unlikely() annotations to walk_addr_generic() walk_addr_generic() is a hot path and is also hard for the cpu to predict - some of the parameters (fetch_fault in particular) vary wildly from invocation to invocation. Add unlikely() annotations where appropriate; all walk failures are considered unlikely, as are cases where we have to mark the accessed or dirty bit, as they are slow paths both in kvm and on real processors. 
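For reference, likely()/unlikely() are thin wrappers around a gcc builtin (see include/linux/compiler.h); the hint lets the compiler move the annotated branch out of the straight-line hot path:

	#define likely(x)	__builtin_expect(!!(x), 1)
	#define unlikely(x)	__builtin_expect(!!(x), 0)

	/* e.g. a walker failure check becomes: */
	if (unlikely(!is_present_gpte(pte))) {
		present = false;
		break;
	}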
Signed-off-by: Avi Kivity --- arch/x86/kvm/paging_tmpl.h | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index a32a1c80914..652d56c081f 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -172,49 +172,51 @@ walk: real_gfn = mmu->translate_gpa(vcpu, gfn_to_gpa(table_gfn), PFERR_USER_MASK|PFERR_WRITE_MASK); - if (real_gfn == UNMAPPED_GVA) { + if (unlikely(real_gfn == UNMAPPED_GVA)) { present = false; break; } real_gfn = gpa_to_gfn(real_gfn); host_addr = gfn_to_hva(vcpu->kvm, real_gfn); - if (kvm_is_error_hva(host_addr)) { + if (unlikely(kvm_is_error_hva(host_addr))) { present = false; break; } ptep_user = (pt_element_t __user *)((void *)host_addr + offset); - if (get_user(pte, ptep_user)) { + if (unlikely(get_user(pte, ptep_user))) { present = false; break; } trace_kvm_mmu_paging_element(pte, walker->level); - if (!is_present_gpte(pte)) { + if (unlikely(!is_present_gpte(pte))) { present = false; break; } - if (is_rsvd_bits_set(&vcpu->arch.mmu, pte, walker->level)) { + if (unlikely(is_rsvd_bits_set(&vcpu->arch.mmu, pte, + walker->level))) { rsvd_fault = true; break; } - if (write_fault && !is_writable_pte(pte)) - if (user_fault || is_write_protection(vcpu)) - eperm = true; + if (unlikely(write_fault && !is_writable_pte(pte) + && (user_fault || is_write_protection(vcpu)))) + eperm = true; - if (user_fault && !(pte & PT_USER_MASK)) + if (unlikely(user_fault && !(pte & PT_USER_MASK))) eperm = true; #if PTTYPE == 64 - if (fetch_fault && (pte & PT64_NX_MASK)) + if (unlikely(fetch_fault && (pte & PT64_NX_MASK))) eperm = true; #endif - if (!eperm && !rsvd_fault && !(pte & PT_ACCESSED_MASK)) { + if (!eperm && !rsvd_fault + && unlikely(!(pte & PT_ACCESSED_MASK))) { int ret; trace_kvm_mmu_set_accessed_bit(table_gfn, index, sizeof(pte)); @@ -270,10 +272,10 @@ walk: --walker->level; } - if (!present || eperm || rsvd_fault) + if (unlikely(!present || eperm || rsvd_fault)) goto error; - if (write_fault && !is_dirty_gpte(pte)) { + if (write_fault && unlikely(!is_dirty_gpte(pte))) { int ret; trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte)); -- cgit v1.2.3-70-g09d2 From 46561646ce409ad96c22645362c113de04f60bfb Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 24 Apr 2011 14:09:59 +0300 Subject: KVM: x86 emulator: consolidate group handling Move all groups into a single field and handle them in a single place. This saves bits when we add more group types (3 bits -> 7 group types). 
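The saving comes from turning independent flag bits into one enumerated field. A minimal before/after sketch, with values taken from the patch:

	/* before: one bit per mechanism, so 3 bits describe only 3 types */
	#define Group     (1<<14)
	#define GroupDual (1<<15)
	#define Prefix    (1<<16)

	/* after: the same 3 bits hold an enumeration of up to 7 types */
	#define GroupMask (7<<14)
	#define Group     (1<<14)
	#define GroupDual (2<<14)
	#define Prefix    (3<<14)
	#define RMExt     (4<<14)

	while (c->d & GroupMask) {
		switch (c->d & GroupMask) {
		case Group:	/* extend opcode via modrm reg field */
			break;
		case GroupDual:	/* alternate decoding for mod == 3 */
			break;
		/* ... */
		}
	}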
Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 80 +++++++++++++++++++++++++------------------------- 1 file changed, 40 insertions(+), 40 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index c1d9116cf3a..7466abae84b 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -73,11 +73,12 @@ #define MemAbs (1<<11) /* Memory operand is absolute displacement */ #define String (1<<12) /* String instruction (rep capable) */ #define Stack (1<<13) /* Stack instruction (push/pop) */ +#define GroupMask (7<<14) /* Opcode uses one of the group mechanisms */ #define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */ -#define GroupDual (1<<15) /* Alternate decoding of mod == 3 */ -#define Prefix (1<<16) /* Instruction varies with 66/f2/f3 prefix */ +#define GroupDual (2<<14) /* Alternate decoding of mod == 3 */ +#define Prefix (3<<14) /* Instruction varies with 66/f2/f3 prefix */ +#define RMExt (4<<14) /* Opcode extension in ModRM r/m if mod == 3 */ #define Sse (1<<17) /* SSE Vector instruction */ -#define RMExt (1<<18) /* Opcode extension in ModRM r/m if mod == 3 */ /* Misc flags */ #define Prot (1<<21) /* instruction generates #UD if not in prot-mode */ #define VendorSpecific (1<<22) /* Vendor specific instruction */ @@ -2969,7 +2970,7 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt) #define N D(0) #define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) } #define G(_f, _g) { .flags = ((_f) | Group), .u.group = (_g) } -#define GD(_f, _g) { .flags = ((_f) | Group | GroupDual), .u.gdual = (_g) } +#define GD(_f, _g) { .flags = ((_f) | GroupDual), .u.gdual = (_g) } #define I(_f, _e) { .flags = (_f), .u.execute = (_e) } #define II(_f, _e, _i) \ { .flags = (_f), .u.execute = (_e), .intercept = x86_intercept_##_i } @@ -3337,9 +3338,9 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) struct decode_cache *c = &ctxt->decode; int rc = X86EMUL_CONTINUE; int mode = ctxt->mode; - int def_op_bytes, def_ad_bytes, dual, goffset, simd_prefix; + int def_op_bytes, def_ad_bytes, goffset, simd_prefix; bool op_prefix = false; - struct opcode opcode, *g_mod012, *g_mod3; + struct opcode opcode; struct operand memop = { .type = OP_NONE }; c->eip = ctxt->eip; @@ -3433,44 +3434,43 @@ done_prefixes: } c->d = opcode.flags; - if (c->d & Group) { - dual = c->d & GroupDual; - c->modrm = insn_fetch(u8, 1, c->eip); - --c->eip; - - if (c->d & GroupDual) { - g_mod012 = opcode.u.gdual->mod012; - g_mod3 = opcode.u.gdual->mod3; - } else - g_mod012 = g_mod3 = opcode.u.group; - - c->d &= ~(Group | GroupDual); - - goffset = (c->modrm >> 3) & 7; - - if ((c->modrm >> 6) == 3) - opcode = g_mod3[goffset]; - else - opcode = g_mod012[goffset]; - - if (opcode.flags & RMExt) { + while (c->d & GroupMask) { + switch (c->d & GroupMask) { + case Group: + c->modrm = insn_fetch(u8, 1, c->eip); + --c->eip; + goffset = (c->modrm >> 3) & 7; + opcode = opcode.u.group[goffset]; + break; + case GroupDual: + c->modrm = insn_fetch(u8, 1, c->eip); + --c->eip; + goffset = (c->modrm >> 3) & 7; + if ((c->modrm >> 6) == 3) + opcode = opcode.u.gdual->mod3[goffset]; + else + opcode = opcode.u.gdual->mod012[goffset]; + break; + case RMExt: goffset = c->modrm & 7; opcode = opcode.u.group[goffset]; - } - - c->d |= opcode.flags; - } - - if (c->d & Prefix) { - if (c->rep_prefix && op_prefix) + break; + case Prefix: + if (c->rep_prefix && op_prefix) + return X86EMUL_UNHANDLEABLE; + simd_prefix = op_prefix ? 
0x66 : c->rep_prefix; + switch (simd_prefix) { + case 0x00: opcode = opcode.u.gprefix->pfx_no; break; + case 0x66: opcode = opcode.u.gprefix->pfx_66; break; + case 0xf2: opcode = opcode.u.gprefix->pfx_f2; break; + case 0xf3: opcode = opcode.u.gprefix->pfx_f3; break; + } + break; + default: return X86EMUL_UNHANDLEABLE; - simd_prefix = op_prefix ? 0x66 : c->rep_prefix; - switch (simd_prefix) { - case 0x00: opcode = opcode.u.gprefix->pfx_no; break; - case 0x66: opcode = opcode.u.gprefix->pfx_66; break; - case 0xf2: opcode = opcode.u.gprefix->pfx_f2; break; - case 0xf3: opcode = opcode.u.gprefix->pfx_f3; break; } + + c->d &= ~GroupMask; c->d |= opcode.flags; } -- cgit v1.2.3-70-g09d2 From 8d7d810255982bfcc355cdb8972d72843acb0cf8 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Tue, 12 Apr 2011 12:36:21 +0300 Subject: KVM: mmio_fault_cr2 is not used Remove unused variable mmio_fault_cr2. Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 1 - arch/x86/kvm/x86.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 6cfc1ab2cdd..afb0e69bd16 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -358,7 +358,6 @@ struct kvm_vcpu_arch { struct fpu guest_fpu; u64 xcr0; - gva_t mmio_fault_cr2; struct kvm_pio_request pio; void *pio_data; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a831d5d8ca1..e3ac212f7c8 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4561,7 +4561,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, bool writeback = true; kvm_clear_exception_queue(vcpu); - vcpu->arch.mmio_fault_cr2 = cr2; + /* * TODO: fix emulate.c to use guest_read/write_register * instead of direct ->regs accesses, can save hundred cycles -- cgit v1.2.3-70-g09d2 From 4947e7cd0ee36e1aa37dfec4f7fa71cc64a2f0fd Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Tue, 12 Apr 2011 12:36:23 +0300 Subject: KVM: emulator: Propagate fault in far jump emulation Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 7466abae84b..3624f202b44 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -3993,7 +3993,8 @@ special_insn: jump_far: memcpy(&sel, c->src.valptr + c->op_bytes, 2); - if (load_segment_descriptor(ctxt, ops, sel, VCPU_SREG_CS)) + rc = load_segment_descriptor(ctxt, ops, sel, VCPU_SREG_CS); + if (rc != X86EMUL_CONTINUE) goto done; c->eip = 0; -- cgit v1.2.3-70-g09d2 From 0004c7c25757f103ddb3a9e4bcfd533aad41f9a0 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Tue, 12 Apr 2011 12:36:24 +0300 Subject: KVM: Fix compound mmio mmio_index should be taken into account when copying data from userspace. 
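To see the bug, consider an emulated access wider than 8 bytes, which is completed one 8-byte chunk per exit to userspace (a simplified sketch of the completion path):

	/* each completed chunk must land at the running offset,
	 * not at the start of the buffer */
	memcpy(vcpu->mmio_data + vcpu->mmio_index, run->mmio.data, 8);
	vcpu->mmio_index += 8;
	if (vcpu->mmio_index < vcpu->mmio_size)
		run->exit_reason = KVM_EXIT_MMIO; /* fetch the next chunk */

Without the offset, every chunk overwrote the first eight bytes and the guest saw corrupted data.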
Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e3ac212f7c8..a9a307a7546 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5576,7 +5576,8 @@ static int complete_mmio(struct kvm_vcpu *vcpu) if (vcpu->mmio_needed) { vcpu->mmio_needed = 0; if (!vcpu->mmio_is_write) - memcpy(vcpu->mmio_data, run->mmio.data, 8); + memcpy(vcpu->mmio_data + vcpu->mmio_index, + run->mmio.data, 8); vcpu->mmio_index += 8; if (vcpu->mmio_index < vcpu->mmio_size) { run->exit_reason = KVM_EXIT_MMIO; -- cgit v1.2.3-70-g09d2 From 2aab2c5b2bac6510b3bd143ca83babee382f4302 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Tue, 12 Apr 2011 12:36:25 +0300 Subject: KVM: call cache_all_regs() only once during instruction emulation Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a9a307a7546..f5f2d3d4457 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4459,6 +4459,12 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu) struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode; int cs_db, cs_l; + /* + * TODO: fix emulate.c to use guest_read/write_register + * instead of direct ->regs accesses, can save hundred cycles + * on Intel for instructions that don't read/change RSP, for + * for example. + */ cache_all_regs(vcpu); kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); @@ -4562,14 +4568,6 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, kvm_clear_exception_queue(vcpu); - /* - * TODO: fix emulate.c to use guest_read/write_register - * instead of direct ->regs accesses, can save hundred cycles - * on Intel for instructions that don't read/change RSP, for - * for example. - */ - cache_all_regs(vcpu); - if (!(emulation_type & EMULTYPE_NO_DECODE)) { init_emulate_ctxt(vcpu); vcpu->arch.emulate_ctxt.interruptibility = 0; -- cgit v1.2.3-70-g09d2 From 4429d5dc1197aaf8188e4febcde54d26a51baf6c Mon Sep 17 00:00:00 2001 From: "BrillyWu@viatech.com.cn" Date: Mon, 25 Apr 2011 13:55:15 +0800 Subject: KVM: Add CPUID support for VIA CPU The CPUID leaves for Centaur are added, so that the features of the PadLock hardware engine on VIA CPUs, such as "ace" and "ace_en", can be passed into the KVM guest. 
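For context, the Centaur range mirrors the basic/extended ranges: leaf 0xC0000000 reports the highest implemented Centaur leaf, and leaf 0xC0000001 returns the PadLock feature bits in EDX. A hypothetical userspace probe (leaf numbers from this patch; __cpuid is gcc's cpuid.h macro; only meaningful on hardware or a guest that exposes these leaves):

	#include <cpuid.h>
	#include <stdio.h>

	int main(void)
	{
		unsigned int eax, ebx, ecx, edx;

		__cpuid(0xC0000000, eax, ebx, ecx, edx);
		if (eax >= 0xC0000001) {
			__cpuid(0xC0000001, eax, ebx, ecx, edx);
			printf("PadLock feature bits (edx): %08x\n", edx);
		}
		return 0;
	}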
Signed-off-by: Brilly Wu Signed-off-by: Kary Jin Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f5f2d3d4457..22bc69ccf3e 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2336,6 +2336,12 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, F(3DNOWPREFETCH) | 0 /* OSVW */ | 0 /* IBS */ | F(XOP) | 0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM); + /* cpuid 0xC0000001.edx */ + const u32 kvm_supported_word5_x86_features = + F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) | + F(ACE2) | F(ACE2_EN) | F(PHE) | F(PHE_EN) | + F(PMM) | F(PMM_EN); + /* all calls to cpuid_count() should be made on the same cpu */ get_cpu(); do_cpuid_1_ent(entry, function, index); @@ -2445,6 +2451,20 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, entry->ecx &= kvm_supported_word6_x86_features; cpuid_mask(&entry->ecx, 6); break; + /*Add support for Centaur's CPUID instruction*/ + case 0xC0000000: + /*Just support up to 0xC0000004 now*/ + entry->eax = min(entry->eax, 0xC0000004); + break; + case 0xC0000001: + entry->edx &= kvm_supported_word5_x86_features; + cpuid_mask(&entry->edx, 5); + break; + case 0xC0000002: + case 0xC0000003: + case 0xC0000004: + /*Now nothing to do, reserved for the future*/ + break; } kvm_x86_ops->set_supported_cpuid(function, entry); @@ -2491,6 +2511,26 @@ static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, if (nent >= cpuid->nent) goto out_free; + /* Add support for Centaur's CPUID instruction. */ + if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR) { + do_cpuid_ent(&cpuid_entries[nent], 0xC0000000, 0, + &nent, cpuid->nent); + + r = -E2BIG; + if (nent >= cpuid->nent) + goto out_free; + + limit = cpuid_entries[nent - 1].eax; + for (func = 0xC0000001; + func <= limit && nent < cpuid->nent; ++func) + do_cpuid_ent(&cpuid_entries[nent], func, 0, + &nent, cpuid->nent); + + r = -E2BIG; + if (nent >= cpuid->nent) + goto out_free; + } + do_cpuid_ent(&cpuid_entries[nent], KVM_CPUID_SIGNATURE, 0, &nent, cpuid->nent); -- cgit v1.2.3-70-g09d2 From b74323dc2b7b5690d18bc7934b98e4665e778a7b Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Wed, 27 Apr 2011 14:06:07 -0400 Subject: KVM: ia64: fix sparse warnings This patch fixes some sparse warnings about "dubious one-bit signed bitfield." 
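The warning is not cosmetic: where plain int bit-fields are signed (as on the usual ABIs), a one-bit field can only hold 0 and -1, so a comparison against 1 silently fails. A minimal illustration:

	struct flags {
		int		bad:1;	/* can only hold 0 and -1 */
		unsigned int	good:1;	/* holds 0 and 1 */
	};

	struct flags f = { .bad = 1, .good = 1 };
	/* f.bad is -1 here, so (f.bad == 1) is false while (f.good == 1) holds */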
Signed-off-by: Jeff Mahoney Originally-by: Jan Blunck Signed-off-by: Jan Blunck Acked-by: Xiantao Zhang Signed-off-by: Avi Kivity --- arch/ia64/kvm/vti.h | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/ia64/kvm/vti.h b/arch/ia64/kvm/vti.h index f6c5617e16a..b214b5b0432 100644 --- a/arch/ia64/kvm/vti.h +++ b/arch/ia64/kvm/vti.h @@ -83,13 +83,13 @@ union vac { unsigned long value; struct { - int a_int:1; - int a_from_int_cr:1; - int a_to_int_cr:1; - int a_from_psr:1; - int a_from_cpuid:1; - int a_cover:1; - int a_bsw:1; + unsigned int a_int:1; + unsigned int a_from_int_cr:1; + unsigned int a_to_int_cr:1; + unsigned int a_from_psr:1; + unsigned int a_from_cpuid:1; + unsigned int a_cover:1; + unsigned int a_bsw:1; long reserved:57; }; }; @@ -97,12 +97,12 @@ union vac { union vdc { unsigned long value; struct { - int d_vmsw:1; - int d_extint:1; - int d_ibr_dbr:1; - int d_pmc:1; - int d_to_pmd:1; - int d_itm:1; + unsigned int d_vmsw:1; + unsigned int d_extint:1; + unsigned int d_ibr_dbr:1; + unsigned int d_pmc:1; + unsigned int d_to_pmd:1; + unsigned int d_itm:1; long reserved:58; }; }; -- cgit v1.2.3-70-g09d2 From 8f74d8e16812d63639871b4e56409b08bdcb66fc Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Thu, 28 Apr 2011 07:08:36 +0900 Subject: KVM: MMU: Fix 64-bit paging breakage on x86_32 Fix regression introduced by commit e30d2a170506830d5eef5e9d7990c5aedf1b0a51 KVM: MMU: Optimize guest page table walk On x86_32, get_user() does not support 64-bit values and we fail to build KVM at the point of 64-bit paging. This patch fixes this by using get_user() twice for that condition. Signed-off-by: Takuya Yoshikawa Reported-by: Jan Kiszka Signed-off-by: Avi Kivity --- arch/x86/kvm/paging_tmpl.h | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 652d56c081f..52450a6b784 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -115,6 +115,20 @@ static unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, pt_element_t gpte) return access; } +static int FNAME(read_gpte)(pt_element_t *pte, pt_element_t __user *ptep_user) +{ +#if defined(CONFIG_X86_32) && (PTTYPE == 64) + u32 *p = (u32 *)pte; + u32 __user *p_user = (u32 __user *)ptep_user; + + if (unlikely(get_user(*p, p_user))) + return -EFAULT; + return get_user(*(p + 1), p_user + 1); +#else + return get_user(*pte, ptep_user); +#endif +} + /* * Fetch a guest pte for a guest virtual address */ @@ -185,7 +199,7 @@ walk: } ptep_user = (pt_element_t __user *)((void *)host_addr + offset); - if (unlikely(get_user(pte, ptep_user))) { + if (unlikely(FNAME(read_gpte)(&pte, ptep_user))) { present = false; break; } -- cgit v1.2.3-70-g09d2 From ae8cc059550b2c2ec7a5e9650bb1be7b988a1208 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Sun, 24 Apr 2011 22:00:50 -0700 Subject: KVM: SVM: Make dump_vmcb static, reduce text dump_vmcb isn't used outside this module, make it static. Shrink text and object by ~1% by standardizing formats. 
$ size arch/x86/kvm/svm.o* text data bss dec hex filename 52910 580 10072 63562 f84a arch/x86/kvm/svm.o.new 53563 580 10072 64215 fad7 arch/x86/kvm/svm.o.old Signed-off-by: Joe Perches Acked-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/kvm/svm.c | 176 ++++++++++++++++++++++++++++------------------------- 1 file changed, 94 insertions(+), 82 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 9cff0368e1f..506e4fe23ad 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3191,97 +3191,109 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { [SVM_EXIT_NPF] = pf_interception, }; -void dump_vmcb(struct kvm_vcpu *vcpu) +static void dump_vmcb(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); struct vmcb_control_area *control = &svm->vmcb->control; struct vmcb_save_area *save = &svm->vmcb->save; pr_err("VMCB Control Area:\n"); - pr_err("cr_read: %04x\n", control->intercept_cr & 0xffff); - pr_err("cr_write: %04x\n", control->intercept_cr >> 16); - pr_err("dr_read: %04x\n", control->intercept_dr & 0xffff); - pr_err("dr_write: %04x\n", control->intercept_dr >> 16); - pr_err("exceptions: %08x\n", control->intercept_exceptions); - pr_err("intercepts: %016llx\n", control->intercept); - pr_err("pause filter count: %d\n", control->pause_filter_count); - pr_err("iopm_base_pa: %016llx\n", control->iopm_base_pa); - pr_err("msrpm_base_pa: %016llx\n", control->msrpm_base_pa); - pr_err("tsc_offset: %016llx\n", control->tsc_offset); - pr_err("asid: %d\n", control->asid); - pr_err("tlb_ctl: %d\n", control->tlb_ctl); - pr_err("int_ctl: %08x\n", control->int_ctl); - pr_err("int_vector: %08x\n", control->int_vector); - pr_err("int_state: %08x\n", control->int_state); - pr_err("exit_code: %08x\n", control->exit_code); - pr_err("exit_info1: %016llx\n", control->exit_info_1); - pr_err("exit_info2: %016llx\n", control->exit_info_2); - pr_err("exit_int_info: %08x\n", control->exit_int_info); - pr_err("exit_int_info_err: %08x\n", control->exit_int_info_err); - pr_err("nested_ctl: %lld\n", control->nested_ctl); - pr_err("nested_cr3: %016llx\n", control->nested_cr3); - pr_err("event_inj: %08x\n", control->event_inj); - pr_err("event_inj_err: %08x\n", control->event_inj_err); - pr_err("lbr_ctl: %lld\n", control->lbr_ctl); - pr_err("next_rip: %016llx\n", control->next_rip); + pr_err("%-20s%04x\n", "cr_read:", control->intercept_cr & 0xffff); + pr_err("%-20s%04x\n", "cr_write:", control->intercept_cr >> 16); + pr_err("%-20s%04x\n", "dr_read:", control->intercept_dr & 0xffff); + pr_err("%-20s%04x\n", "dr_write:", control->intercept_dr >> 16); + pr_err("%-20s%08x\n", "exceptions:", control->intercept_exceptions); + pr_err("%-20s%016llx\n", "intercepts:", control->intercept); + pr_err("%-20s%d\n", "pause filter count:", control->pause_filter_count); + pr_err("%-20s%016llx\n", "iopm_base_pa:", control->iopm_base_pa); + pr_err("%-20s%016llx\n", "msrpm_base_pa:", control->msrpm_base_pa); + pr_err("%-20s%016llx\n", "tsc_offset:", control->tsc_offset); + pr_err("%-20s%d\n", "asid:", control->asid); + pr_err("%-20s%d\n", "tlb_ctl:", control->tlb_ctl); + pr_err("%-20s%08x\n", "int_ctl:", control->int_ctl); + pr_err("%-20s%08x\n", "int_vector:", control->int_vector); + pr_err("%-20s%08x\n", "int_state:", control->int_state); + pr_err("%-20s%08x\n", "exit_code:", control->exit_code); + pr_err("%-20s%016llx\n", "exit_info1:", control->exit_info_1); + pr_err("%-20s%016llx\n", "exit_info2:", control->exit_info_2); + pr_err("%-20s%08x\n", "exit_int_info:", 
control->exit_int_info); + pr_err("%-20s%08x\n", "exit_int_info_err:", control->exit_int_info_err); + pr_err("%-20s%lld\n", "nested_ctl:", control->nested_ctl); + pr_err("%-20s%016llx\n", "nested_cr3:", control->nested_cr3); + pr_err("%-20s%08x\n", "event_inj:", control->event_inj); + pr_err("%-20s%08x\n", "event_inj_err:", control->event_inj_err); + pr_err("%-20s%lld\n", "lbr_ctl:", control->lbr_ctl); + pr_err("%-20s%016llx\n", "next_rip:", control->next_rip); pr_err("VMCB State Save Area:\n"); - pr_err("es: s: %04x a: %04x l: %08x b: %016llx\n", - save->es.selector, save->es.attrib, - save->es.limit, save->es.base); - pr_err("cs: s: %04x a: %04x l: %08x b: %016llx\n", - save->cs.selector, save->cs.attrib, - save->cs.limit, save->cs.base); - pr_err("ss: s: %04x a: %04x l: %08x b: %016llx\n", - save->ss.selector, save->ss.attrib, - save->ss.limit, save->ss.base); - pr_err("ds: s: %04x a: %04x l: %08x b: %016llx\n", - save->ds.selector, save->ds.attrib, - save->ds.limit, save->ds.base); - pr_err("fs: s: %04x a: %04x l: %08x b: %016llx\n", - save->fs.selector, save->fs.attrib, - save->fs.limit, save->fs.base); - pr_err("gs: s: %04x a: %04x l: %08x b: %016llx\n", - save->gs.selector, save->gs.attrib, - save->gs.limit, save->gs.base); - pr_err("gdtr: s: %04x a: %04x l: %08x b: %016llx\n", - save->gdtr.selector, save->gdtr.attrib, - save->gdtr.limit, save->gdtr.base); - pr_err("ldtr: s: %04x a: %04x l: %08x b: %016llx\n", - save->ldtr.selector, save->ldtr.attrib, - save->ldtr.limit, save->ldtr.base); - pr_err("idtr: s: %04x a: %04x l: %08x b: %016llx\n", - save->idtr.selector, save->idtr.attrib, - save->idtr.limit, save->idtr.base); - pr_err("tr: s: %04x a: %04x l: %08x b: %016llx\n", - save->tr.selector, save->tr.attrib, - save->tr.limit, save->tr.base); + pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", + "es:", + save->es.selector, save->es.attrib, + save->es.limit, save->es.base); + pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", + "cs:", + save->cs.selector, save->cs.attrib, + save->cs.limit, save->cs.base); + pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", + "ss:", + save->ss.selector, save->ss.attrib, + save->ss.limit, save->ss.base); + pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", + "ds:", + save->ds.selector, save->ds.attrib, + save->ds.limit, save->ds.base); + pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", + "fs:", + save->fs.selector, save->fs.attrib, + save->fs.limit, save->fs.base); + pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", + "gs:", + save->gs.selector, save->gs.attrib, + save->gs.limit, save->gs.base); + pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", + "gdtr:", + save->gdtr.selector, save->gdtr.attrib, + save->gdtr.limit, save->gdtr.base); + pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", + "ldtr:", + save->ldtr.selector, save->ldtr.attrib, + save->ldtr.limit, save->ldtr.base); + pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", + "idtr:", + save->idtr.selector, save->idtr.attrib, + save->idtr.limit, save->idtr.base); + pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", + "tr:", + save->tr.selector, save->tr.attrib, + save->tr.limit, save->tr.base); pr_err("cpl: %d efer: %016llx\n", save->cpl, save->efer); - pr_err("cr0: %016llx cr2: %016llx\n", - save->cr0, save->cr2); - pr_err("cr3: %016llx cr4: %016llx\n", - save->cr3, save->cr4); - pr_err("dr6: %016llx dr7: %016llx\n", - save->dr6, save->dr7); - pr_err("rip: %016llx rflags: %016llx\n", - save->rip, save->rflags); - pr_err("rsp: %016llx rax: %016llx\n", - save->rsp, save->rax); - 
pr_err("star: %016llx lstar: %016llx\n", - save->star, save->lstar); - pr_err("cstar: %016llx sfmask: %016llx\n", - save->cstar, save->sfmask); - pr_err("kernel_gs_base: %016llx sysenter_cs: %016llx\n", - save->kernel_gs_base, save->sysenter_cs); - pr_err("sysenter_esp: %016llx sysenter_eip: %016llx\n", - save->sysenter_esp, save->sysenter_eip); - pr_err("gpat: %016llx dbgctl: %016llx\n", - save->g_pat, save->dbgctl); - pr_err("br_from: %016llx br_to: %016llx\n", - save->br_from, save->br_to); - pr_err("excp_from: %016llx excp_to: %016llx\n", - save->last_excp_from, save->last_excp_to); - + pr_err("%-15s %016llx %-13s %016llx\n", + "cr0:", save->cr0, "cr2:", save->cr2); + pr_err("%-15s %016llx %-13s %016llx\n", + "cr3:", save->cr3, "cr4:", save->cr4); + pr_err("%-15s %016llx %-13s %016llx\n", + "dr6:", save->dr6, "dr7:", save->dr7); + pr_err("%-15s %016llx %-13s %016llx\n", + "rip:", save->rip, "rflags:", save->rflags); + pr_err("%-15s %016llx %-13s %016llx\n", + "rsp:", save->rsp, "rax:", save->rax); + pr_err("%-15s %016llx %-13s %016llx\n", + "star:", save->star, "lstar:", save->lstar); + pr_err("%-15s %016llx %-13s %016llx\n", + "cstar:", save->cstar, "sfmask:", save->sfmask); + pr_err("%-15s %016llx %-13s %016llx\n", + "kernel_gs_base:", save->kernel_gs_base, + "sysenter_cs:", save->sysenter_cs); + pr_err("%-15s %016llx %-13s %016llx\n", + "sysenter_esp:", save->sysenter_esp, + "sysenter_eip:", save->sysenter_eip); + pr_err("%-15s %016llx %-13s %016llx\n", + "gpat:", save->g_pat, "dbgctl:", save->dbgctl); + pr_err("%-15s %016llx %-13s %016llx\n", + "br_from:", save->br_from, "br_to:", save->br_to); + pr_err("%-15s %016llx %-13s %016llx\n", + "excp_from:", save->last_excp_from, + "excp_to:", save->last_excp_to); } static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2) -- cgit v1.2.3-70-g09d2 From 0a434bb2bf094f463ca3ca71ac42cea9e423048f Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 28 Apr 2011 15:59:33 +0300 Subject: KVM: VMX: Avoid reading %rip unnecessarily when handling exceptions Avoids a VMREAD. Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 3f6e9bff016..139a5cb1f5e 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3170,7 +3170,6 @@ static int handle_exception(struct kvm_vcpu *vcpu) } error_code = 0; - rip = kvm_rip_read(vcpu); if (intr_info & INTR_INFO_DELIVER_CODE_MASK) error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE); if (is_page_fault(intr_info)) { @@ -3217,6 +3216,7 @@ static int handle_exception(struct kvm_vcpu *vcpu) vmx->vcpu.arch.event_exit_inst_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN); kvm_run->exit_reason = KVM_EXIT_DEBUG; + rip = kvm_rip_read(vcpu); kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip; kvm_run->debug.arch.exception = ex_no; break; -- cgit v1.2.3-70-g09d2 From 1aa366163b8b69f660cf94fd5062fa44859e4318 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 27 Apr 2011 13:20:30 +0300 Subject: KVM: x86 emulator: consolidate segment accessors Instead of separate accessors for the segment selector and cached descriptor, use one accessor for both. This simplifies the code somewhat. 
Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_emulate.h | 13 ++-- arch/x86/kvm/emulate.c | 122 +++++++++++++++++++++---------------- arch/x86/kvm/x86.c | 41 +++---------- 3 files changed, 83 insertions(+), 93 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 28114f581fa..0049211959c 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -164,15 +164,10 @@ struct x86_emulate_ops { int size, unsigned short port, const void *val, unsigned int count); - bool (*get_cached_descriptor)(struct x86_emulate_ctxt *ctxt, - struct desc_struct *desc, u32 *base3, - int seg); - void (*set_cached_descriptor)(struct x86_emulate_ctxt *ctxt, - struct desc_struct *desc, u32 base3, - int seg); - u16 (*get_segment_selector)(struct x86_emulate_ctxt *ctxt, int seg); - void (*set_segment_selector)(struct x86_emulate_ctxt *ctxt, - u16 sel, int seg); + bool (*get_segment)(struct x86_emulate_ctxt *ctxt, u16 *selector, + struct desc_struct *desc, u32 *base3, int seg); + void (*set_segment)(struct x86_emulate_ctxt *ctxt, u16 selector, + struct desc_struct *desc, u32 base3, int seg); unsigned long (*get_cached_segment_base)(struct x86_emulate_ctxt *ctxt, int seg); void (*get_gdt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt); diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 3624f202b44..59992484f5f 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -553,6 +553,26 @@ static int emulate_nm(struct x86_emulate_ctxt *ctxt) return emulate_exception(ctxt, NM_VECTOR, 0, false); } +static u16 get_segment_selector(struct x86_emulate_ctxt *ctxt, unsigned seg) +{ + u16 selector; + struct desc_struct desc; + + ctxt->ops->get_segment(ctxt, &selector, &desc, NULL, seg); + return selector; +} + +static void set_segment_selector(struct x86_emulate_ctxt *ctxt, u16 selector, + unsigned seg) +{ + u16 dummy; + u32 base3; + struct desc_struct desc; + + ctxt->ops->get_segment(ctxt, &dummy, &desc, &base3, seg); + ctxt->ops->set_segment(ctxt, selector, &desc, base3, seg); +} + static int __linearize(struct x86_emulate_ctxt *ctxt, struct segmented_address addr, unsigned size, bool write, bool fetch, @@ -563,6 +583,7 @@ static int __linearize(struct x86_emulate_ctxt *ctxt, bool usable; ulong la; u32 lim; + u16 sel; unsigned cpl, rpl; la = seg_base(ctxt, ctxt->ops, addr.seg) + addr.ea; @@ -574,8 +595,8 @@ static int __linearize(struct x86_emulate_ctxt *ctxt, return emulate_gp(ctxt, 0); break; default: - usable = ctxt->ops->get_cached_descriptor(ctxt, &desc, NULL, - addr.seg); + usable = ctxt->ops->get_segment(ctxt, &sel, &desc, NULL, + addr.seg); if (!usable) goto bad; /* code segment or read-only data segment */ @@ -598,7 +619,7 @@ static int __linearize(struct x86_emulate_ctxt *ctxt, goto bad; } cpl = ctxt->ops->cpl(ctxt); - rpl = ctxt->ops->get_segment_selector(ctxt, addr.seg) & 3; + rpl = sel & 3; cpl = max(cpl, rpl); if (!(desc.type & 8)) { /* data segment */ @@ -1142,9 +1163,10 @@ static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt, { if (selector & 1 << 2) { struct desc_struct desc; + u16 sel; + memset (dt, 0, sizeof *dt); - if (!ops->get_cached_descriptor(ctxt, &desc, NULL, - VCPU_SREG_LDTR)) + if (!ops->get_segment(ctxt, &sel, &desc, NULL, VCPU_SREG_LDTR)) return; dt->size = desc_limit_scaled(&desc); /* what if limit > 65535? 
*/ @@ -1305,8 +1327,7 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, return ret; } load: - ops->set_segment_selector(ctxt, selector, seg); - ops->set_cached_descriptor(ctxt, &seg_desc, 0, seg); + ops->set_segment(ctxt, selector, &seg_desc, 0, seg); return X86EMUL_CONTINUE; exception: emulate_exception(ctxt, err_vec, err_code, true); @@ -1464,7 +1485,7 @@ static int emulate_push_sreg(struct x86_emulate_ctxt *ctxt, { struct decode_cache *c = &ctxt->decode; - c->src.val = ops->get_segment_selector(ctxt, seg); + c->src.val = get_segment_selector(ctxt, seg); return em_push(ctxt); } @@ -1552,7 +1573,7 @@ int emulate_int_real(struct x86_emulate_ctxt *ctxt, ctxt->eflags &= ~(EFLG_IF | EFLG_TF | EFLG_AC); - c->src.val = ops->get_segment_selector(ctxt, VCPU_SREG_CS); + c->src.val = get_segment_selector(ctxt, VCPU_SREG_CS); rc = em_push(ctxt); if (rc != X86EMUL_CONTINUE) return rc; @@ -1838,8 +1859,10 @@ setup_syscalls_segments(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops, struct desc_struct *cs, struct desc_struct *ss) { + u16 selector; + memset(cs, 0, sizeof(struct desc_struct)); - ops->get_cached_descriptor(ctxt, cs, NULL, VCPU_SREG_CS); + ops->get_segment(ctxt, &selector, cs, NULL, VCPU_SREG_CS); memset(ss, 0, sizeof(struct desc_struct)); cs->l = 0; /* will be adjusted later */ @@ -1888,10 +1911,8 @@ emulate_syscall(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) cs.d = 0; cs.l = 1; } - ops->set_cached_descriptor(ctxt, &cs, 0, VCPU_SREG_CS); - ops->set_segment_selector(ctxt, cs_sel, VCPU_SREG_CS); - ops->set_cached_descriptor(ctxt, &ss, 0, VCPU_SREG_SS); - ops->set_segment_selector(ctxt, ss_sel, VCPU_SREG_SS); + ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS); + ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS); c->regs[VCPU_REGS_RCX] = c->eip; if (efer & EFER_LMA) { @@ -1961,10 +1982,8 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) cs.l = 1; } - ops->set_cached_descriptor(ctxt, &cs, 0, VCPU_SREG_CS); - ops->set_segment_selector(ctxt, cs_sel, VCPU_SREG_CS); - ops->set_cached_descriptor(ctxt, &ss, 0, VCPU_SREG_SS); - ops->set_segment_selector(ctxt, ss_sel, VCPU_SREG_SS); + ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS); + ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS); ops->get_msr(ctxt, MSR_IA32_SYSENTER_EIP, &msr_data); c->eip = msr_data; @@ -2018,10 +2037,8 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) cs_sel |= SELECTOR_RPL_MASK; ss_sel |= SELECTOR_RPL_MASK; - ops->set_cached_descriptor(ctxt, &cs, 0, VCPU_SREG_CS); - ops->set_segment_selector(ctxt, cs_sel, VCPU_SREG_CS); - ops->set_cached_descriptor(ctxt, &ss, 0, VCPU_SREG_SS); - ops->set_segment_selector(ctxt, ss_sel, VCPU_SREG_SS); + ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS); + ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS); c->eip = c->regs[VCPU_REGS_RDX]; c->regs[VCPU_REGS_RSP] = c->regs[VCPU_REGS_RCX]; @@ -2048,11 +2065,11 @@ static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt, struct desc_struct tr_seg; u32 base3; int r; - u16 io_bitmap_ptr, perm, bit_idx = port & 0x7; + u16 tr, io_bitmap_ptr, perm, bit_idx = port & 0x7; unsigned mask = (1 << len) - 1; unsigned long base; - ops->get_cached_descriptor(ctxt, &tr_seg, &base3, VCPU_SREG_TR); + ops->get_segment(ctxt, &tr, &tr_seg, &base3, VCPU_SREG_TR); if (!tr_seg.p) return false; if (desc_limit_scaled(&tr_seg) < 103) @@ -2107,11 +2124,11 @@ static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt, tss->si = 
c->regs[VCPU_REGS_RSI]; tss->di = c->regs[VCPU_REGS_RDI]; - tss->es = ops->get_segment_selector(ctxt, VCPU_SREG_ES); - tss->cs = ops->get_segment_selector(ctxt, VCPU_SREG_CS); - tss->ss = ops->get_segment_selector(ctxt, VCPU_SREG_SS); - tss->ds = ops->get_segment_selector(ctxt, VCPU_SREG_DS); - tss->ldt = ops->get_segment_selector(ctxt, VCPU_SREG_LDTR); + tss->es = get_segment_selector(ctxt, VCPU_SREG_ES); + tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS); + tss->ss = get_segment_selector(ctxt, VCPU_SREG_SS); + tss->ds = get_segment_selector(ctxt, VCPU_SREG_DS); + tss->ldt = get_segment_selector(ctxt, VCPU_SREG_LDTR); } static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt, @@ -2136,11 +2153,11 @@ static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt, * SDM says that segment selectors are loaded before segment * descriptors */ - ops->set_segment_selector(ctxt, tss->ldt, VCPU_SREG_LDTR); - ops->set_segment_selector(ctxt, tss->es, VCPU_SREG_ES); - ops->set_segment_selector(ctxt, tss->cs, VCPU_SREG_CS); - ops->set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS); - ops->set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS); + set_segment_selector(ctxt, tss->ldt, VCPU_SREG_LDTR); + set_segment_selector(ctxt, tss->es, VCPU_SREG_ES); + set_segment_selector(ctxt, tss->cs, VCPU_SREG_CS); + set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS); + set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS); /* * Now load segment descriptors. If fault happenes at this stage @@ -2227,13 +2244,13 @@ static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt, tss->esi = c->regs[VCPU_REGS_RSI]; tss->edi = c->regs[VCPU_REGS_RDI]; - tss->es = ops->get_segment_selector(ctxt, VCPU_SREG_ES); - tss->cs = ops->get_segment_selector(ctxt, VCPU_SREG_CS); - tss->ss = ops->get_segment_selector(ctxt, VCPU_SREG_SS); - tss->ds = ops->get_segment_selector(ctxt, VCPU_SREG_DS); - tss->fs = ops->get_segment_selector(ctxt, VCPU_SREG_FS); - tss->gs = ops->get_segment_selector(ctxt, VCPU_SREG_GS); - tss->ldt_selector = ops->get_segment_selector(ctxt, VCPU_SREG_LDTR); + tss->es = get_segment_selector(ctxt, VCPU_SREG_ES); + tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS); + tss->ss = get_segment_selector(ctxt, VCPU_SREG_SS); + tss->ds = get_segment_selector(ctxt, VCPU_SREG_DS); + tss->fs = get_segment_selector(ctxt, VCPU_SREG_FS); + tss->gs = get_segment_selector(ctxt, VCPU_SREG_GS); + tss->ldt_selector = get_segment_selector(ctxt, VCPU_SREG_LDTR); } static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt, @@ -2260,13 +2277,13 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt, * SDM says that segment selectors are loaded before segment * descriptors */ - ops->set_segment_selector(ctxt, tss->ldt_selector, VCPU_SREG_LDTR); - ops->set_segment_selector(ctxt, tss->es, VCPU_SREG_ES); - ops->set_segment_selector(ctxt, tss->cs, VCPU_SREG_CS); - ops->set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS); - ops->set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS); - ops->set_segment_selector(ctxt, tss->fs, VCPU_SREG_FS); - ops->set_segment_selector(ctxt, tss->gs, VCPU_SREG_GS); + set_segment_selector(ctxt, tss->ldt_selector, VCPU_SREG_LDTR); + set_segment_selector(ctxt, tss->es, VCPU_SREG_ES); + set_segment_selector(ctxt, tss->cs, VCPU_SREG_CS); + set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS); + set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS); + set_segment_selector(ctxt, tss->fs, VCPU_SREG_FS); + set_segment_selector(ctxt, tss->gs, VCPU_SREG_GS); /* * Now load segment descriptors. 
If fault happenes at this stage @@ -2348,7 +2365,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, { struct desc_struct curr_tss_desc, next_tss_desc; int ret; - u16 old_tss_sel = ops->get_segment_selector(ctxt, VCPU_SREG_TR); + u16 old_tss_sel = get_segment_selector(ctxt, VCPU_SREG_TR); ulong old_tss_base = ops->get_cached_segment_base(ctxt, VCPU_SREG_TR); u32 desc_limit; @@ -2411,8 +2428,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, } ops->set_cr(ctxt, 0, ops->get_cr(ctxt, 0) | X86_CR0_TS); - ops->set_cached_descriptor(ctxt, &next_tss_desc, 0, VCPU_SREG_TR); - ops->set_segment_selector(ctxt, tss_selector, VCPU_SREG_TR); + ops->set_segment(ctxt, tss_selector, &next_tss_desc, 0, VCPU_SREG_TR); if (has_error_code) { struct decode_cache *c = &ctxt->decode; @@ -2503,7 +2519,7 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt) ulong old_eip; int rc; - old_cs = ctxt->ops->get_segment_selector(ctxt, VCPU_SREG_CS); + old_cs = get_segment_selector(ctxt, VCPU_SREG_CS); old_eip = c->eip; memcpy(&sel, c->src.valptr + c->op_bytes, 2); @@ -3881,7 +3897,7 @@ special_insn: rc = emulate_ud(ctxt); goto done; } - c->dst.val = ops->get_segment_selector(ctxt, c->modrm_reg); + c->dst.val = get_segment_selector(ctxt, c->modrm_reg); break; case 0x8d: /* lea r16/r32, m */ c->dst.val = c->src.addr.mem.ea; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 22bc69ccf3e..77c9d8673dc 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4304,13 +4304,14 @@ static unsigned long emulator_get_cached_segment_base( return get_segment_base(emul_to_vcpu(ctxt), seg); } -static bool emulator_get_cached_descriptor(struct x86_emulate_ctxt *ctxt, - struct desc_struct *desc, u32 *base3, - int seg) +static bool emulator_get_segment(struct x86_emulate_ctxt *ctxt, u16 *selector, + struct desc_struct *desc, u32 *base3, + int seg) { struct kvm_segment var; kvm_get_segment(emul_to_vcpu(ctxt), &var, seg); + *selector = var.selector; if (var.unusable) return false; @@ -4335,16 +4336,14 @@ static bool emulator_get_cached_descriptor(struct x86_emulate_ctxt *ctxt, return true; } -static void emulator_set_cached_descriptor(struct x86_emulate_ctxt *ctxt, - struct desc_struct *desc, u32 base3, - int seg) +static void emulator_set_segment(struct x86_emulate_ctxt *ctxt, u16 selector, + struct desc_struct *desc, u32 base3, + int seg) { struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); struct kvm_segment var; - /* needed to preserve selector */ - kvm_get_segment(vcpu, &var, seg); - + var.selector = selector; var.base = get_desc_base(desc); #ifdef CONFIG_X86_64 var.base |= ((u64)base3) << 32; @@ -4368,24 +4367,6 @@ static void emulator_set_cached_descriptor(struct x86_emulate_ctxt *ctxt, return; } -static u16 emulator_get_segment_selector(struct x86_emulate_ctxt *ctxt, int seg) -{ - struct kvm_segment kvm_seg; - - kvm_get_segment(emul_to_vcpu(ctxt), &kvm_seg, seg); - return kvm_seg.selector; -} - -static void emulator_set_segment_selector(struct x86_emulate_ctxt *ctxt, - u16 sel, int seg) -{ - struct kvm_segment kvm_seg; - - kvm_get_segment(emul_to_vcpu(ctxt), &kvm_seg, seg); - kvm_seg.selector = sel; - kvm_set_segment(emul_to_vcpu(ctxt), &kvm_seg, seg); -} - static int emulator_get_msr(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 *pdata) { @@ -4436,10 +4417,8 @@ static struct x86_emulate_ops emulate_ops = { .invlpg = emulator_invlpg, .pio_in_emulated = emulator_pio_in_emulated, .pio_out_emulated = emulator_pio_out_emulated, - .get_cached_descriptor = emulator_get_cached_descriptor, - 
.set_cached_descriptor = emulator_set_cached_descriptor, - .get_segment_selector = emulator_get_segment_selector, - .set_segment_selector = emulator_set_segment_selector, + .get_segment = emulator_get_segment, + .set_segment = emulator_set_segment, .get_cached_segment_base = emulator_get_cached_segment_base, .get_gdt = emulator_get_gdt, .get_idt = emulator_get_idt, -- cgit v1.2.3-70-g09d2 From 2fb92db1ec08f3235c500e7f460eeb78092d844e Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 27 Apr 2011 19:42:18 +0300 Subject: KVM: VMX: Cache vmcs segment fields Since the emulator now checks segment limits and access rights, it generates a lot more accesses to the vmcs segment fields. Undo some of the performance hit by caching those fields in a read-only cache (the entire cache is invalidated on any write, or on guest exit). Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/vmx.c | 102 ++++++++++++++++++++++++++++++++++++---- 2 files changed, 93 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index afb0e69bd16..d2ac8e2ee89 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -136,6 +136,7 @@ enum kvm_reg_ex { VCPU_EXREG_CR3, VCPU_EXREG_RFLAGS, VCPU_EXREG_CPL, + VCPU_EXREG_SEGMENTS, }; enum { diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 139a5cb1f5e..4c3fa0f6746 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -162,6 +162,10 @@ struct vcpu_vmx { u32 ar; } tr, es, ds, fs, gs; } rmode; + struct { + u32 bitmask; /* 4 bits per segment (1 bit per field) */ + struct kvm_save_segment seg[8]; + } segment_cache; int vpid; bool emulation_required; @@ -174,6 +178,15 @@ struct vcpu_vmx { bool rdtscp_enabled; }; +enum segment_cache_field { + SEG_FIELD_SEL = 0, + SEG_FIELD_BASE = 1, + SEG_FIELD_LIMIT = 2, + SEG_FIELD_AR = 3, + + SEG_FIELD_NR = 4 +}; + static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) { return container_of(vcpu, struct vcpu_vmx, vcpu); @@ -646,6 +659,62 @@ static void vmcs_set_bits(unsigned long field, u32 mask) vmcs_writel(field, vmcs_readl(field) | mask); } +static void vmx_segment_cache_clear(struct vcpu_vmx *vmx) +{ + vmx->segment_cache.bitmask = 0; +} + +static bool vmx_segment_cache_test_set(struct vcpu_vmx *vmx, unsigned seg, + unsigned field) +{ + bool ret; + u32 mask = 1 << (seg * SEG_FIELD_NR + field); + + if (!(vmx->vcpu.arch.regs_avail & (1 << VCPU_EXREG_SEGMENTS))) { + vmx->vcpu.arch.regs_avail |= (1 << VCPU_EXREG_SEGMENTS); + vmx->segment_cache.bitmask = 0; + } + ret = vmx->segment_cache.bitmask & mask; + vmx->segment_cache.bitmask |= mask; + return ret; +} + +static u16 vmx_read_guest_seg_selector(struct vcpu_vmx *vmx, unsigned seg) +{ + u16 *p = &vmx->segment_cache.seg[seg].selector; + + if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_SEL)) + *p = vmcs_read16(kvm_vmx_segment_fields[seg].selector); + return *p; +} + +static ulong vmx_read_guest_seg_base(struct vcpu_vmx *vmx, unsigned seg) +{ + ulong *p = &vmx->segment_cache.seg[seg].base; + + if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_BASE)) + *p = vmcs_readl(kvm_vmx_segment_fields[seg].base); + return *p; +} + +static u32 vmx_read_guest_seg_limit(struct vcpu_vmx *vmx, unsigned seg) +{ + u32 *p = &vmx->segment_cache.seg[seg].limit; + + if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_LIMIT)) + *p = vmcs_read32(kvm_vmx_segment_fields[seg].limit); + return *p; +} + +static u32 vmx_read_guest_seg_ar(struct vcpu_vmx *vmx, unsigned seg) +{
+ u32 *p = &vmx->segment_cache.seg[seg].ar; + + if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_AR)) + *p = vmcs_read32(kvm_vmx_segment_fields[seg].ar_bytes); + return *p; +} + static void update_exception_bitmap(struct kvm_vcpu *vcpu) { u32 eb; @@ -1271,9 +1340,11 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) break; #ifdef CONFIG_X86_64 case MSR_FS_BASE: + vmx_segment_cache_clear(vmx); vmcs_writel(GUEST_FS_BASE, data); break; case MSR_GS_BASE: + vmx_segment_cache_clear(vmx); vmcs_writel(GUEST_GS_BASE, data); break; case MSR_KERNEL_GS_BASE: @@ -1717,6 +1788,8 @@ static void enter_pmode(struct kvm_vcpu *vcpu) vmx->emulation_required = 1; vmx->rmode.vm86_active = 0; + vmx_segment_cache_clear(vmx); + vmcs_write16(GUEST_TR_SELECTOR, vmx->rmode.tr.selector); vmcs_writel(GUEST_TR_BASE, vmx->rmode.tr.base); vmcs_write32(GUEST_TR_LIMIT, vmx->rmode.tr.limit); @@ -1740,6 +1813,8 @@ static void enter_pmode(struct kvm_vcpu *vcpu) fix_pmode_dataseg(VCPU_SREG_GS, &vmx->rmode.gs); fix_pmode_dataseg(VCPU_SREG_FS, &vmx->rmode.fs); + vmx_segment_cache_clear(vmx); + vmcs_write16(GUEST_SS_SELECTOR, 0); vmcs_write32(GUEST_SS_AR_BYTES, 0x93); @@ -1803,6 +1878,8 @@ static void enter_rmode(struct kvm_vcpu *vcpu) vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); } + vmx_segment_cache_clear(vmx); + vmx->rmode.tr.selector = vmcs_read16(GUEST_TR_SELECTOR); vmx->rmode.tr.base = vmcs_readl(GUEST_TR_BASE); vmcs_writel(GUEST_TR_BASE, rmode_tss_base(vcpu->kvm)); @@ -1879,6 +1956,8 @@ static void enter_lmode(struct kvm_vcpu *vcpu) { u32 guest_tr_ar; + vmx_segment_cache_clear(to_vmx(vcpu)); + guest_tr_ar = vmcs_read32(GUEST_TR_AR_BYTES); if ((guest_tr_ar & AR_TYPE_MASK) != AR_TYPE_BUSY_64_TSS) { printk(KERN_DEBUG "%s: tss fixup for long mode. \n", @@ -2082,7 +2161,6 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg) { struct vcpu_vmx *vmx = to_vmx(vcpu); - struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; struct kvm_save_segment *save; u32 ar; @@ -2104,13 +2182,13 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu, var->limit = save->limit; ar = save->ar; if (seg == VCPU_SREG_TR - || var->selector == vmcs_read16(sf->selector)) + || var->selector == vmx_read_guest_seg_selector(vmx, seg)) goto use_saved_rmode_seg; } - var->base = vmcs_readl(sf->base); - var->limit = vmcs_read32(sf->limit); - var->selector = vmcs_read16(sf->selector); - ar = vmcs_read32(sf->ar_bytes); + var->base = vmx_read_guest_seg_base(vmx, seg); + var->limit = vmx_read_guest_seg_limit(vmx, seg); + var->selector = vmx_read_guest_seg_selector(vmx, seg); + ar = vmx_read_guest_seg_ar(vmx, seg); use_saved_rmode_seg: if ((ar & AR_UNUSABLE_MASK) && !emulate_invalid_guest_state) ar = 0; @@ -2127,14 +2205,13 @@ use_saved_rmode_seg: static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg) { - struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; struct kvm_segment s; if (to_vmx(vcpu)->rmode.vm86_active) { vmx_get_segment(vcpu, &s, seg); return s.base; } - return vmcs_readl(sf->base); + return vmx_read_guest_seg_base(to_vmx(vcpu), seg); } static int __vmx_get_cpl(struct kvm_vcpu *vcpu) @@ -2146,7 +2223,7 @@ static int __vmx_get_cpl(struct kvm_vcpu *vcpu) && (kvm_get_rflags(vcpu) & X86_EFLAGS_VM)) /* if virtual 8086 */ return 3; - return vmcs_read16(GUEST_CS_SELECTOR) & 3; + return vmx_read_guest_seg_selector(to_vmx(vcpu), VCPU_SREG_CS) & 3; } static int vmx_get_cpl(struct kvm_vcpu *vcpu) @@ -2188,6 +2265,8 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, 
struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; u32 ar; + vmx_segment_cache_clear(vmx); + if (vmx->rmode.vm86_active && seg == VCPU_SREG_TR) { vmcs_write16(sf->selector, var->selector); vmx->rmode.tr.selector = var->selector; @@ -2229,7 +2308,7 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) { - u32 ar = vmcs_read32(GUEST_CS_AR_BYTES); + u32 ar = vmx_read_guest_seg_ar(to_vmx(vcpu), VCPU_SREG_CS); *db = (ar >> 14) & 1; *l = (ar >> 13) & 1; @@ -2816,6 +2895,8 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) if (ret != 0) goto out; + vmx_segment_cache_clear(vmx); + seg_setup(VCPU_SREG_CS); /* * GUEST_CS_BASE should really be 0xffff0000, but VT vm86 mode @@ -4188,6 +4269,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) | (1 << VCPU_EXREG_RFLAGS) | (1 << VCPU_EXREG_CPL) | (1 << VCPU_EXREG_PDPTR) + | (1 << VCPU_EXREG_SEGMENTS) | (1 << VCPU_EXREG_CR3)); vcpu->arch.regs_dirty = 0; -- cgit v1.2.3-70-g09d2 From 90d34b0e45df3bfe522e9e9d604c4c1a0253699d Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Tue, 29 Mar 2011 16:49:10 -0500 Subject: KVM: PPC: e500: emulate SVR Return the actual host SVR for now, as we already do for PVR. Eventually we may support Qemu overriding PVR/SVR if the situation is appropriate, once we implement KVM_SET_SREGS on e500. Signed-off-by: Scott Wood Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_e500.h | 1 + arch/powerpc/kvm/e500.c | 1 + arch/powerpc/kvm/e500_emulate.c | 2 ++ 3 files changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/include/asm/kvm_e500.h b/arch/powerpc/include/asm/kvm_e500.h index 7fea26fffb2..bb2a0890600 100644 --- a/arch/powerpc/include/asm/kvm_e500.h +++ b/arch/powerpc/include/asm/kvm_e500.h @@ -43,6 +43,7 @@ struct kvmppc_vcpu_e500 { u32 host_pid[E500_PID_NUM]; u32 pid[E500_PID_NUM]; + u32 svr; u32 mas0; u32 mas1; diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c index e3768ee9b59..0c1af126784 100644 --- a/arch/powerpc/kvm/e500.c +++ b/arch/powerpc/kvm/e500.c @@ -63,6 +63,7 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu) /* Registers init */ vcpu->arch.pvr = mfspr(SPRN_PVR); + vcpu_e500->svr = mfspr(SPRN_SVR); /* Since booke kvm only support one core, update all vcpus' PIR to 0 */ vcpu->vcpu_id = 0; diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c index 8e3edfbc963..e2fb47f035a 100644 --- a/arch/powerpc/kvm/e500_emulate.c +++ b/arch/powerpc/kvm/e500_emulate.c @@ -175,6 +175,8 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) kvmppc_set_gpr(vcpu, rt, vcpu_e500->hid0); break; case SPRN_HID1: kvmppc_set_gpr(vcpu, rt, vcpu_e500->hid1); break; + case SPRN_SVR: + kvmppc_set_gpr(vcpu, rt, vcpu_e500->svr); break; case SPRN_MMUCSR0: kvmppc_set_gpr(vcpu, rt, 0); break; -- cgit v1.2.3-70-g09d2 From 49ea06957bf637b28aa338fba26432d5bafdeb99 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Mon, 28 Mar 2011 15:01:24 -0500 Subject: KVM: PPC: fix exit accounting for SPRs, tlbwe, tlbsx The exit type setting for mfspr/mtspr is moved from 44x to toplevel SPR emulation. This enables it on e500, and makes sure that all SPRs are covered. Exit accounting for tlbwe and tlbsx is added to e500. 
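[Illustration, not part of the patch: condensed from the emulate.c hunk below. With the call moved to the top-level emulator, the exit type is tagged once in the shared dispatcher, after the inner SPR switch (whose default case falls back to the core-specific hook), so no SPR path can be missed:]

	case OP_31_XOP_MFSPR:
		switch (sprn) {
		/* ... generic SPRs, core-specific hook as the fallback ... */
		}
		kvmppc_set_exit_type(vcpu, EMULATED_MFSPR_EXITS);
		break;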
Signed-off-by: Stuart Yoder Signed-off-by: Scott Wood Signed-off-by: Alexander Graf --- arch/powerpc/kvm/44x_emulate.c | 2 -- arch/powerpc/kvm/e500_tlb.c | 5 ++++- arch/powerpc/kvm/emulate.c | 2 ++ 3 files changed, 6 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/44x_emulate.c b/arch/powerpc/kvm/44x_emulate.c index 65ea083a5b2..549bb2c9a47 100644 --- a/arch/powerpc/kvm/44x_emulate.c +++ b/arch/powerpc/kvm/44x_emulate.c @@ -158,7 +158,6 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) emulated = kvmppc_booke_emulate_mtspr(vcpu, sprn, rs); } - kvmppc_set_exit_type(vcpu, EMULATED_MTSPR_EXITS); return emulated; } @@ -179,7 +178,6 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, rt); } - kvmppc_set_exit_type(vcpu, EMULATED_MFSPR_EXITS); return emulated; } diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index d6d6d47a75a..56ac4523857 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved. + * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved. * * Author: Yu Liu, yu.liu@freescale.com * @@ -24,6 +24,7 @@ #include "../mm/mmu_decl.h" #include "e500_tlb.h" #include "trace.h" +#include "timing.h" #define to_htlb1_esel(esel) (tlb1_entry_num - (esel) - 1) @@ -506,6 +507,7 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb) vcpu_e500->mas7 = 0; } + kvmppc_set_exit_type(vcpu, EMULATED_TLBSX_EXITS); return EMULATE_DONE; } @@ -571,6 +573,7 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu) write_host_tlbe(vcpu_e500, stlbsel, sesel); } + kvmppc_set_exit_type(vcpu, EMULATED_TLBWE_EXITS); return EMULATE_DONE; } diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index c64fd2909bb..8f7a3aa03c2 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c @@ -294,6 +294,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) } break; } + kvmppc_set_exit_type(vcpu, EMULATED_MFSPR_EXITS); break; case OP_31_XOP_STHX: @@ -363,6 +364,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) printk("mtspr: unknown spr %x\n", sprn); break; } + kvmppc_set_exit_type(vcpu, EMULATED_MTSPR_EXITS); break; case OP_31_XOP_DCBI: -- cgit v1.2.3-70-g09d2 From 1a040b26c5c915b317103b87ae7006d40443f197 Mon Sep 17 00:00:00 2001 From: Stuart Yoder Date: Mon, 28 Mar 2011 15:01:56 -0500 Subject: KVM: PPC: use ticks, not usecs, for exit timing Convert to microseconds when displaying (with fix from Bharat Bhushan ). This reduces rounding error with large quantities of short exits. 
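[Illustration, not part of the patch: with integer division, converting each short exit to microseconds before summing truncates most samples to zero, while accumulating ticks and converting once at display time keeps the total. A standalone sketch; the tb_ticks_per_usec value of 512 is made up for the example:]

#include <stdio.h>

int main(void)
{
	unsigned long long tb_ticks_per_usec = 512;	/* assumed, illustrative */
	unsigned long long exit_ticks = 300;		/* one short exit, under 1 usec */
	unsigned long long count = 1000;

	/* convert per sample, then sum: 300 / 512 truncates to 0 every time */
	unsigned long long per_sample = count * (exit_ticks / tb_ticks_per_usec);

	/* accumulate in ticks, convert once when displaying */
	unsigned long long accumulated = (count * exit_ticks) / tb_ticks_per_usec;

	printf("per-sample: %llu usec, accumulated: %llu usec\n",
	       per_sample, accumulated);	/* prints 0 vs 585 */
	return 0;
}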
Signed-off-by: Stuart Yoder Signed-off-by: Scott Wood Signed-off-by: Alexander Graf --- arch/powerpc/kvm/timing.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/timing.c b/arch/powerpc/kvm/timing.c index 18f40fd3e98..319177df958 100644 --- a/arch/powerpc/kvm/timing.c +++ b/arch/powerpc/kvm/timing.c @@ -151,17 +151,30 @@ static int kvmppc_exit_timing_show(struct seq_file *m, void *private) { struct kvm_vcpu *vcpu = m->private; int i; + u64 min, max, sum, sum_quad; seq_printf(m, "%s", "type count min max sum sum_squared\n"); + for (i = 0; i < __NUMBER_OF_KVM_EXIT_TYPES; i++) { + + min = vcpu->arch.timing_min_duration[i]; + do_div(min, tb_ticks_per_usec); + max = vcpu->arch.timing_max_duration[i]; + do_div(max, tb_ticks_per_usec); + sum = vcpu->arch.timing_sum_duration[i]; + do_div(sum, tb_ticks_per_usec); + sum_quad = vcpu->arch.timing_sum_quad_duration[i]; + do_div(sum_quad, tb_ticks_per_usec); + seq_printf(m, "%12s %10d %10lld %10lld %20lld %20lld\n", kvm_exit_names[i], vcpu->arch.timing_count_type[i], - vcpu->arch.timing_min_duration[i], - vcpu->arch.timing_max_duration[i], - vcpu->arch.timing_sum_duration[i], - vcpu->arch.timing_sum_quad_duration[i]); + min, + max, + sum, + sum_quad); + } return 0; } -- cgit v1.2.3-70-g09d2 From eab176722f4628b2d9cf76221a43dd3a0e37e632 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Wed, 27 Apr 2011 17:24:10 -0500 Subject: KVM: PPC: booke: save/restore VRSAVE (a.k.a. USPRG0) Linux doesn't use USPRG0 (now renamed VRSAVE in the architecture, even when Altivec isn't involved), but a guest might. Signed-off-by: Scott Wood Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_host.h | 1 + arch/powerpc/kernel/asm-offsets.c | 1 + arch/powerpc/kvm/booke_interrupts.S | 1 - arch/powerpc/kvm/powerpc.c | 13 +++++++++++++ 4 files changed, 15 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 890897cee05..a1680439916 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -223,6 +223,7 @@ struct kvm_vcpu_arch { ulong hflags; ulong guest_owned_ext; #endif + u32 vrsave; /* also USPRG0 */ u32 mmucr; ulong sprg4; ulong sprg5; diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 23e6a93145a..cf0d8227895 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -395,6 +395,7 @@ int main(void) DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack)); DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid)); DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr)); + DEFINE(VCPU_VRSAVE, offsetof(struct kvm_vcpu, arch.vrsave)); DEFINE(VCPU_SPRG4, offsetof(struct kvm_vcpu, arch.sprg4)); DEFINE(VCPU_SPRG5, offsetof(struct kvm_vcpu, arch.sprg5)); DEFINE(VCPU_SPRG6, offsetof(struct kvm_vcpu, arch.sprg6)); diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S index 1cc471faac2..b58ccae9590 100644 --- a/arch/powerpc/kvm/booke_interrupts.S +++ b/arch/powerpc/kvm/booke_interrupts.S @@ -380,7 +380,6 @@ lightweight_exit: * because host interrupt handlers would get confused. */ lwz r1, VCPU_GPR(r1)(r4) - /* XXX handle USPRG0 */ /* Host interrupt handlers may have clobbered these guest-readable * SPRGs, so we need to reload them here with the guest's values. 
*/ lwz r3, VCPU_SPRG4(r4) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index ec3d2e75c0a..9e6aa8bfd16 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -298,12 +298,25 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { +#ifdef CONFIG_BOOKE + /* + * vrsave (formerly usprg0) isn't used by Linux, but may + * be used by the guest. + * + * On non-booke this is associated with Altivec and + * is handled by code in book3s.c. + */ + mtspr(SPRN_VRSAVE, vcpu->arch.vrsave); +#endif kvmppc_core_vcpu_load(vcpu, cpu); } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { kvmppc_core_vcpu_put(vcpu); +#ifdef CONFIG_BOOKE + vcpu->arch.vrsave = mfspr(SPRN_VRSAVE); +#endif } int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, -- cgit v1.2.3-70-g09d2 From 5ce941ee4258b836cf818d2ac159d8cf3ebad648 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Wed, 27 Apr 2011 17:24:21 -0500 Subject: KVM: PPC: booke: add sregs support Signed-off-by: Scott Wood Signed-off-by: Alexander Graf --- Documentation/kvm/api.txt | 6 +- arch/powerpc/include/asm/kvm.h | 184 ++++++++++++++++++++++++++++++++++++ arch/powerpc/include/asm/kvm_44x.h | 1 - arch/powerpc/include/asm/kvm_e500.h | 1 + arch/powerpc/include/asm/kvm_host.h | 3 + arch/powerpc/include/asm/kvm_ppc.h | 9 ++ arch/powerpc/kvm/44x.c | 10 ++ arch/powerpc/kvm/booke.c | 154 +++++++++++++++++++++++++++++- arch/powerpc/kvm/e500.c | 75 +++++++++++++++ arch/powerpc/kvm/e500_emulate.c | 5 +- arch/powerpc/kvm/e500_tlb.c | 8 ++ arch/powerpc/kvm/emulate.c | 13 ++- arch/powerpc/kvm/powerpc.c | 4 + include/linux/kvm.h | 1 + 14 files changed, 461 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/Documentation/kvm/api.txt b/Documentation/kvm/api.txt index 1b9eaa7e885..f64c41f8ba6 100644 --- a/Documentation/kvm/api.txt +++ b/Documentation/kvm/api.txt @@ -261,7 +261,7 @@ See KVM_GET_REGS for the data structure. 4.13 KVM_GET_SREGS Capability: basic -Architectures: x86 +Architectures: x86, ppc Type: vcpu ioctl Parameters: struct kvm_sregs (out) Returns: 0 on success, -1 on error @@ -279,6 +279,8 @@ struct kvm_sregs { __u64 interrupt_bitmap[(KVM_NR_INTERRUPTS + 63) / 64]; }; +/* ppc -- see arch/powerpc/include/asm/kvm.h */ + interrupt_bitmap is a bitmap of pending external interrupts. At most one bit may be set. This interrupt has been acknowledged by the APIC but not yet injected into the cpu core. @@ -286,7 +288,7 @@ but not yet injected into the cpu core. 4.14 KVM_SET_SREGS Capability: basic -Architectures: x86 +Architectures: x86, ppc Type: vcpu ioctl Parameters: struct kvm_sregs (in) Returns: 0 on success, -1 on error diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h index 18ea6963ad7..d2ca5ed3877 100644 --- a/arch/powerpc/include/asm/kvm.h +++ b/arch/powerpc/include/asm/kvm.h @@ -45,6 +45,114 @@ struct kvm_regs { __u64 gpr[32]; }; +#define KVM_SREGS_E_IMPL_NONE 0 +#define KVM_SREGS_E_IMPL_FSL 1 + +#define KVM_SREGS_E_FSL_PIDn (1 << 0) /* PID1/PID2 */ + +/* + * Feature bits indicate which sections of the sregs struct are valid, + * both in KVM_GET_SREGS and KVM_SET_SREGS. On KVM_SET_SREGS, registers + * corresponding to unset feature bits will not be modified. This allows + * restoring a checkpoint made without that feature, while keeping the + * default values of the new registers. 
+ * + * KVM_SREGS_E_BASE contains: + * CSRR0/1 (refers to SRR2/3 on 40x) + * ESR + * DEAR + * MCSR + * TSR + * TCR + * DEC + * TB + * VRSAVE (USPRG0) + */ +#define KVM_SREGS_E_BASE (1 << 0) + +/* + * KVM_SREGS_E_ARCH206 contains: + * + * PIR + * MCSRR0/1 + * DECAR + * IVPR + */ +#define KVM_SREGS_E_ARCH206 (1 << 1) + +/* + * Contains EPCR, plus the upper half of 64-bit registers + * that are 32-bit on 32-bit implementations. + */ +#define KVM_SREGS_E_64 (1 << 2) + +#define KVM_SREGS_E_SPRG8 (1 << 3) +#define KVM_SREGS_E_MCIVPR (1 << 4) + +/* + * IVORs are used -- contains IVOR0-15, plus additional IVORs + * in combination with an appropriate feature bit. + */ +#define KVM_SREGS_E_IVOR (1 << 5) + +/* + * Contains MAS0-4, MAS6-7, TLBnCFG, MMUCFG. + * Also TLBnPS if MMUCFG[MAVN] = 1. + */ +#define KVM_SREGS_E_ARCH206_MMU (1 << 6) + +/* DBSR, DBCR, IAC, DAC, DVC */ +#define KVM_SREGS_E_DEBUG (1 << 7) + +/* Enhanced debug -- DSRR0/1, SPRG9 */ +#define KVM_SREGS_E_ED (1 << 8) + +/* Embedded Floating Point (SPE) -- IVOR32-34 if KVM_SREGS_E_IVOR */ +#define KVM_SREGS_E_SPE (1 << 9) + +/* External Proxy (EXP) -- EPR */ +#define KVM_SREGS_EXP (1 << 10) + +/* External PID (E.PD) -- EPSC/EPLC */ +#define KVM_SREGS_E_PD (1 << 11) + +/* Processor Control (E.PC) -- IVOR36-37 if KVM_SREGS_E_IVOR */ +#define KVM_SREGS_E_PC (1 << 12) + +/* Page table (E.PT) -- EPTCFG */ +#define KVM_SREGS_E_PT (1 << 13) + +/* Embedded Performance Monitor (E.PM) -- IVOR35 if KVM_SREGS_E_IVOR */ +#define KVM_SREGS_E_PM (1 << 14) + +/* + * Special updates: + * + * Some registers may change even while a vcpu is not running. + * To avoid losing these changes, by default these registers are + * not updated by KVM_SET_SREGS. To force an update, set the bit + * in u.e.update_special corresponding to the register to be updated. + * + * The update_special field is zero on return from KVM_GET_SREGS. + * + * When restoring a checkpoint, the caller can set update_special + * to 0xffffffff to ensure that everything is restored, even new features + * that the caller doesn't know about. + */ +#define KVM_SREGS_E_UPDATE_MCSR (1 << 0) +#define KVM_SREGS_E_UPDATE_TSR (1 << 1) +#define KVM_SREGS_E_UPDATE_DEC (1 << 2) +#define KVM_SREGS_E_UPDATE_DBSR (1 << 3) + +/* + * In KVM_SET_SREGS, reserved/pad fields must be left untouched from a + * previous KVM_GET_REGS. + * + * Unless otherwise indicated, setting any register with KVM_SET_SREGS + * directly sets its value. It does not trigger any special semantics such + * as write-one-to-clear. Calling KVM_SET_SREGS on an unmodified struct + * just received from KVM_GET_SREGS is always a no-op. 
+ */ struct kvm_sregs { __u32 pvr; union { @@ -62,6 +170,82 @@ struct kvm_sregs { __u64 dbat[8]; } ppc32; } s; + struct { + union { + struct { /* KVM_SREGS_E_IMPL_FSL */ + __u32 features; /* KVM_SREGS_E_FSL_ */ + __u32 svr; + __u64 mcar; + __u32 hid0; + + /* KVM_SREGS_E_FSL_PIDn */ + __u32 pid1, pid2; + } fsl; + __u8 pad[256]; + } impl; + + __u32 features; /* KVM_SREGS_E_ */ + __u32 impl_id; /* KVM_SREGS_E_IMPL_ */ + __u32 update_special; /* KVM_SREGS_E_UPDATE_ */ + __u32 pir; /* read-only */ + __u64 sprg8; + __u64 sprg9; /* E.ED */ + __u64 csrr0; + __u64 dsrr0; /* E.ED */ + __u64 mcsrr0; + __u32 csrr1; + __u32 dsrr1; /* E.ED */ + __u32 mcsrr1; + __u32 esr; + __u64 dear; + __u64 ivpr; + __u64 mcivpr; + __u64 mcsr; /* KVM_SREGS_E_UPDATE_MCSR */ + + __u32 tsr; /* KVM_SREGS_E_UPDATE_TSR */ + __u32 tcr; + __u32 decar; + __u32 dec; /* KVM_SREGS_E_UPDATE_DEC */ + + /* + * Userspace can read TB directly, but the + * value reported here is consistent with "dec". + * + * Read-only. + */ + __u64 tb; + + __u32 dbsr; /* KVM_SREGS_E_UPDATE_DBSR */ + __u32 dbcr[3]; + __u32 iac[4]; + __u32 dac[2]; + __u32 dvc[2]; + __u8 num_iac; /* read-only */ + __u8 num_dac; /* read-only */ + __u8 num_dvc; /* read-only */ + __u8 pad; + + __u32 epr; /* EXP */ + __u32 vrsave; /* a.k.a. USPRG0 */ + __u32 epcr; /* KVM_SREGS_E_64 */ + + __u32 mas0; + __u32 mas1; + __u64 mas2; + __u64 mas7_3; + __u32 mas4; + __u32 mas6; + + __u32 ivor_low[16]; /* IVOR0-15 */ + __u32 ivor_high[18]; /* IVOR32+, plus room to expand */ + + __u32 mmucfg; /* read-only */ + __u32 eptcfg; /* E.PT, read-only */ + __u32 tlbcfg[4];/* read-only */ + __u32 tlbps[4]; /* read-only */ + + __u32 eplc, epsc; /* E.PD */ + } e; __u8 pad[1020]; } u; }; diff --git a/arch/powerpc/include/asm/kvm_44x.h b/arch/powerpc/include/asm/kvm_44x.h index d22d39942a9..a0e57618ff3 100644 --- a/arch/powerpc/include/asm/kvm_44x.h +++ b/arch/powerpc/include/asm/kvm_44x.h @@ -61,7 +61,6 @@ static inline struct kvmppc_vcpu_44x *to_44x(struct kvm_vcpu *vcpu) return container_of(vcpu, struct kvmppc_vcpu_44x, vcpu); } -void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 new_pid); void kvmppc_44x_tlb_put(struct kvm_vcpu *vcpu); void kvmppc_44x_tlb_load(struct kvm_vcpu *vcpu); diff --git a/arch/powerpc/include/asm/kvm_e500.h b/arch/powerpc/include/asm/kvm_e500.h index bb2a0890600..7a2a565f88c 100644 --- a/arch/powerpc/include/asm/kvm_e500.h +++ b/arch/powerpc/include/asm/kvm_e500.h @@ -59,6 +59,7 @@ struct kvmppc_vcpu_e500 { u32 hid1; u32 tlb0cfg; u32 tlb1cfg; + u64 mcar; struct kvm_vcpu vcpu; }; diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index a1680439916..186f150b9b8 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -233,6 +233,9 @@ struct kvm_vcpu_arch { ulong csrr1; ulong dsrr0; ulong dsrr1; + ulong mcsrr0; + ulong mcsrr1; + ulong mcsr; ulong esr; u32 dec; u32 decar; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index ecb3bc74c34..9345238edec 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -61,6 +61,7 @@ extern int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu); extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu); extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu); +extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb); /* Core-specific hooks */ @@ -142,4 +143,12 @@ static inline u32 kvmppc_set_field(u64 inst, int msb, int lsb, int value) return r; } +void 
kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); +int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); + +void kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); +int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); + +void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid); + #endif /* __POWERPC_KVM_PPC_H__ */ diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c index 74d0e742114..da3a1225c0a 100644 --- a/arch/powerpc/kvm/44x.c +++ b/arch/powerpc/kvm/44x.c @@ -107,6 +107,16 @@ int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu, return 0; } +void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +{ + kvmppc_get_sregs_ivor(vcpu, sregs); +} + +int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +{ + return kvmppc_set_sregs_ivor(vcpu, sregs); +} + struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) { struct kvmppc_vcpu_44x *vcpu_44x; diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index ef76acb455c..8462b3a1c1c 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -569,6 +569,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) kvmppc_set_msr(vcpu, regs->msr); vcpu->arch.shared->srr0 = regs->srr0; vcpu->arch.shared->srr1 = regs->srr1; + kvmppc_set_pid(vcpu, regs->pid); vcpu->arch.shared->sprg0 = regs->sprg0; vcpu->arch.shared->sprg1 = regs->sprg1; vcpu->arch.shared->sprg2 = regs->sprg2; @@ -584,16 +585,165 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) return 0; } +static void get_sregs_base(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + u64 tb = get_tb(); + + sregs->u.e.features |= KVM_SREGS_E_BASE; + + sregs->u.e.csrr0 = vcpu->arch.csrr0; + sregs->u.e.csrr1 = vcpu->arch.csrr1; + sregs->u.e.mcsr = vcpu->arch.mcsr; + sregs->u.e.esr = vcpu->arch.esr; + sregs->u.e.dear = vcpu->arch.shared->dar; + sregs->u.e.tsr = vcpu->arch.tsr; + sregs->u.e.tcr = vcpu->arch.tcr; + sregs->u.e.dec = kvmppc_get_dec(vcpu, tb); + sregs->u.e.tb = tb; + sregs->u.e.vrsave = vcpu->arch.vrsave; +} + +static int set_sregs_base(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + if (!(sregs->u.e.features & KVM_SREGS_E_BASE)) + return 0; + + vcpu->arch.csrr0 = sregs->u.e.csrr0; + vcpu->arch.csrr1 = sregs->u.e.csrr1; + vcpu->arch.mcsr = sregs->u.e.mcsr; + vcpu->arch.esr = sregs->u.e.esr; + vcpu->arch.shared->dar = sregs->u.e.dear; + vcpu->arch.vrsave = sregs->u.e.vrsave; + vcpu->arch.tcr = sregs->u.e.tcr; + + if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_DEC) + vcpu->arch.dec = sregs->u.e.dec; + + kvmppc_emulate_dec(vcpu); + + if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR) { + /* + * FIXME: existing KVM timer handling is incomplete. + * TSR cannot be read by the guest, and its value in + * vcpu->arch is always zero. For now, just handle + * the case where the caller is trying to inject a + * decrementer interrupt. 
+ */ + + if ((sregs->u.e.tsr & TSR_DIS) && + (vcpu->arch.tcr & TCR_DIE)) + kvmppc_core_queue_dec(vcpu); + } + + return 0; +} + +static void get_sregs_arch206(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + sregs->u.e.features |= KVM_SREGS_E_ARCH206; + + sregs->u.e.pir = 0; + sregs->u.e.mcsrr0 = vcpu->arch.mcsrr0; + sregs->u.e.mcsrr1 = vcpu->arch.mcsrr1; + sregs->u.e.decar = vcpu->arch.decar; + sregs->u.e.ivpr = vcpu->arch.ivpr; +} + +static int set_sregs_arch206(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + if (!(sregs->u.e.features & KVM_SREGS_E_ARCH206)) + return 0; + + if (sregs->u.e.pir != 0) + return -EINVAL; + + vcpu->arch.mcsrr0 = sregs->u.e.mcsrr0; + vcpu->arch.mcsrr1 = sregs->u.e.mcsrr1; + vcpu->arch.decar = sregs->u.e.decar; + vcpu->arch.ivpr = sregs->u.e.ivpr; + + return 0; +} + +void kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +{ + sregs->u.e.features |= KVM_SREGS_E_IVOR; + + sregs->u.e.ivor_low[0] = vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL]; + sregs->u.e.ivor_low[1] = vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK]; + sregs->u.e.ivor_low[2] = vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE]; + sregs->u.e.ivor_low[3] = vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE]; + sregs->u.e.ivor_low[4] = vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL]; + sregs->u.e.ivor_low[5] = vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT]; + sregs->u.e.ivor_low[6] = vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM]; + sregs->u.e.ivor_low[7] = vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL]; + sregs->u.e.ivor_low[8] = vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL]; + sregs->u.e.ivor_low[9] = vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL]; + sregs->u.e.ivor_low[10] = vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER]; + sregs->u.e.ivor_low[11] = vcpu->arch.ivor[BOOKE_IRQPRIO_FIT]; + sregs->u.e.ivor_low[12] = vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG]; + sregs->u.e.ivor_low[13] = vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS]; + sregs->u.e.ivor_low[14] = vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS]; + sregs->u.e.ivor_low[15] = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG]; +} + +int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +{ + if (!(sregs->u.e.features & KVM_SREGS_E_IVOR)) + return 0; + + vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL] = sregs->u.e.ivor_low[0]; + vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK] = sregs->u.e.ivor_low[1]; + vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE] = sregs->u.e.ivor_low[2]; + vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE] = sregs->u.e.ivor_low[3]; + vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL] = sregs->u.e.ivor_low[4]; + vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT] = sregs->u.e.ivor_low[5]; + vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM] = sregs->u.e.ivor_low[6]; + vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL] = sregs->u.e.ivor_low[7]; + vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL] = sregs->u.e.ivor_low[8]; + vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL] = sregs->u.e.ivor_low[9]; + vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER] = sregs->u.e.ivor_low[10]; + vcpu->arch.ivor[BOOKE_IRQPRIO_FIT] = sregs->u.e.ivor_low[11]; + vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG] = sregs->u.e.ivor_low[12]; + vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS] = sregs->u.e.ivor_low[13]; + vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS] = sregs->u.e.ivor_low[14]; + vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG] = sregs->u.e.ivor_low[15]; + + return 0; +} + int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { - return -ENOTSUPP; + sregs->pvr = vcpu->arch.pvr; + + get_sregs_base(vcpu, sregs); + get_sregs_arch206(vcpu, sregs); + kvmppc_core_get_sregs(vcpu, 
sregs); + return 0; } int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { - return -ENOTSUPP; + int ret; + + if (vcpu->arch.pvr != sregs->pvr) + return -EINVAL; + + ret = set_sregs_base(vcpu, sregs); + if (ret < 0) + return ret; + + ret = set_sregs_arch206(vcpu, sregs); + if (ret < 0) + return ret; + + return kvmppc_core_set_sregs(vcpu, sregs); } int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c index 0c1af126784..318dbc61ba4 100644 --- a/arch/powerpc/kvm/e500.c +++ b/arch/powerpc/kvm/e500.c @@ -97,6 +97,81 @@ int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu, return 0; } +void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + + sregs->u.e.features |= KVM_SREGS_E_ARCH206_MMU | KVM_SREGS_E_SPE | + KVM_SREGS_E_PM; + sregs->u.e.impl_id = KVM_SREGS_E_IMPL_FSL; + + sregs->u.e.impl.fsl.features = 0; + sregs->u.e.impl.fsl.svr = vcpu_e500->svr; + sregs->u.e.impl.fsl.hid0 = vcpu_e500->hid0; + sregs->u.e.impl.fsl.mcar = vcpu_e500->mcar; + + sregs->u.e.mas0 = vcpu_e500->mas0; + sregs->u.e.mas1 = vcpu_e500->mas1; + sregs->u.e.mas2 = vcpu_e500->mas2; + sregs->u.e.mas7_3 = ((u64)vcpu_e500->mas7 << 32) | vcpu_e500->mas3; + sregs->u.e.mas4 = vcpu_e500->mas4; + sregs->u.e.mas6 = vcpu_e500->mas6; + + sregs->u.e.mmucfg = mfspr(SPRN_MMUCFG); + sregs->u.e.tlbcfg[0] = vcpu_e500->tlb0cfg; + sregs->u.e.tlbcfg[1] = vcpu_e500->tlb1cfg; + sregs->u.e.tlbcfg[2] = 0; + sregs->u.e.tlbcfg[3] = 0; + + sregs->u.e.ivor_high[0] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL]; + sregs->u.e.ivor_high[1] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA]; + sregs->u.e.ivor_high[2] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND]; + sregs->u.e.ivor_high[3] = + vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR]; + + kvmppc_get_sregs_ivor(vcpu, sregs); +} + +int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + + if (sregs->u.e.impl_id == KVM_SREGS_E_IMPL_FSL) { + vcpu_e500->svr = sregs->u.e.impl.fsl.svr; + vcpu_e500->hid0 = sregs->u.e.impl.fsl.hid0; + vcpu_e500->mcar = sregs->u.e.impl.fsl.mcar; + } + + if (sregs->u.e.features & KVM_SREGS_E_ARCH206_MMU) { + vcpu_e500->mas0 = sregs->u.e.mas0; + vcpu_e500->mas1 = sregs->u.e.mas1; + vcpu_e500->mas2 = sregs->u.e.mas2; + vcpu_e500->mas7 = sregs->u.e.mas7_3 >> 32; + vcpu_e500->mas3 = (u32)sregs->u.e.mas7_3; + vcpu_e500->mas4 = sregs->u.e.mas4; + vcpu_e500->mas6 = sregs->u.e.mas6; + } + + if (!(sregs->u.e.features & KVM_SREGS_E_IVOR)) + return 0; + + if (sregs->u.e.features & KVM_SREGS_E_SPE) { + vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL] = + sregs->u.e.ivor_high[0]; + vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA] = + sregs->u.e.ivor_high[1]; + vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND] = + sregs->u.e.ivor_high[2]; + } + + if (sregs->u.e.features & KVM_SREGS_E_PM) { + vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR] = + sregs->u.e.ivor_high[3]; + } + + return kvmppc_set_sregs_ivor(vcpu, sregs); +} + struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) { struct kvmppc_vcpu_e500 *vcpu_e500; diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c index e2fb47f035a..69cd665a0ca 100644 --- a/arch/powerpc/kvm/e500_emulate.c +++ b/arch/powerpc/kvm/e500_emulate.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved. 
+ * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved. * * Author: Yu Liu, * @@ -78,8 +78,7 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) switch (sprn) { case SPRN_PID: - vcpu_e500->pid[0] = vcpu->arch.shadow_pid = - vcpu->arch.pid = spr_val; + kvmppc_set_pid(vcpu, spr_val); break; case SPRN_PID1: vcpu_e500->pid[1] = spr_val; break; diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index 56ac4523857..b18fe353397 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c @@ -675,6 +675,14 @@ int kvmppc_e500_tlb_search(struct kvm_vcpu *vcpu, return -1; } +void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + + vcpu_e500->pid[0] = vcpu->arch.shadow_pid = + vcpu->arch.pid = pid; +} + void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *vcpu_e500) { struct tlbe *tlbe; diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index 8f7a3aa03c2..141dce3c681 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c @@ -114,6 +114,12 @@ void kvmppc_emulate_dec(struct kvm_vcpu *vcpu) } } +u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb) +{ + u64 jd = tb - vcpu->arch.dec_jiffies; + return vcpu->arch.dec - jd; +} + /* XXX to do: * lhax * lhaux @@ -279,11 +285,8 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) case SPRN_DEC: { - u64 jd = get_tb() - vcpu->arch.dec_jiffies; - kvmppc_set_gpr(vcpu, rt, vcpu->arch.dec - jd); - pr_debug("mfDEC: %x - %llx = %lx\n", - vcpu->arch.dec, jd, - kvmppc_get_gpr(vcpu, rt)); + kvmppc_set_gpr(vcpu, rt, + kvmppc_get_dec(vcpu, get_tb())); break; } default: diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 9e6aa8bfd16..616dd516ca1 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -175,7 +175,11 @@ int kvm_dev_ioctl_check_extension(long ext) int r; switch (ext) { +#ifdef CONFIG_BOOKE + case KVM_CAP_PPC_BOOKE_SREGS: +#else case KVM_CAP_PPC_SEGSTATE: +#endif case KVM_CAP_PPC_PAIRED_SINGLES: case KVM_CAP_PPC_UNSET_IRQ: case KVM_CAP_PPC_IRQ_LEVEL: diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 2f63ebeac63..55ef181521f 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -543,6 +543,7 @@ struct kvm_ppc_pvinfo { #define KVM_CAP_ASYNC_PF 59 #define KVM_CAP_TSC_CONTROL 60 #define KVM_CAP_GET_TSC_KHZ 61 +#define KVM_CAP_PPC_BOOKE_SREGS 62 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3-70-g09d2 From 12cb814f3bb35736420cc6bfc9fed7b6a9d3a828 Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Sat, 7 May 2011 16:31:36 +0900 Subject: KVM: MMU: Clean up gpte reading with copy_from_user() When we optimized walk_addr_generic() by not using the generic guest memory reader, we replaced copy_from_user() with get_user(): commit e30d2a170506830d5eef5e9d7990c5aedf1b0a51 KVM: MMU: Optimize guest page table walk commit 15e2ac9a43d4d7d08088e404fddf2533a8e7d52e KVM: MMU: Fix 64-bit paging breakage on x86_32 But as Andi pointed out later, copy_from_user() does the same as get_user() as long as we give a constant size to it. So we use copy_from_user() to clean up the code. The only noticeable regression introduced by this is 64-bit gpte reading on x86_32 hosts needed for PAE guests. But this can be mitigated by implementing 8-byte get_user() for x86_32, if needed.
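[Illustration, not part of the patch: the equivalence the message relies on. When the size argument is a compile-time constant, copy_from_user() can be optimized down to the same single fixed-size access that get_user() performs, so one call covers both the 4-byte and 8-byte gpte cases; kernel-style sketch using the names from the hunk below:]

	pt_element_t pte;

	/* sizeof(pte) is a compile-time constant, so this can compile down
	 * to the same access as get_user(pte, ptep_user) */
	if (unlikely(copy_from_user(&pte, ptep_user, sizeof(pte))))
		return -EFAULT;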
Signed-off-by: Takuya Yoshikawa Signed-off-by: Avi Kivity --- arch/x86/kvm/paging_tmpl.h | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 52450a6b784..88ca456ccd6 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -115,20 +115,6 @@ static unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, pt_element_t gpte) return access; } -static int FNAME(read_gpte)(pt_element_t *pte, pt_element_t __user *ptep_user) -{ -#if defined(CONFIG_X86_32) && (PTTYPE == 64) - u32 *p = (u32 *)pte; - u32 __user *p_user = (u32 __user *)ptep_user; - - if (unlikely(get_user(*p, p_user))) - return -EFAULT; - return get_user(*(p + 1), p_user + 1); -#else - return get_user(*pte, ptep_user); -#endif -} - /* * Fetch a guest pte for a guest virtual address */ @@ -199,7 +185,7 @@ walk: } ptep_user = (pt_element_t __user *)((void *)host_addr + offset); - if (unlikely(FNAME(read_gpte)(&pte, ptep_user))) { + if (unlikely(copy_from_user(&pte, ptep_user, sizeof(pte)))) { present = false; break; } -- cgit v1.2.3-70-g09d2 From fa3d315a4ce2c0891cdde262562e710d95fba19e Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Sat, 7 May 2011 16:35:38 +0900 Subject: KVM: Validate userspace_addr of memslot when registered This way, we can avoid checking the user space address many times when we read the guest memory. Although we can do the same for write if we check which slots are writable, we do not care about writes for now: reading the guest memory happens more often than writing. [avi: change VERIFY_READ to VERIFY_WRITE] Signed-off-by: Takuya Yoshikawa Signed-off-by: Avi Kivity --- arch/x86/kvm/paging_tmpl.h | 2 +- virt/kvm/kvm_main.c | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 88ca456ccd6..e3f81418797 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -185,7 +185,7 @@ walk: } ptep_user = (pt_element_t __user *)((void *)host_addr + offset); - if (unlikely(copy_from_user(&pte, ptep_user, sizeof(pte)))) { + if (unlikely(__copy_from_user(&pte, ptep_user, sizeof(pte)))) { present = false; break; } diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 58146457bf9..ed3c4e7c100 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -648,7 +648,10 @@ int __kvm_set_memory_region(struct kvm *kvm, goto out; if (mem->guest_phys_addr & (PAGE_SIZE - 1)) goto out; - if (user_alloc && (mem->userspace_addr & (PAGE_SIZE - 1))) + /* We can read the guest memory with __xxx_user() later on. */ + if (user_alloc && + ((mem->userspace_addr & (PAGE_SIZE - 1)) || + !access_ok(VERIFY_WRITE, mem->userspace_addr, mem->memory_size))) goto out; if (mem->slot >= KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS) goto out; @@ -1283,7 +1286,7 @@ int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset, addr = gfn_to_hva(kvm, gfn); if (kvm_is_error_hva(addr)) return -EFAULT; - r = copy_from_user(data, (void __user *)addr + offset, len); + r = __copy_from_user(data, (void __user *)addr + offset, len); if (r) return -EFAULT; return 0; -- cgit v1.2.3-70-g09d2 From c1ed6dea8113597cfa00911faa814d9dbb586932 Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Mon, 2 May 2011 02:23:13 +0900 Subject: KVM: x86 emulator: Remove unused arg from seg_override() In addition, one comma at the end of a statement is replaced with a semicolon.
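[Illustration, not part of the patch: the stray comma was harmless here, since the comma operator still evaluates both assignments, but it silently fuses two statements into one. A standalone reduction:]

#include <stdio.h>

static int f(void)
{
	return 1;
}

int main(void)
{
	int a, b;

	a = f(),		/* comma operator: one fused statement */
	b = 0;
	printf("%d %d\n", a, b);	/* 1 0 -- same result, misleading form */

	a = f();		/* intended form: two separate statements */
	b = 0;
	printf("%d %d\n", a, b);	/* 1 0 */
	return 0;
}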
Signed-off-by: Takuya Yoshikawa Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 59992484f5f..3d0e5ac3a0c 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -500,7 +500,6 @@ static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, } static unsigned seg_override(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops, struct decode_cache *c) { if (!c->has_seg_override) @@ -3527,7 +3526,7 @@ done_prefixes: if (!c->has_seg_override) set_seg_override(c, VCPU_SREG_DS); - memop.addr.mem.seg = seg_override(ctxt, ops, c); + memop.addr.mem.seg = seg_override(ctxt, c); if (memop.type == OP_MEM && c->ad_bytes != 8) memop.addr.mem.ea = (u32)memop.addr.mem.ea; @@ -3587,7 +3586,7 @@ done_prefixes: c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; c->src.addr.mem.ea = register_address(c, c->regs[VCPU_REGS_RSI]); - c->src.addr.mem.seg = seg_override(ctxt, ops, c), + c->src.addr.mem.seg = seg_override(ctxt, c); c->src.val = 0; break; case SrcImmFAddr: @@ -4103,7 +4102,7 @@ writeback: c->dst.type = saved_dst_type; if ((c->d & SrcMask) == SrcSI) - string_addr_inc(ctxt, seg_override(ctxt, ops, c), + string_addr_inc(ctxt, seg_override(ctxt, c), VCPU_REGS_RSI, &c->src); if ((c->d & DstMask) == DstDI) -- cgit v1.2.3-70-g09d2 From 509cf9fe11dad85f96944095ed63b2caa85cdfc9 Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Mon, 2 May 2011 02:25:07 +0900 Subject: KVM: x86 emulator: Remove unused arg from read_descriptor() Signed-off-by: Takuya Yoshikawa Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 3d0e5ac3a0c..c26243f6d66 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -729,7 +729,6 @@ static void *decode_register(u8 modrm_reg, unsigned long *regs, } static int read_descriptor(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops, struct segmented_address addr, u16 *size, unsigned long *address, int op_bytes) { @@ -2720,7 +2719,7 @@ static int em_lgdt(struct x86_emulate_ctxt *ctxt) struct desc_ptr desc_ptr; int rc; - rc = read_descriptor(ctxt, ctxt->ops, c->src.addr.mem, + rc = read_descriptor(ctxt, c->src.addr.mem, &desc_ptr.size, &desc_ptr.address, c->op_bytes); if (rc != X86EMUL_CONTINUE) @@ -2749,9 +2748,8 @@ static int em_lidt(struct x86_emulate_ctxt *ctxt) struct desc_ptr desc_ptr; int rc; - rc = read_descriptor(ctxt, ctxt->ops, c->src.addr.mem, - &desc_ptr.size, - &desc_ptr.address, + rc = read_descriptor(ctxt, c->src.addr.mem, + &desc_ptr.size, &desc_ptr.address, c->op_bytes); if (rc != X86EMUL_CONTINUE) return rc; -- cgit v1.2.3-70-g09d2 From adddcecf9222aa32938480cc1d03de629fab2a86 Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Mon, 2 May 2011 02:26:23 +0900 Subject: KVM: x86 emulator: Remove unused arg from writeback() Remove inline at this chance. 
Signed-off-by: Takuya Yoshikawa Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index c26243f6d66..d4f4375c048 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -1351,8 +1351,7 @@ static void write_register_operand(struct operand *op) } } -static inline int writeback(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops) +static int writeback(struct x86_emulate_ctxt *ctxt) { int rc; struct decode_cache *c = &ctxt->decode; @@ -4089,7 +4088,7 @@ special_insn: goto done; writeback: - rc = writeback(ctxt, ops); + rc = writeback(ctxt); if (rc != X86EMUL_CONTINUE) goto done; -- cgit v1.2.3-70-g09d2 From 3b9be3bf2e4d45828f84ba615283a53d11ebf470 Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Mon, 2 May 2011 02:27:55 +0900 Subject: KVM: x86 emulator: Remove unused arg from emulate_pop() The opt of emulate_grp1a() is also removed. Signed-off-by: Takuya Yoshikawa Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index d4f4375c048..569e57dd1d5 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -1402,7 +1402,6 @@ static int em_push(struct x86_emulate_ctxt *ctxt) } static int emulate_pop(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops, void *dest, int len) { struct decode_cache *c = &ctxt->decode; @@ -1423,7 +1422,7 @@ static int em_pop(struct x86_emulate_ctxt *ctxt) { struct decode_cache *c = &ctxt->decode; - return emulate_pop(ctxt, ctxt->ops, &c->dst.val, c->op_bytes); + return emulate_pop(ctxt, &c->dst.val, c->op_bytes); } static int emulate_popf(struct x86_emulate_ctxt *ctxt, @@ -1435,7 +1434,7 @@ static int emulate_popf(struct x86_emulate_ctxt *ctxt, int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; int cpl = ops->cpl(ctxt); - rc = emulate_pop(ctxt, ops, &val, len); + rc = emulate_pop(ctxt, &val, len); if (rc != X86EMUL_CONTINUE) return rc; @@ -1494,7 +1493,7 @@ static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt, unsigned long selector; int rc; - rc = emulate_pop(ctxt, ops, &selector, c->op_bytes); + rc = emulate_pop(ctxt, &selector, c->op_bytes); if (rc != X86EMUL_CONTINUE) return rc; @@ -1544,7 +1543,7 @@ static int em_popa(struct x86_emulate_ctxt *ctxt) --reg; } - rc = emulate_pop(ctxt, ctxt->ops, &c->regs[reg], c->op_bytes); + rc = emulate_pop(ctxt, &c->regs[reg], c->op_bytes); if (rc != X86EMUL_CONTINUE) break; --reg; @@ -1633,7 +1632,7 @@ static int emulate_iret_real(struct x86_emulate_ctxt *ctxt, /* TODO: Add stack limit check */ - rc = emulate_pop(ctxt, ops, &temp_eip, c->op_bytes); + rc = emulate_pop(ctxt, &temp_eip, c->op_bytes); if (rc != X86EMUL_CONTINUE) return rc; @@ -1641,12 +1640,12 @@ static int emulate_iret_real(struct x86_emulate_ctxt *ctxt, if (temp_eip & ~0xffff) return emulate_gp(ctxt, 0); - rc = emulate_pop(ctxt, ops, &cs, c->op_bytes); + rc = emulate_pop(ctxt, &cs, c->op_bytes); if (rc != X86EMUL_CONTINUE) return rc; - rc = emulate_pop(ctxt, ops, &temp_eflags, c->op_bytes); + rc = emulate_pop(ctxt, &temp_eflags, c->op_bytes); if (rc != X86EMUL_CONTINUE) return rc; @@ -1688,12 +1687,11 @@ static inline int emulate_iret(struct x86_emulate_ctxt *ctxt, } } -static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops) +static inline int emulate_grp1a(struct 
x86_emulate_ctxt *ctxt) { struct decode_cache *c = &ctxt->decode; - return emulate_pop(ctxt, ops, &c->dst.val, c->dst.bytes); + return emulate_pop(ctxt, &c->dst.val, c->dst.bytes); } static inline void emulate_grp2(struct x86_emulate_ctxt *ctxt) @@ -1822,12 +1820,12 @@ static int emulate_ret_far(struct x86_emulate_ctxt *ctxt, int rc; unsigned long cs; - rc = emulate_pop(ctxt, ops, &c->eip, c->op_bytes); + rc = emulate_pop(ctxt, &c->eip, c->op_bytes); if (rc != X86EMUL_CONTINUE) return rc; if (c->op_bytes == 4) c->eip = (u32)c->eip; - rc = emulate_pop(ctxt, ops, &cs, c->op_bytes); + rc = emulate_pop(ctxt, &cs, c->op_bytes); if (rc != X86EMUL_CONTINUE) return rc; rc = load_segment_descriptor(ctxt, ops, (u16)cs, VCPU_SREG_CS); @@ -2543,7 +2541,7 @@ static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt) c->dst.type = OP_REG; c->dst.addr.reg = &c->eip; c->dst.bytes = c->op_bytes; - rc = emulate_pop(ctxt, ctxt->ops, &c->dst.val, c->op_bytes); + rc = emulate_pop(ctxt, &c->dst.val, c->op_bytes); if (rc != X86EMUL_CONTINUE) return rc; register_address_increment(c, &c->regs[VCPU_REGS_RSP], c->src.val); @@ -3918,7 +3916,7 @@ special_insn: break; } case 0x8f: /* pop (sole member of Grp1a) */ - rc = emulate_grp1a(ctxt, ops); + rc = emulate_grp1a(ctxt); break; case 0x90 ... 0x97: /* nop / xchg reg, rax */ if (c->dst.addr.reg == &c->regs[VCPU_REGS_RAX]) -- cgit v1.2.3-70-g09d2 From 51187683cb11b959535d32eb91b673c6a9a03e88 Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Mon, 2 May 2011 02:29:17 +0900 Subject: KVM: x86 emulator: Rename emulate_grpX() to em_grpX() The prototypes are changed appropriately. We also replace "goto grp45;" with a simple em_grp45() call. Signed-off-by: Takuya Yoshikawa Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 569e57dd1d5..d9ebf6939e4 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -1687,14 +1687,14 @@ static inline int emulate_iret(struct x86_emulate_ctxt *ctxt, } } -static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt) +static int em_grp1a(struct x86_emulate_ctxt *ctxt) { struct decode_cache *c = &ctxt->decode; return emulate_pop(ctxt, &c->dst.val, c->dst.bytes); } -static inline void emulate_grp2(struct x86_emulate_ctxt *ctxt) +static int em_grp2(struct x86_emulate_ctxt *ctxt) { struct decode_cache *c = &ctxt->decode; switch (c->modrm_reg) { @@ -1721,10 +1721,10 @@ static inline void emulate_grp2(struct x86_emulate_ctxt *ctxt) emulate_2op_SrcB("sar", c->src, c->dst, ctxt->eflags); break; } + return X86EMUL_CONTINUE; } -static inline int emulate_grp3(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops) +static int em_grp3(struct x86_emulate_ctxt *ctxt) { struct decode_cache *c = &ctxt->decode; unsigned long *rax = &c->regs[VCPU_REGS_RAX]; @@ -1763,7 +1763,7 @@ static inline int emulate_grp3(struct x86_emulate_ctxt *ctxt, return X86EMUL_CONTINUE; } -static int emulate_grp45(struct x86_emulate_ctxt *ctxt) +static int em_grp45(struct x86_emulate_ctxt *ctxt) { struct decode_cache *c = &ctxt->decode; int rc = X86EMUL_CONTINUE; @@ -1793,8 +1793,7 @@ static int emulate_grp45(struct x86_emulate_ctxt *ctxt) return rc; } -static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops) +static int em_grp9(struct x86_emulate_ctxt *ctxt) { struct decode_cache *c = &ctxt->decode; u64 old = c->dst.orig_val64; @@ -3916,7 +3915,7 @@
special_insn: break; } case 0x8f: /* pop (sole member of Grp1a) */ - rc = emulate_grp1a(ctxt); + rc = em_grp1a(ctxt); break; case 0x90 ... 0x97: /* nop / xchg reg, rax */ if (c->dst.addr.reg == &c->regs[VCPU_REGS_RAX]) @@ -3932,7 +3931,7 @@ special_insn: case 0xa8 ... 0xa9: /* test ax, imm */ goto test; case 0xc0 ... 0xc1: - emulate_grp2(ctxt); + rc = em_grp2(ctxt); break; case 0xc3: /* ret */ c->dst.type = OP_REG; @@ -3967,11 +3966,11 @@ special_insn: rc = emulate_iret(ctxt, ops); break; case 0xd0 ... 0xd1: /* Grp2 */ - emulate_grp2(ctxt); + rc = em_grp2(ctxt); break; case 0xd2 ... 0xd3: /* Grp2 */ c->src.val = c->regs[VCPU_REGS_RCX]; - emulate_grp2(ctxt); + rc = em_grp2(ctxt); break; case 0xe0 ... 0xe2: /* loop/loopz/loopnz */ register_address_increment(c, &c->regs[VCPU_REGS_RCX], -1); @@ -4040,7 +4039,7 @@ special_insn: ctxt->eflags ^= EFLG_CF; break; case 0xf6 ... 0xf7: /* Grp3 */ - rc = emulate_grp3(ctxt, ops); + rc = em_grp3(ctxt); break; case 0xf8: /* clc */ ctxt->eflags &= ~EFLG_CF; @@ -4071,13 +4070,13 @@ special_insn: ctxt->eflags |= EFLG_DF; break; case 0xfe: /* Grp4 */ - grp45: - rc = emulate_grp45(ctxt); + rc = em_grp45(ctxt); break; case 0xff: /* Grp5 */ if (c->modrm_reg == 5) goto jump_far; - goto grp45; + rc = em_grp45(ctxt); + break; default: goto cannot_emulate; } @@ -4344,7 +4343,7 @@ twobyte_insn: (u64) c->src.val; break; case 0xc7: /* Grp9 (cmpxchg8b) */ - rc = emulate_grp9(ctxt, ops); + rc = em_grp9(ctxt); break; default: goto cannot_emulate; -- cgit v1.2.3-70-g09d2 From d2f62766d5778bbaf80d4feb90a23c7edc371a54 Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Mon, 2 May 2011 02:30:48 +0900 Subject: KVM: x86 emulator: Make jmp far emulation into a separate function We introduce em_jmp_far(). We also call this from em_grp45() to stop treating the modrm_reg == 5 case separately in the group 5 emulation.
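[Illustration, not part of the patch: the far-pointer operand em_jmp_far() decodes, with the layout inferred from the two memcpy() calls in the hunk below (32-bit operand size assumed; the struct name is hypothetical). Note the ordering in the helper: the new CS descriptor is loaded before EIP is overwritten, so a faulting selector leaves the old EIP intact:]

	struct jmp_far_ptr32 {
		u32 offset;	/* copied into c->eip */
		u16 selector;	/* loaded into CS via load_segment_descriptor() */
	} __attribute__((packed));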
From d2f62766d5778bbaf80d4feb90a23c7edc371a54 Mon Sep 17 00:00:00 2001
From: Takuya Yoshikawa
Date: Mon, 2 May 2011 02:30:48 +0900
Subject: KVM: x86 emulator: Make jmp far emulation into a separate function

We introduce em_jmp_far(). We also call it from em_grp45() to stop
treating the modrm_reg == 5 case separately in the group 5 emulation.

Signed-off-by: Takuya Yoshikawa
Signed-off-by: Avi Kivity
---
 arch/x86/kvm/emulate.c | 36 ++++++++++++++++++++++--------------
 1 file changed, 22 insertions(+), 14 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index d9ebf6939e4..d6e2477feb1 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -1687,6 +1687,23 @@ static inline int emulate_iret(struct x86_emulate_ctxt *ctxt,
 	}
 }
 
+static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
+{
+	struct decode_cache *c = &ctxt->decode;
+	int rc;
+	unsigned short sel;
+
+	memcpy(&sel, c->src.valptr + c->op_bytes, 2);
+
+	rc = load_segment_descriptor(ctxt, ctxt->ops, sel, VCPU_SREG_CS);
+	if (rc != X86EMUL_CONTINUE)
+		return rc;
+
+	c->eip = 0;
+	memcpy(&c->eip, c->src.valptr, c->op_bytes);
+	return X86EMUL_CONTINUE;
+}
+
 static int em_grp1a(struct x86_emulate_ctxt *ctxt)
 {
 	struct decode_cache *c = &ctxt->decode;
@@ -1786,6 +1803,9 @@ static int em_grp45(struct x86_emulate_ctxt *ctxt)
 	case 4: /* jmp abs */
 		c->eip = c->src.val;
 		break;
+	case 5: /* jmp far */
+		rc = em_jmp_far(ctxt);
+		break;
 	case 6:	/* push */
 		rc = em_push(ctxt);
 		break;
@@ -3997,19 +4017,9 @@ special_insn:
 	}
 	case 0xe9: /* jmp rel */
 		goto jmp;
-	case 0xea: { /* jmp far */
-		unsigned short sel;
-	jump_far:
-		memcpy(&sel, c->src.valptr + c->op_bytes, 2);
-
-		rc = load_segment_descriptor(ctxt, ops, sel, VCPU_SREG_CS);
-		if (rc != X86EMUL_CONTINUE)
-			goto done;
-
-		c->eip = 0;
-		memcpy(&c->eip, c->src.valptr, c->op_bytes);
+	case 0xea: /* jmp far */
+		rc = em_jmp_far(ctxt);
 		break;
-	}
 	case 0xeb:
 	      jmp:		/* jmp rel short */
 		jmp_rel(c, c->src.val);
@@ -4073,8 +4083,6 @@ special_insn:
 		rc = em_grp45(ctxt);
 		break;
 	case 0xff: /* Grp5 */
-		if (c->modrm_reg == 5)
-			goto jump_far;
 		rc = em_grp45(ctxt);
 		break;
 	default:
-- cgit v1.2.3-70-g09d2
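What em_jmp_far() decodes is the immediate of a direct far jump (ptr16:16 or ptr16:32): the offset comes first, followed by a 16-bit code segment selector, both little-endian, which is why the selector is read at c->src.valptr + c->op_bytes. A self-contained sketch of the same decoding, outside the kernel; far_ptr and decode_far_ptr are hypothetical names:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct far_ptr {
	uint32_t eip;	/* offset within the target code segment */
	uint16_t sel;	/* 16-bit CS selector */
};

static struct far_ptr decode_far_ptr(const uint8_t *imm, size_t op_bytes)
{
	struct far_ptr fp = { 0, 0 };

	/* Zero-init then copy op_bytes, as em_jmp_far() does for c->eip. */
	memcpy(&fp.eip, imm, op_bytes);		/* 2- or 4-byte offset */
	memcpy(&fp.sel, imm + op_bytes, 2);	/* selector follows it */
	return fp;
}

int main(void)
{
	/* jmp far 0x0010:0x00401000, encoded as little-endian bytes */
	const uint8_t imm[6] = { 0x00, 0x10, 0x40, 0x00, 0x10, 0x00 };
	struct far_ptr fp = decode_far_ptr(imm, 4);

	printf("sel=%04x eip=%08x\n", (unsigned)fp.sel, (unsigned)fp.eip);
	return 0;
}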
From c8cfbb555eb3632bf3dcbe1a591c1f4d0c28681c Mon Sep 17 00:00:00 2001
From: Takuya Yoshikawa
Date: Sun, 1 May 2011 14:33:07 +0900
Subject: KVM: MMU: Use ptep_user for cmpxchg_gpte()

The address of the gpte was already calculated and stored in ptep_user
before entering cmpxchg_gpte(). This patch makes cmpxchg_gpte() use it,
to make it clear that we are using the same address during
walk_addr_generic().

Note that the unlikely annotations are there to show that the conditions
are unusual, not to optimize for performance.

Signed-off-by: Takuya Yoshikawa
Signed-off-by: Marcelo Tosatti
---
 arch/x86/kvm/paging_tmpl.h | 26 ++++++++++++--------------
 1 file changed, 12 insertions(+), 14 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index e3f81418797..6c4dc010c4c 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -79,21 +79,19 @@ static gfn_t gpte_to_gfn_lvl(pt_element_t gpte, int lvl)
 }
 
 static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
-			       gfn_t table_gfn, unsigned index,
-			       pt_element_t orig_pte, pt_element_t new_pte)
+			       pt_element_t __user *ptep_user, unsigned index,
+			       pt_element_t orig_pte, pt_element_t new_pte)
 {
+	int npages;
 	pt_element_t ret;
 	pt_element_t *table;
 	struct page *page;
-	gpa_t gpa;
 
-	gpa = mmu->translate_gpa(vcpu, table_gfn << PAGE_SHIFT,
-				 PFERR_USER_MASK|PFERR_WRITE_MASK);
-	if (gpa == UNMAPPED_GVA)
+	npages = get_user_pages_fast((unsigned long)ptep_user, 1, 1, &page);
+	/* Check if the user is doing something meaningless. */
+	if (unlikely(npages != 1))
 		return -EFAULT;
 
-	page = gfn_to_page(vcpu->kvm, gpa_to_gfn(gpa));
-
 	table = kmap_atomic(page, KM_USER0);
 	ret = CMPXCHG(&table[index], orig_pte, new_pte);
 	kunmap_atomic(table, KM_USER0);
@@ -220,9 +218,9 @@ walk:
 			int ret;
 			trace_kvm_mmu_set_accessed_bit(table_gfn, index,
 						       sizeof(pte));
-			ret = FNAME(cmpxchg_gpte)(vcpu, mmu, table_gfn,
-						  index, pte, pte|PT_ACCESSED_MASK);
-			if (ret < 0) {
+			ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index,
+						  pte, pte|PT_ACCESSED_MASK);
+			if (unlikely(ret < 0)) {
 				present = false;
 				break;
 			} else if (ret)
@@ -279,9 +277,9 @@ walk:
 		int ret;
 
 		trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte));
-		ret = FNAME(cmpxchg_gpte)(vcpu, mmu, table_gfn, index, pte,
-					  pte|PT_DIRTY_MASK);
-		if (ret < 0) {
+		ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index,
+					  pte, pte|PT_DIRTY_MASK);
+		if (unlikely(ret < 0)) {
 			present = false;
 			goto error;
 		} else if (ret)
-- cgit v1.2.3-70-g09d2
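cmpxchg_gpte() maps the page backing the user-space pte pointer and performs a compare-and-exchange on it; the caller treats a negative return as a fault and any other non-zero return as "the gpte changed under us, retry the walk". A userspace analogue of that retry contract, assuming GCC/Clang atomic builtins in place of the kernel's CMPXCHG macro; try_mark_accessed is a hypothetical name:

#include <stdint.h>

typedef uint64_t pt_element_t;

#define PT_ACCESSED_MASK (1ULL << 5)

/*
 * Sketch: returns 0 if the entry was updated, 1 if another writer got
 * there first, mirroring cmpxchg_gpte()'s "ret != orig_pte" result that
 * makes walk_addr_generic() restart the walk.
 */
static int try_mark_accessed(pt_element_t *ptep, pt_element_t orig_pte)
{
	pt_element_t new_pte = orig_pte | PT_ACCESSED_MASK;
	pt_element_t old = __sync_val_compare_and_swap(ptep, orig_pte, new_pte);

	return old != orig_pte;
}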