From 33fb20c39e98b90813b5ab2d9a0d6faa6300caca Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Wed, 6 Mar 2013 15:44:03 +0100 Subject: KVM: nVMX: Fix content of MSR_IA32_VMX_ENTRY/EXIT_CTLS Properly set those bits to 1 that the spec demands in case bit 55 of VMX_BASIC is 0 - like in our case. Reviewed-by: Paolo Bonzini Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/vmx.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index b6fbf860e39..5fb6e24f064 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -81,6 +81,8 @@ #define VM_EXIT_LOAD_IA32_EFER 0x00200000 #define VM_EXIT_SAVE_VMX_PREEMPTION_TIMER 0x00400000 +#define VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR 0x00036dff + #define VM_ENTRY_LOAD_DEBUG_CONTROLS 0x00000002 #define VM_ENTRY_IA32E_MODE 0x00000200 #define VM_ENTRY_SMM 0x00000400 @@ -89,6 +91,8 @@ #define VM_ENTRY_LOAD_IA32_PAT 0x00004000 #define VM_ENTRY_LOAD_IA32_EFER 0x00008000 +#define VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR 0x000011ff + /* VMCS Encodings */ enum vmcs_field { VIRTUAL_PROCESSOR_ID = 0x00000000, -- cgit v1.2.3-70-g09d2 From 57f252f22908535e04d520f3833a6e3116eb159d Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Tue, 12 Mar 2013 10:20:24 +0100 Subject: KVM: x86: Drop unused return code from VCPU reset callback Neither vmx nor svm nor the common part may generate an error on kvm_vcpu_reset. So drop the return code. Reviewed-by: Paolo Bonzini Signed-off-by: Jan Kiszka Signed-off-by: Gleb Natapov --- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/svm.c | 4 +--- arch/x86/kvm/vmx.c | 7 +------ arch/x86/kvm/x86.c | 15 ++++++--------- 4 files changed, 9 insertions(+), 19 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 635a74d2240..348d85965ea 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -643,7 +643,7 @@ struct kvm_x86_ops { /* Create, but do not attach this VCPU */ struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned id); void (*vcpu_free)(struct kvm_vcpu *vcpu); - int (*vcpu_reset)(struct kvm_vcpu *vcpu); + void (*vcpu_reset)(struct kvm_vcpu *vcpu); void (*prepare_guest_switch)(struct kvm_vcpu *vcpu); void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index e1b1ce21bc0..907e4280116 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1191,7 +1191,7 @@ static void init_vmcb(struct vcpu_svm *svm) enable_gif(svm); } -static int svm_vcpu_reset(struct kvm_vcpu *vcpu) +static void svm_vcpu_reset(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); u32 dummy; @@ -1207,8 +1207,6 @@ static int svm_vcpu_reset(struct kvm_vcpu *vcpu) kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy); kvm_register_write(vcpu, VCPU_REGS_RDX, eax); - - return 0; } static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 473a5fe7e00..f588171be17 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -4100,11 +4100,10 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) return 0; } -static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) +static void vmx_vcpu_reset(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); u64 msr; - int ret; vmx->rmode.vm86_active = 0; @@ -4195,10 +4194,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) update_exception_bitmap(&vmx->vcpu); vpid_sync_context(vmx); - - ret = 0; - - return ret; } /* diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index b67985af175..fadd5a75047 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -162,7 +162,7 @@ u64 __read_mostly host_xcr0; static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt); -static int kvm_vcpu_reset(struct kvm_vcpu *vcpu); +static void kvm_vcpu_reset(struct kvm_vcpu *vcpu); static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu) { @@ -5858,9 +5858,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) pr_debug("vcpu %d received sipi with vector # %x\n", vcpu->vcpu_id, vcpu->arch.sipi_vector); kvm_lapic_reset(vcpu); - r = kvm_vcpu_reset(vcpu); - if (r) - return r; + kvm_vcpu_reset(vcpu); vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; } @@ -6486,9 +6484,8 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) r = vcpu_load(vcpu); if (r) return r; - r = kvm_vcpu_reset(vcpu); - if (r == 0) - r = kvm_mmu_setup(vcpu); + kvm_vcpu_reset(vcpu); + r = kvm_mmu_setup(vcpu); vcpu_put(vcpu); return r; @@ -6525,7 +6522,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) kvm_x86_ops->vcpu_free(vcpu); } -static int kvm_vcpu_reset(struct kvm_vcpu *vcpu) +static void kvm_vcpu_reset(struct kvm_vcpu *vcpu) { atomic_set(&vcpu->arch.nmi_queued, 0); vcpu->arch.nmi_pending = 0; @@ -6552,7 +6549,7 @@ static int kvm_vcpu_reset(struct kvm_vcpu *vcpu) vcpu->arch.regs_avail = ~0; vcpu->arch.regs_dirty = ~0; - return kvm_x86_ops->vcpu_reset(vcpu); + kvm_x86_ops->vcpu_reset(vcpu); } int kvm_arch_hardware_enable(void *garbage) -- cgit v1.2.3-70-g09d2 From 66450a21f99636af4fafac2afd33f1a40631bc3a Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Wed, 13 Mar 2013 12:42:34 +0100 Subject: KVM: x86: Rework INIT and SIPI handling A VCPU sending INIT or SIPI to some other VCPU races for setting the remote VCPU's mp_state. When we were unlucky, KVM_MP_STATE_INIT_RECEIVED was overwritten by kvm_emulate_halt and, thus, got lost. This introduces APIC events for those two signals, keeping them in kvm_apic until kvm_apic_accept_events is run over the target vcpu context. kvm_apic_has_events reports to kvm_arch_vcpu_runnable if there are pending events, thus if vcpu blocking should end. The patch comes with the side effect of effectively obsoleting KVM_MP_STATE_SIPI_RECEIVED. We still accept it from user space, but immediately translate it to KVM_MP_STATE_INIT_RECEIVED + KVM_APIC_SIPI. The vcpu itself will no longer enter the KVM_MP_STATE_SIPI_RECEIVED state. That also means we no longer exit to user space after receiving a SIPI event. Furthermore, we already reset the VCPU on INIT, only fixing up the code segment later on when SIPI arrives. Moreover, we fix INIT handling for the BSP: it never enter wait-for-SIPI but directly starts over on INIT. Tested-by: Paolo Bonzini Signed-off-by: Jan Kiszka Signed-off-by: Gleb Natapov --- arch/x86/include/asm/kvm_host.h | 3 ++- arch/x86/kvm/lapic.c | 48 ++++++++++++++++++++++++++++------ arch/x86/kvm/lapic.h | 11 ++++++++ arch/x86/kvm/svm.c | 6 ----- arch/x86/kvm/vmx.c | 12 ++------- arch/x86/kvm/x86.c | 58 +++++++++++++++++++++++++++-------------- 6 files changed, 93 insertions(+), 45 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 348d85965ea..ef7f4a5cf8c 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -345,7 +345,6 @@ struct kvm_vcpu_arch { unsigned long apic_attention; int32_t apic_arb_prio; int mp_state; - int sipi_vector; u64 ia32_misc_enable_msr; bool tpr_access_reporting; @@ -819,6 +818,7 @@ int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu); void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg); +void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, unsigned int vector); int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index, int reason, bool has_error_code, u32 error_code); @@ -1002,6 +1002,7 @@ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v); int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); int kvm_cpu_get_interrupt(struct kvm_vcpu *v); +void kvm_vcpu_reset(struct kvm_vcpu *vcpu); void kvm_define_shared_msr(unsigned index, u32 msr); void kvm_set_shared_msr(unsigned index, u64 val, u64 mask); diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 02b51dd4e4a..a8e9369f41c 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -731,7 +731,11 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, case APIC_DM_INIT: if (!trig_mode || level) { result = 1; - vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; + /* assumes that there are only KVM_APIC_INIT/SIPI */ + apic->pending_events = (1UL << KVM_APIC_INIT); + /* make sure pending_events is visible before sending + * the request */ + smp_wmb(); kvm_make_request(KVM_REQ_EVENT, vcpu); kvm_vcpu_kick(vcpu); } else { @@ -743,13 +747,13 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, case APIC_DM_STARTUP: apic_debug("SIPI to vcpu %d vector 0x%02x\n", vcpu->vcpu_id, vector); - if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { - result = 1; - vcpu->arch.sipi_vector = vector; - vcpu->arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED; - kvm_make_request(KVM_REQ_EVENT, vcpu); - kvm_vcpu_kick(vcpu); - } + result = 1; + apic->sipi_vector = vector; + /* make sure sipi_vector is visible for the receiver */ + smp_wmb(); + set_bit(KVM_APIC_SIPI, &apic->pending_events); + kvm_make_request(KVM_REQ_EVENT, vcpu); + kvm_vcpu_kick(vcpu); break; case APIC_DM_EXTINT: @@ -1860,6 +1864,34 @@ int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data) addr); } +void kvm_apic_accept_events(struct kvm_vcpu *vcpu) +{ + struct kvm_lapic *apic = vcpu->arch.apic; + unsigned int sipi_vector; + + if (!kvm_vcpu_has_lapic(vcpu)) + return; + + if (test_and_clear_bit(KVM_APIC_INIT, &apic->pending_events)) { + kvm_lapic_reset(vcpu); + kvm_vcpu_reset(vcpu); + if (kvm_vcpu_is_bsp(apic->vcpu)) + vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; + else + vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; + } + if (test_and_clear_bit(KVM_APIC_SIPI, &apic->pending_events) && + vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { + /* evaluate pending_events before reading the vector */ + smp_rmb(); + sipi_vector = apic->sipi_vector; + pr_debug("vcpu %d received sipi with vector # %x\n", + vcpu->vcpu_id, sipi_vector); + kvm_vcpu_deliver_sipi_vector(vcpu, sipi_vector); + vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; + } +} + void kvm_lapic_init(void) { /* do not patch jump label more than once per second */ diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 1676d34ddb4..2c721b986ee 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -5,6 +5,9 @@ #include +#define KVM_APIC_INIT 0 +#define KVM_APIC_SIPI 1 + struct kvm_timer { struct hrtimer timer; s64 period; /* unit: ns */ @@ -32,6 +35,8 @@ struct kvm_lapic { void *regs; gpa_t vapic_addr; struct page *vapic_page; + unsigned long pending_events; + unsigned int sipi_vector; }; int kvm_create_lapic(struct kvm_vcpu *vcpu); void kvm_free_lapic(struct kvm_vcpu *vcpu); @@ -39,6 +44,7 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu); int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu); int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu); int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu); +void kvm_apic_accept_events(struct kvm_vcpu *vcpu); void kvm_lapic_reset(struct kvm_vcpu *vcpu); u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu); void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8); @@ -158,4 +164,9 @@ void kvm_calculate_eoi_exitmap(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq, u64 *eoi_bitmap); +static inline bool kvm_apic_has_events(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.apic->pending_events; +} + #endif diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 907e4280116..7219a4012a0 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1199,12 +1199,6 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu) init_vmcb(svm); - if (!kvm_vcpu_is_bsp(vcpu)) { - kvm_rip_write(vcpu, 0); - svm->vmcb->save.cs.base = svm->vcpu.arch.sipi_vector << 12; - svm->vmcb->save.cs.selector = svm->vcpu.arch.sipi_vector << 8; - } - kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy); kvm_register_write(vcpu, VCPU_REGS_RDX, eax); } diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index f588171be17..af1ffaf2089 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -4119,12 +4119,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu) vmx_segment_cache_clear(vmx); seg_setup(VCPU_SREG_CS); - if (kvm_vcpu_is_bsp(&vmx->vcpu)) - vmcs_write16(GUEST_CS_SELECTOR, 0xf000); - else { - vmcs_write16(GUEST_CS_SELECTOR, vmx->vcpu.arch.sipi_vector << 8); - vmcs_writel(GUEST_CS_BASE, vmx->vcpu.arch.sipi_vector << 12); - } + vmcs_write16(GUEST_CS_SELECTOR, 0xf000); seg_setup(VCPU_SREG_DS); seg_setup(VCPU_SREG_ES); @@ -4147,10 +4142,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu) vmcs_writel(GUEST_SYSENTER_EIP, 0); vmcs_writel(GUEST_RFLAGS, 0x02); - if (kvm_vcpu_is_bsp(&vmx->vcpu)) - kvm_rip_write(vcpu, 0xfff0); - else - kvm_rip_write(vcpu, 0); + kvm_rip_write(vcpu, 0xfff0); vmcs_writel(GUEST_GDTR_BASE, 0); vmcs_write32(GUEST_GDTR_LIMIT, 0xffff); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index fadd5a75047..61a5bb60af8 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -162,8 +162,6 @@ u64 __read_mostly host_xcr0; static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt); -static void kvm_vcpu_reset(struct kvm_vcpu *vcpu); - static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu) { int i; @@ -2830,10 +2828,9 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, events->nmi.masked = kvm_x86_ops->get_nmi_mask(vcpu); events->nmi.pad = 0; - events->sipi_vector = vcpu->arch.sipi_vector; + events->sipi_vector = 0; /* never valid when reporting to user space */ events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING - | KVM_VCPUEVENT_VALID_SIPI_VECTOR | KVM_VCPUEVENT_VALID_SHADOW); memset(&events->reserved, 0, sizeof(events->reserved)); } @@ -2864,8 +2861,9 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, vcpu->arch.nmi_pending = events->nmi.pending; kvm_x86_ops->set_nmi_mask(vcpu, events->nmi.masked); - if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR) - vcpu->arch.sipi_vector = events->sipi_vector; + if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR && + kvm_vcpu_has_lapic(vcpu)) + vcpu->arch.apic->sipi_vector = events->sipi_vector; kvm_make_request(KVM_REQ_EVENT, vcpu); @@ -5720,6 +5718,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) } if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { + kvm_apic_accept_events(vcpu); + if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { + r = 1; + goto out; + } + inject_pending_event(vcpu); /* enable NMI/IRQ window open exits if needed */ @@ -5854,14 +5858,6 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) int r; struct kvm *kvm = vcpu->kvm; - if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED)) { - pr_debug("vcpu %d received sipi with vector # %x\n", - vcpu->vcpu_id, vcpu->arch.sipi_vector); - kvm_lapic_reset(vcpu); - kvm_vcpu_reset(vcpu); - vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; - } - vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); r = vapic_enter(vcpu); if (r) { @@ -5878,8 +5874,8 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); kvm_vcpu_block(vcpu); vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); - if (kvm_check_request(KVM_REQ_UNHALT, vcpu)) - { + if (kvm_check_request(KVM_REQ_UNHALT, vcpu)) { + kvm_apic_accept_events(vcpu); switch(vcpu->arch.mp_state) { case KVM_MP_STATE_HALTED: vcpu->arch.mp_state = @@ -5887,7 +5883,8 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) case KVM_MP_STATE_RUNNABLE: vcpu->arch.apf.halted = false; break; - case KVM_MP_STATE_SIPI_RECEIVED: + case KVM_MP_STATE_INIT_RECEIVED: + break; default: r = -EINTR; break; @@ -6022,6 +6019,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) { kvm_vcpu_block(vcpu); + kvm_apic_accept_events(vcpu); clear_bit(KVM_REQ_UNHALT, &vcpu->requests); r = -EAGAIN; goto out; @@ -6178,6 +6176,7 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { + kvm_apic_accept_events(vcpu); mp_state->mp_state = vcpu->arch.mp_state; return 0; } @@ -6185,7 +6184,15 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { - vcpu->arch.mp_state = mp_state->mp_state; + if (!kvm_vcpu_has_lapic(vcpu) && + mp_state->mp_state != KVM_MP_STATE_RUNNABLE) + return -EINVAL; + + if (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED) { + vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; + set_bit(KVM_APIC_SIPI, &vcpu->arch.apic->pending_events); + } else + vcpu->arch.mp_state = mp_state->mp_state; kvm_make_request(KVM_REQ_EVENT, vcpu); return 0; } @@ -6522,7 +6529,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) kvm_x86_ops->vcpu_free(vcpu); } -static void kvm_vcpu_reset(struct kvm_vcpu *vcpu) +void kvm_vcpu_reset(struct kvm_vcpu *vcpu) { atomic_set(&vcpu->arch.nmi_queued, 0); vcpu->arch.nmi_pending = 0; @@ -6552,6 +6559,17 @@ static void kvm_vcpu_reset(struct kvm_vcpu *vcpu) kvm_x86_ops->vcpu_reset(vcpu); } +void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, unsigned int vector) +{ + struct kvm_segment cs; + + kvm_get_segment(vcpu, &cs, VCPU_SREG_CS); + cs.selector = vector << 8; + cs.base = vector << 12; + kvm_set_segment(vcpu, &cs, VCPU_SREG_CS); + kvm_rip_write(vcpu, 0); +} + int kvm_arch_hardware_enable(void *garbage) { struct kvm *kvm; @@ -6995,7 +7013,7 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE && !vcpu->arch.apf.halted) || !list_empty_careful(&vcpu->async_pf.done) - || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED + || kvm_apic_has_events(vcpu) || atomic_read(&vcpu->arch.nmi_queued) || (kvm_arch_interrupt_allowed(vcpu) && kvm_cpu_has_interrupt(vcpu)); -- cgit v1.2.3-70-g09d2 From eabeaaccfca0ed61b8e00a09b8cfa703c4f11b59 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Wed, 13 Mar 2013 11:30:50 +0100 Subject: KVM: nVMX: Clean up and fix pin-based execution controls Only interrupt and NMI exiting are mandatory for KVM to work, thus can be exposed to the guest unconditionally, virtual NMI exiting is optional. So we must not advertise it unless the host supports it. Introduce the symbolic constant PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR at this chance. Reviewed-by:: Paolo Bonzini Signed-off-by: Jan Kiszka Signed-off-by: Gleb Natapov --- arch/x86/include/asm/vmx.h | 2 ++ arch/x86/kvm/vmx.c | 10 ++++++---- 2 files changed, 8 insertions(+), 4 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 5fb6e24f064..3c9f455bace 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -71,6 +71,8 @@ #define PIN_BASED_NMI_EXITING 0x00000008 #define PIN_BASED_VIRTUAL_NMIS 0x00000020 +#define PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x00000016 + #define VM_EXIT_SAVE_DEBUG_CONTROLS 0x00000002 #define VM_EXIT_HOST_ADDR_SPACE_SIZE 0x00000200 #define VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL 0x00001000 diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index af1ffaf2089..8eaabfb2023 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2040,14 +2040,16 @@ static __init void nested_vmx_setup_ctls_msrs(void) */ /* pin-based controls */ + rdmsr(MSR_IA32_VMX_PINBASED_CTLS, + nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high); /* * According to the Intel spec, if bit 55 of VMX_BASIC is off (as it is * in our case), bits 1, 2 and 4 (i.e., 0x16) must be 1 in this MSR. */ - nested_vmx_pinbased_ctls_low = 0x16 ; - nested_vmx_pinbased_ctls_high = 0x16 | - PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING | - PIN_BASED_VIRTUAL_NMIS; + nested_vmx_pinbased_ctls_low |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR; + nested_vmx_pinbased_ctls_high &= PIN_BASED_EXT_INTR_MASK | + PIN_BASED_NMI_EXITING | PIN_BASED_VIRTUAL_NMIS; + nested_vmx_pinbased_ctls_high |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR; /* * Exit controls -- cgit v1.2.3-70-g09d2 From c18911a23ce1dec27fa3325b50587de2569d26f8 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Wed, 13 Mar 2013 16:06:41 +0100 Subject: KVM: nVMX: Provide EFER.LMA saving support We will need EFER.LMA saving to provide unrestricted guest mode. All what is missing for this is picking up EFER.LMA from VM_ENTRY_CONTROLS on L2->L1 switches. If the host does not support EFER.LMA saving, no change is performed, otherwise we properly emulate for L1 what the hardware does for L0. Advertise the support, depending on the host feature. Reviewed-by: Paolo Bonzini Signed-off-by: Jan Kiszka Signed-off-by: Gleb Natapov --- arch/x86/include/asm/vmx.h | 2 ++ arch/x86/kvm/vmx.c | 13 ++++++++++++- 2 files changed, 14 insertions(+), 1 deletion(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 3c9f455bace..056bda586a4 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -95,6 +95,8 @@ #define VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR 0x000011ff +#define VMX_MISC_SAVE_EFER_LMA 0x00000020 + /* VMCS Encodings */ enum vmcs_field { VIRTUAL_PROCESSOR_ID = 0x00000000, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 8eaabfb2023..02f8c32b9b0 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2022,6 +2022,7 @@ static u32 nested_vmx_secondary_ctls_low, nested_vmx_secondary_ctls_high; static u32 nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high; static u32 nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high; static u32 nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high; +static u32 nested_vmx_misc_low, nested_vmx_misc_high; static __init void nested_vmx_setup_ctls_msrs(void) { /* @@ -2106,6 +2107,11 @@ static __init void nested_vmx_setup_ctls_msrs(void) nested_vmx_secondary_ctls_high &= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | SECONDARY_EXEC_WBINVD_EXITING; + + /* miscellaneous data */ + rdmsr(MSR_IA32_VMX_MISC, nested_vmx_misc_low, nested_vmx_misc_high); + nested_vmx_misc_low &= VMX_MISC_SAVE_EFER_LMA; + nested_vmx_misc_high = 0; } static inline bool vmx_control_verify(u32 control, u32 low, u32 high) @@ -2176,7 +2182,8 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) nested_vmx_entry_ctls_high); break; case MSR_IA32_VMX_MISC: - *pdata = 0; + *pdata = vmx_control_msr(nested_vmx_misc_low, + nested_vmx_misc_high); break; /* * These MSRs specify bits which the guest must keep fixed (on or off) @@ -7398,6 +7405,10 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) vmcs12->guest_pending_dbg_exceptions = vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS); + vmcs12->vm_entry_controls = + (vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) | + (vmcs_read32(VM_ENTRY_CONTROLS) & VM_ENTRY_IA32E_MODE); + /* TODO: These cannot have changed unless we have MSR bitmaps and * the relevant bit asks not to trap the change */ vmcs12->guest_ia32_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); -- cgit v1.2.3-70-g09d2 From 0238ea913c21a89387f93097acfbdfeebc9c9257 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Wed, 13 Mar 2013 11:31:24 +0100 Subject: KVM: nVMX: Add preemption timer support Provided the host has this feature, it's straightforward to offer it to the guest as well. We just need to load to timer value on L2 entry if the feature was enabled by L1 and watch out for the corresponding exit reason. Reviewed-by: Paolo Bonzini Signed-off-by: Jan Kiszka Signed-off-by: Gleb Natapov --- arch/x86/include/asm/vmx.h | 3 +++ arch/x86/include/uapi/asm/vmx.h | 5 +++-- arch/x86/kvm/vmx.c | 17 ++++++++++++++--- 3 files changed, 20 insertions(+), 5 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 056bda586a4..fc1c3134473 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -70,6 +70,7 @@ #define PIN_BASED_EXT_INTR_MASK 0x00000001 #define PIN_BASED_NMI_EXITING 0x00000008 #define PIN_BASED_VIRTUAL_NMIS 0x00000020 +#define PIN_BASED_VMX_PREEMPTION_TIMER 0x00000040 #define PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x00000016 @@ -95,6 +96,7 @@ #define VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR 0x000011ff +#define VMX_MISC_PREEMPTION_TIMER_RATE_MASK 0x0000001f #define VMX_MISC_SAVE_EFER_LMA 0x00000020 /* VMCS Encodings */ @@ -217,6 +219,7 @@ enum vmcs_field { GUEST_INTERRUPTIBILITY_INFO = 0x00004824, GUEST_ACTIVITY_STATE = 0X00004826, GUEST_SYSENTER_CS = 0x0000482A, + VMX_PREEMPTION_TIMER_VALUE = 0x0000482E, HOST_IA32_SYSENTER_CS = 0x00004c00, CR0_GUEST_HOST_MASK = 0x00006000, CR4_GUEST_HOST_MASK = 0x00006002, diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h index 2871fccfee6..d651082c7cf 100644 --- a/arch/x86/include/uapi/asm/vmx.h +++ b/arch/x86/include/uapi/asm/vmx.h @@ -65,6 +65,7 @@ #define EXIT_REASON_EOI_INDUCED 45 #define EXIT_REASON_EPT_VIOLATION 48 #define EXIT_REASON_EPT_MISCONFIG 49 +#define EXIT_REASON_PREEMPTION_TIMER 52 #define EXIT_REASON_WBINVD 54 #define EXIT_REASON_XSETBV 55 #define EXIT_REASON_APIC_WRITE 56 @@ -110,7 +111,7 @@ { EXIT_REASON_EOI_INDUCED, "EOI_INDUCED" }, \ { EXIT_REASON_INVALID_STATE, "INVALID_STATE" }, \ { EXIT_REASON_INVD, "INVD" }, \ - { EXIT_REASON_INVPCID, "INVPCID" } - + { EXIT_REASON_INVPCID, "INVPCID" }, \ + { EXIT_REASON_PREEMPTION_TIMER, "PREEMPTION_TIMER" } #endif /* _UAPIVMX_H */ diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 02f8c32b9b0..17a69386845 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -298,7 +298,8 @@ struct __packed vmcs12 { u32 guest_activity_state; u32 guest_sysenter_cs; u32 host_ia32_sysenter_cs; - u32 padding32[8]; /* room for future expansion */ + u32 vmx_preemption_timer_value; + u32 padding32[7]; /* room for future expansion */ u16 virtual_processor_id; u16 guest_es_selector; u16 guest_cs_selector; @@ -537,6 +538,7 @@ static const unsigned short vmcs_field_to_offset_table[] = { FIELD(GUEST_ACTIVITY_STATE, guest_activity_state), FIELD(GUEST_SYSENTER_CS, guest_sysenter_cs), FIELD(HOST_IA32_SYSENTER_CS, host_ia32_sysenter_cs), + FIELD(VMX_PREEMPTION_TIMER_VALUE, vmx_preemption_timer_value), FIELD(CR0_GUEST_HOST_MASK, cr0_guest_host_mask), FIELD(CR4_GUEST_HOST_MASK, cr4_guest_host_mask), FIELD(CR0_READ_SHADOW, cr0_read_shadow), @@ -2049,7 +2051,8 @@ static __init void nested_vmx_setup_ctls_msrs(void) */ nested_vmx_pinbased_ctls_low |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR; nested_vmx_pinbased_ctls_high &= PIN_BASED_EXT_INTR_MASK | - PIN_BASED_NMI_EXITING | PIN_BASED_VIRTUAL_NMIS; + PIN_BASED_NMI_EXITING | PIN_BASED_VIRTUAL_NMIS | + PIN_BASED_VMX_PREEMPTION_TIMER; nested_vmx_pinbased_ctls_high |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR; /* @@ -2110,7 +2113,8 @@ static __init void nested_vmx_setup_ctls_msrs(void) /* miscellaneous data */ rdmsr(MSR_IA32_VMX_MISC, nested_vmx_misc_low, nested_vmx_misc_high); - nested_vmx_misc_low &= VMX_MISC_SAVE_EFER_LMA; + nested_vmx_misc_low &= VMX_MISC_PREEMPTION_TIMER_RATE_MASK | + VMX_MISC_SAVE_EFER_LMA; nested_vmx_misc_high = 0; } @@ -6190,6 +6194,9 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) case EXIT_REASON_EPT_VIOLATION: case EXIT_REASON_EPT_MISCONFIG: return 0; + case EXIT_REASON_PREEMPTION_TIMER: + return vmcs12->pin_based_vm_exec_control & + PIN_BASED_VMX_PREEMPTION_TIMER; case EXIT_REASON_WBINVD: return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING); case EXIT_REASON_XSETBV: @@ -7011,6 +7018,10 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) (vmcs_config.pin_based_exec_ctrl | vmcs12->pin_based_vm_exec_control)); + if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) + vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, + vmcs12->vmx_preemption_timer_value); + /* * Whether page-faults are trapped is determined by a combination of * 3 settings: PFEC_MASK, PFEC_MATCH and EXCEPTION_BITMAP.PF. -- cgit v1.2.3-70-g09d2 From 95b0430d1a53541076ffbaf453f8b49a547cceba Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Tue, 12 Mar 2013 17:44:40 +0900 Subject: KVM: MMU: Mark sp mmio cached when creating mmio spte This will be used not to zap unrelated mmu pages when creating/moving a memory slot later. Reviewed-by: Marcelo Tosatti Signed-off-by: Takuya Yoshikawa Signed-off-by: Gleb Natapov --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/mmu.c | 3 +++ 2 files changed, 4 insertions(+) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index ef7f4a5cf8c..9b75cae83d1 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -230,6 +230,7 @@ struct kvm_mmu_page { #endif int write_flooding_count; + bool mmio_cached; }; struct kvm_pio_request { diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index fdacabba6f6..de45ec19534 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -199,8 +199,11 @@ EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask); static void mark_mmio_spte(u64 *sptep, u64 gfn, unsigned access) { + struct kvm_mmu_page *sp = page_header(__pa(sptep)); + access &= ACC_WRITE_MASK | ACC_USER_MASK; + sp->mmio_cached = true; trace_mark_mmio_spte(sptep, gfn, access); mmu_spte_set(sptep, shadow_mmio_mask | access | gfn << PAGE_SHIFT); } -- cgit v1.2.3-70-g09d2 From 982b3394dd23eec6e5a2f7871238435a167b63cc Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Tue, 12 Mar 2013 17:45:30 +0900 Subject: KVM: x86: Optimize mmio spte zapping when creating/moving memslot When we create or move a memory slot, we need to zap mmio sptes. Currently, zap_all() is used for this and this is causing two problems: - extra page faults after zapping mmu pages - long mmu_lock hold time during zapping mmu pages For the latter, Marcelo reported a disastrous mmu_lock hold time during hot-plug, which made the guest unresponsive for a long time. This patch takes a simple way to fix these problems: do not zap mmu pages unless they are marked mmio cached. On our test box, this took only 50us for the 4GB guest and we did not see ms of mmu_lock hold time any more. Note that we still need to do zap_all() for other cases. So another work is also needed: Xiao's work may be the one. Reviewed-by: Marcelo Tosatti Signed-off-by: Takuya Yoshikawa Signed-off-by: Gleb Natapov --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/mmu.c | 18 ++++++++++++++++++ arch/x86/kvm/x86.c | 2 +- 3 files changed, 20 insertions(+), 1 deletion(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 9b75cae83d1..3f205c6cde5 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -767,6 +767,7 @@ void kvm_mmu_write_protect_pt_masked(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn_offset, unsigned long mask); void kvm_mmu_zap_all(struct kvm *kvm); +void kvm_mmu_zap_mmio_sptes(struct kvm *kvm); unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm); void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index de45ec19534..c1a9b7b08ab 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -4189,6 +4189,24 @@ restart: spin_unlock(&kvm->mmu_lock); } +void kvm_mmu_zap_mmio_sptes(struct kvm *kvm) +{ + struct kvm_mmu_page *sp, *node; + LIST_HEAD(invalid_list); + + spin_lock(&kvm->mmu_lock); +restart: + list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) { + if (!sp->mmio_cached) + continue; + if (kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list)) + goto restart; + } + + kvm_mmu_commit_zap_page(kvm, &invalid_list); + spin_unlock(&kvm->mmu_lock); +} + static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc) { struct kvm *kvm; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 61a5bb60af8..d3c478742e2 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6991,7 +6991,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, * mmio sptes. */ if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) { - kvm_mmu_zap_all(kvm); + kvm_mmu_zap_mmio_sptes(kvm); kvm_reload_remote_mmus(kvm); } } -- cgit v1.2.3-70-g09d2 From afd80d85aefac27e6e2f9dc10f60515357c504d2 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 28 Mar 2013 17:18:35 +0100 Subject: pmu: prepare for migration support In order to migrate the PMU state correctly, we need to restore the values of MSR_CORE_PERF_GLOBAL_STATUS (a read-only register) and MSR_CORE_PERF_GLOBAL_OVF_CTRL (which has side effects when written). We also need to write the full 40-bit value of the performance counter, which would only be possible with a v3 architectural PMU's full-width counter MSRs. To distinguish host-initiated writes from the guest's, pass the full struct msr_data to kvm_pmu_set_msr. Signed-off-by: Paolo Bonzini Signed-off-by: Gleb Natapov --- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/pmu.c | 14 +++++++++++--- arch/x86/kvm/x86.c | 4 ++-- 3 files changed, 14 insertions(+), 6 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index b5a64621d5a..3dd84c996d5 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1030,7 +1030,7 @@ void kvm_pmu_reset(struct kvm_vcpu *vcpu); void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu); bool kvm_pmu_msr(struct kvm_vcpu *vcpu, u32 msr); int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data); -int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data); +int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info); int kvm_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data); void kvm_handle_pmu_event(struct kvm_vcpu *vcpu); void kvm_deliver_pmi(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index cfc258a6bf9..c53e797e736 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -360,10 +360,12 @@ int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data) return 1; } -int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data) +int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) { struct kvm_pmu *pmu = &vcpu->arch.pmu; struct kvm_pmc *pmc; + u32 index = msr_info->index; + u64 data = msr_info->data; switch (index) { case MSR_CORE_PERF_FIXED_CTR_CTRL: @@ -375,6 +377,10 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data) } break; case MSR_CORE_PERF_GLOBAL_STATUS: + if (msr_info->host_initiated) { + pmu->global_status = data; + return 0; + } break; /* RO MSR */ case MSR_CORE_PERF_GLOBAL_CTRL: if (pmu->global_ctrl == data) @@ -386,7 +392,8 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data) break; case MSR_CORE_PERF_GLOBAL_OVF_CTRL: if (!(data & (pmu->global_ctrl_mask & ~(3ull<<62)))) { - pmu->global_status &= ~data; + if (!msr_info->host_initiated) + pmu->global_status &= ~data; pmu->global_ovf_ctrl = data; return 0; } @@ -394,7 +401,8 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data) default: if ((pmc = get_gp_pmc(pmu, index, MSR_IA32_PERFCTR0)) || (pmc = get_fixed_pmc(pmu, index))) { - data = (s64)(s32)data; + if (!msr_info->host_initiated) + data = (s64)(s32)data; pmc->counter += data - read_pmc(pmc); return 0; } else if ((pmc = get_gp_pmc(pmu, index, MSR_P6_EVNTSEL0))) { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 2aaba814f1c..78c6f90a60c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2040,7 +2040,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_P6_EVNTSEL0: case MSR_P6_EVNTSEL1: if (kvm_pmu_msr(vcpu, msr)) - return kvm_pmu_set_msr(vcpu, msr, data); + return kvm_pmu_set_msr(vcpu, msr_info); if (pr || data != 0) vcpu_unimpl(vcpu, "disabled perfctr wrmsr: " @@ -2086,7 +2086,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr)) return xen_hvm_config(vcpu, data); if (kvm_pmu_msr(vcpu, msr)) - return kvm_pmu_set_msr(vcpu, msr, data); + return kvm_pmu_set_msr(vcpu, msr_info); if (!ignore_msrs) { vcpu_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n", msr, data); -- cgit v1.2.3-70-g09d2 From fc1b74925f87f6aca5432eb73f6a57eff30afde7 Mon Sep 17 00:00:00 2001 From: Geoff Levand Date: Fri, 5 Apr 2013 19:20:30 +0000 Subject: KVM: Move vm_list kvm_lock declarations out of x86 The variables vm_list and kvm_lock are common to all architectures, so move the declarations from arch/x86/include/asm/kvm_host.h to include/linux/kvm_host.h. Fixes sparse warnings like these when building for arm64: virt/kvm/kvm_main.c: warning: symbol 'kvm_lock' was not declared. Should it be static? virt/kvm/kvm_main.c: warning: symbol 'vm_list' was not declared. Should it be static? Signed-off-by: Geoff Levand Signed-off-by: Gleb Natapov --- arch/x86/include/asm/kvm_host.h | 3 --- include/linux/kvm_host.h | 3 +++ 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 3dd84c996d5..628163c0c6e 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -94,9 +94,6 @@ #define ASYNC_PF_PER_VCPU 64 -extern raw_spinlock_t kvm_lock; -extern struct list_head vm_list; - struct kvm_vcpu; struct kvm; struct kvm_async_pf; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 1c0be23f874..71fed38c720 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -135,6 +135,9 @@ struct kvm; struct kvm_vcpu; extern struct kmem_cache *kvm_vcpu_cache; +extern raw_spinlock_t kvm_lock; +extern struct list_head vm_list; + struct kvm_io_range { gpa_t addr; int len; -- cgit v1.2.3-70-g09d2 From 8b415dcd762607379cf0a69c9dd25940da1d174e Mon Sep 17 00:00:00 2001 From: Geoff Levand Date: Fri, 5 Apr 2013 19:20:30 +0000 Subject: KVM: Move kvm_rebooting declaration out of x86 The variable kvm_rebooting is a common kvm variable, so move its declaration from arch/x86/include/asm/kvm_host.h to include/asm/kvm_host.h. Fixes this sparse warning when building on arm64: virt/kvm/kvm_main.c:warning: symbol 'kvm_rebooting' was not declared. Should it be static? Signed-off-by: Geoff Levand Signed-off-by: Gleb Natapov --- arch/x86/include/asm/kvm_host.h | 1 - include/linux/kvm_host.h | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 628163c0c6e..b2c7263c93b 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -972,7 +972,6 @@ enum { * Trap the fault and ignore the instruction if that happens. */ asmlinkage void kvm_spurious_fault(void); -extern bool kvm_rebooting; #define ____kvm_handle_fault_on_reboot(insn, cleanup_insn) \ "666: " insn "\n\t" \ diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 71fed38c720..20d77d24d76 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1061,6 +1061,8 @@ static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu) } } +extern bool kvm_rebooting; + #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT static inline void kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool val) -- cgit v1.2.3-70-g09d2 From 991eebf9f8e523e7ff1e4d31ac80641582b2e57a Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Thu, 11 Apr 2013 12:10:51 +0300 Subject: KVM: VMX: do not try to reexecute failed instruction while emulating invalid guest state During invalid guest state emulation vcpu cannot enter guest mode to try to reexecute instruction that emulator failed to emulate, so emulation will happen again and again. Prevent that by telling the emulator that instruction reexecution should not be attempted. Signed-off-by: Gleb Natapov --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/vmx.c | 2 +- arch/x86/kvm/x86.c | 13 +++++++++---- 3 files changed, 11 insertions(+), 5 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index b2c7263c93b..82f1dc67782 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -795,6 +795,7 @@ enum emulation_result { #define EMULTYPE_TRAP_UD (1 << 1) #define EMULTYPE_SKIP (1 << 2) #define EMULTYPE_RETRY (1 << 3) +#define EMULTYPE_NO_REEXECUTE (1 << 4) int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2, int emulation_type, void *insn, int insn_len); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 669b8037873..d2686771481 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -5189,7 +5189,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) if (test_bit(KVM_REQ_EVENT, &vcpu->requests)) return 1; - err = emulate_instruction(vcpu, 0); + err = emulate_instruction(vcpu, EMULTYPE_NO_REEXECUTE); if (err == EMULATE_DO_MMIO) { ret = 0; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index eb9927e0af1..999d1243a6c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4765,11 +4765,15 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu) } static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2, - bool write_fault_to_shadow_pgtable) + bool write_fault_to_shadow_pgtable, + int emulation_type) { gpa_t gpa = cr2; pfn_t pfn; + if (emulation_type & EMULTYPE_NO_REEXECUTE) + return false; + if (!vcpu->arch.mmu.direct_map) { /* * Write permission should be allowed since only @@ -4912,8 +4916,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, if (r != EMULATION_OK) { if (emulation_type & EMULTYPE_TRAP_UD) return EMULATE_FAIL; - if (reexecute_instruction(vcpu, cr2, - write_fault_to_spt)) + if (reexecute_instruction(vcpu, cr2, write_fault_to_spt, + emulation_type)) return EMULATE_DONE; if (emulation_type & EMULTYPE_SKIP) return EMULATE_FAIL; @@ -4943,7 +4947,8 @@ restart: return EMULATE_DONE; if (r == EMULATION_FAILED) { - if (reexecute_instruction(vcpu, cr2, write_fault_to_spt)) + if (reexecute_instruction(vcpu, cr2, write_fault_to_spt, + emulation_type)) return EMULATE_DONE; return handle_emulation_failure(vcpu); -- cgit v1.2.3-70-g09d2 From a547c6db4d2f16ba5ce8e7054bffad6acc248d40 Mon Sep 17 00:00:00 2001 From: Yang Zhang Date: Thu, 11 Apr 2013 19:25:10 +0800 Subject: KVM: VMX: Enable acknowledge interupt on vmexit The "acknowledge interrupt on exit" feature controls processor behavior for external interrupt acknowledgement. When this control is set, the processor acknowledges the interrupt controller to acquire the interrupt vector on VM exit. After enabling this feature, an interrupt which arrived when target cpu is running in vmx non-root mode will be handled by vmx handler instead of handler in idt. Currently, vmx handler only fakes an interrupt stack and jump to idt table to let real handler to handle it. Further, we will recognize the interrupt and only delivery the interrupt which not belong to current vcpu through idt table. The interrupt which belonged to current vcpu will be handled inside vmx handler. This will reduce the interrupt handle cost of KVM. Also, interrupt enable logic is changed if this feature is turnning on: Before this patch, hypervior call local_irq_enable() to enable it directly. Now IF bit is set on interrupt stack frame, and will be enabled on a return from interrupt handler if exterrupt interrupt exists. If no external interrupt, still call local_irq_enable() to enable it. Refer to Intel SDM volum 3, chapter 33.2. Signed-off-by: Yang Zhang Reviewed-by: Gleb Natapov Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/svm.c | 6 +++++ arch/x86/kvm/vmx.c | 58 ++++++++++++++++++++++++++++++++++++++--- arch/x86/kvm/x86.c | 4 ++- 4 files changed, 64 insertions(+), 5 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 82f1dc67782..68d438630dd 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -727,6 +727,7 @@ struct kvm_x86_ops { int (*check_intercept)(struct kvm_vcpu *vcpu, struct x86_instruction_info *info, enum x86_intercept_stage stage); + void (*handle_external_intr)(struct kvm_vcpu *vcpu); }; struct kvm_arch_async_pf { diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 7a46c1f4686..2f8fe3f0683 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -4233,6 +4233,11 @@ out: return ret; } +static void svm_handle_external_intr(struct kvm_vcpu *vcpu) +{ + local_irq_enable(); +} + static struct kvm_x86_ops svm_x86_ops = { .cpu_has_kvm_support = has_svm, .disabled_by_bios = is_disabled, @@ -4328,6 +4333,7 @@ static struct kvm_x86_ops svm_x86_ops = { .set_tdp_cr3 = set_tdp_cr3, .check_intercept = svm_check_intercept, + .handle_external_intr = svm_handle_external_intr, }; static int __init svm_init(void) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 71755573b7c..7a7605f0444 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -378,6 +378,7 @@ struct vcpu_vmx { struct shared_msr_entry *guest_msrs; int nmsrs; int save_nmsrs; + unsigned long host_idt_base; #ifdef CONFIG_X86_64 u64 msr_host_kernel_gs_base; u64 msr_guest_kernel_gs_base; @@ -2627,7 +2628,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) #ifdef CONFIG_X86_64 min |= VM_EXIT_HOST_ADDR_SPACE_SIZE; #endif - opt = VM_EXIT_SAVE_IA32_PAT | VM_EXIT_LOAD_IA32_PAT; + opt = VM_EXIT_SAVE_IA32_PAT | VM_EXIT_LOAD_IA32_PAT | + VM_EXIT_ACK_INTR_ON_EXIT; if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS, &_vmexit_control) < 0) return -EIO; @@ -3879,7 +3881,7 @@ static void vmx_disable_intercept_msr_write_x2apic(u32 msr) * Note that host-state that does change is set elsewhere. E.g., host-state * that is set differently for each CPU is set in vmx_vcpu_load(), not here. */ -static void vmx_set_constant_host_state(void) +static void vmx_set_constant_host_state(struct vcpu_vmx *vmx) { u32 low32, high32; unsigned long tmpl; @@ -3907,6 +3909,7 @@ static void vmx_set_constant_host_state(void) native_store_idt(&dt); vmcs_writel(HOST_IDTR_BASE, dt.address); /* 22.2.4 */ + vmx->host_idt_base = dt.address; vmcs_writel(HOST_RIP, vmx_return); /* 22.2.5 */ @@ -4039,7 +4042,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) vmcs_write16(HOST_FS_SELECTOR, 0); /* 22.2.4 */ vmcs_write16(HOST_GS_SELECTOR, 0); /* 22.2.4 */ - vmx_set_constant_host_state(); + vmx_set_constant_host_state(vmx); #ifdef CONFIG_X86_64 rdmsrl(MSR_FS_BASE, a); vmcs_writel(HOST_FS_BASE, a); /* 22.2.4 */ @@ -6399,6 +6402,52 @@ static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx) } } +static void vmx_handle_external_intr(struct kvm_vcpu *vcpu) +{ + u32 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); + + /* + * If external interrupt exists, IF bit is set in rflags/eflags on the + * interrupt stack frame, and interrupt will be enabled on a return + * from interrupt handler. + */ + if ((exit_intr_info & (INTR_INFO_VALID_MASK | INTR_INFO_INTR_TYPE_MASK)) + == (INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR)) { + unsigned int vector; + unsigned long entry; + gate_desc *desc; + struct vcpu_vmx *vmx = to_vmx(vcpu); +#ifdef CONFIG_X86_64 + unsigned long tmp; +#endif + + vector = exit_intr_info & INTR_INFO_VECTOR_MASK; + desc = (gate_desc *)vmx->host_idt_base + vector; + entry = gate_offset(*desc); + asm volatile( +#ifdef CONFIG_X86_64 + "mov %%" _ASM_SP ", %[sp]\n\t" + "and $0xfffffffffffffff0, %%" _ASM_SP "\n\t" + "push $%c[ss]\n\t" + "push %[sp]\n\t" +#endif + "pushf\n\t" + "orl $0x200, (%%" _ASM_SP ")\n\t" + __ASM_SIZE(push) " $%c[cs]\n\t" + "call *%[entry]\n\t" + : +#ifdef CONFIG_X86_64 + [sp]"=&r"(tmp) +#endif + : + [entry]"r"(entry), + [ss]"i"(__KERNEL_DS), + [cs]"i"(__KERNEL_CS) + ); + } else + local_irq_enable(); +} + static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx) { u32 exit_intr_info; @@ -7041,7 +7090,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) * Other fields are different per CPU, and will be set later when * vmx_vcpu_load() is called, and when vmx_save_host_state() is called. */ - vmx_set_constant_host_state(); + vmx_set_constant_host_state(vmx); /* * HOST_RSP is normally set correctly in vmx_vcpu_run() just before @@ -7718,6 +7767,7 @@ static struct kvm_x86_ops vmx_x86_ops = { .set_tdp_cr3 = vmx_set_cr3, .check_intercept = vmx_check_intercept, + .handle_external_intr = vmx_handle_external_intr, }; static int __init vmx_init(void) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ae9744d03c8..1562671a8e1 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5820,7 +5820,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) vcpu->mode = OUTSIDE_GUEST_MODE; smp_wmb(); - local_irq_enable(); + + /* Interrupt is enabled by handle_external_intr() */ + kvm_x86_ops->handle_external_intr(vcpu); ++vcpu->stat.exits; -- cgit v1.2.3-70-g09d2 From d78f2664832f8d70e36422af9a10e44276dced48 Mon Sep 17 00:00:00 2001 From: Yang Zhang Date: Thu, 11 Apr 2013 19:25:11 +0800 Subject: KVM: VMX: Register a new IPI for posted interrupt Posted Interrupt feature requires a special IPI to deliver posted interrupt to guest. And it should has a high priority so the interrupt will not be blocked by others. Normally, the posted interrupt will be consumed by vcpu if target vcpu is running and transparent to OS. But in some cases, the interrupt will arrive when target vcpu is scheduled out. And host will see it. So we need to register a dump handler to handle it. Signed-off-by: Yang Zhang Acked-by: Ingo Molnar Reviewed-by: Gleb Natapov Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/entry_arch.h | 4 ++++ arch/x86/include/asm/hardirq.h | 3 +++ arch/x86/include/asm/hw_irq.h | 1 + arch/x86/include/asm/irq_vectors.h | 5 +++++ arch/x86/kernel/entry_64.S | 5 +++++ arch/x86/kernel/irq.c | 22 ++++++++++++++++++++++ arch/x86/kernel/irqinit.c | 4 ++++ 7 files changed, 44 insertions(+) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h index 40afa0005c6..9bd4ecac72b 100644 --- a/arch/x86/include/asm/entry_arch.h +++ b/arch/x86/include/asm/entry_arch.h @@ -19,6 +19,10 @@ BUILD_INTERRUPT(reboot_interrupt,REBOOT_VECTOR) BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR) +#ifdef CONFIG_HAVE_KVM +BUILD_INTERRUPT(kvm_posted_intr_ipi, POSTED_INTR_VECTOR) +#endif + /* * every pentium local APIC has two 'local interrupts', with a * soft-definable vector attached to both interrupts, one of diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index 81f04cee5f7..ab0ae1aa6d0 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h @@ -11,6 +11,9 @@ typedef struct { unsigned int apic_timer_irqs; /* arch dependent */ unsigned int irq_spurious_count; unsigned int icr_read_retry_count; +#endif +#ifdef CONFIG_HAVE_KVM + unsigned int kvm_posted_intr_ipis; #endif unsigned int x86_platform_ipis; /* arch dependent */ unsigned int apic_perf_irqs; diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 10a78c3d3d5..1da97efad08 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -28,6 +28,7 @@ /* Interrupt handlers registered during init_IRQ */ extern void apic_timer_interrupt(void); extern void x86_platform_ipi(void); +extern void kvm_posted_intr_ipi(void); extern void error_interrupt(void); extern void irq_work_interrupt(void); diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index aac5fa62a86..5702d7e3111 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -102,6 +102,11 @@ */ #define X86_PLATFORM_IPI_VECTOR 0xf7 +/* Vector for KVM to deliver posted interrupt IPI */ +#ifdef CONFIG_HAVE_KVM +#define POSTED_INTR_VECTOR 0xf2 +#endif + /* * IRQ work vector: */ diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index c1d01e6ca79..72720894103 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1166,6 +1166,11 @@ apicinterrupt LOCAL_TIMER_VECTOR \ apicinterrupt X86_PLATFORM_IPI_VECTOR \ x86_platform_ipi smp_x86_platform_ipi +#ifdef CONFIG_HAVE_KVM +apicinterrupt POSTED_INTR_VECTOR \ + kvm_posted_intr_ipi smp_kvm_posted_intr_ipi +#endif + apicinterrupt THRESHOLD_APIC_VECTOR \ threshold_interrupt smp_threshold_interrupt apicinterrupt THERMAL_APIC_VECTOR \ diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index e4595f10591..6ae6ea1d27d 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -228,6 +228,28 @@ void smp_x86_platform_ipi(struct pt_regs *regs) set_irq_regs(old_regs); } +#ifdef CONFIG_HAVE_KVM +/* + * Handler for POSTED_INTERRUPT_VECTOR. + */ +void smp_kvm_posted_intr_ipi(struct pt_regs *regs) +{ + struct pt_regs *old_regs = set_irq_regs(regs); + + ack_APIC_irq(); + + irq_enter(); + + exit_idle(); + + inc_irq_stat(kvm_posted_intr_ipis); + + irq_exit(); + + set_irq_regs(old_regs); +} +#endif + EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq); #ifdef CONFIG_HOTPLUG_CPU diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index 7dc4e459c2b..a2a1fbc594f 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -172,6 +172,10 @@ static void __init apic_intr_init(void) /* IPI for X86 platform specific use */ alloc_intr_gate(X86_PLATFORM_IPI_VECTOR, x86_platform_ipi); +#ifdef CONFIG_HAVE_KVM + /* IPI for KVM to deliver posted interrupt */ + alloc_intr_gate(POSTED_INTR_VECTOR, kvm_posted_intr_ipi); +#endif /* IPI vectors for APIC spurious and error interrupts */ alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); -- cgit v1.2.3-70-g09d2 From 01e439be7753c163932538276f04f95cb1b66697 Mon Sep 17 00:00:00 2001 From: Yang Zhang Date: Thu, 11 Apr 2013 19:25:12 +0800 Subject: KVM: VMX: Check the posted interrupt capability Detect the posted interrupt feature. If it exists, then set it in vmcs_config. Signed-off-by: Yang Zhang Reviewed-by: Gleb Natapov Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/vmx.h | 4 +++ arch/x86/kvm/vmx.c | 82 +++++++++++++++++++++++++++++++++++----------- 2 files changed, 66 insertions(+), 20 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index fc1c3134473..6f07f199913 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -71,6 +71,7 @@ #define PIN_BASED_NMI_EXITING 0x00000008 #define PIN_BASED_VIRTUAL_NMIS 0x00000020 #define PIN_BASED_VMX_PREEMPTION_TIMER 0x00000040 +#define PIN_BASED_POSTED_INTR 0x00000080 #define PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x00000016 @@ -102,6 +103,7 @@ /* VMCS Encodings */ enum vmcs_field { VIRTUAL_PROCESSOR_ID = 0x00000000, + POSTED_INTR_NV = 0x00000002, GUEST_ES_SELECTOR = 0x00000800, GUEST_CS_SELECTOR = 0x00000802, GUEST_SS_SELECTOR = 0x00000804, @@ -136,6 +138,8 @@ enum vmcs_field { VIRTUAL_APIC_PAGE_ADDR_HIGH = 0x00002013, APIC_ACCESS_ADDR = 0x00002014, APIC_ACCESS_ADDR_HIGH = 0x00002015, + POSTED_INTR_DESC_ADDR = 0x00002016, + POSTED_INTR_DESC_ADDR_HIGH = 0x00002017, EPT_POINTER = 0x0000201a, EPT_POINTER_HIGH = 0x0000201b, EOI_EXIT_BITMAP0 = 0x0000201c, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 7a7605f0444..7607c6c09c7 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -84,7 +84,8 @@ module_param(vmm_exclusive, bool, S_IRUGO); static bool __read_mostly fasteoi = 1; module_param(fasteoi, bool, S_IRUGO); -static bool __read_mostly enable_apicv_reg_vid; +static bool __read_mostly enable_apicv; +module_param(enable_apicv, bool, S_IRUGO); /* * If nested=1, nested virtualization is supported, i.e., guests may use @@ -366,6 +367,14 @@ struct nested_vmx { struct page *apic_access_page; }; +#define POSTED_INTR_ON 0 +/* Posted-Interrupt Descriptor */ +struct pi_desc { + u32 pir[8]; /* Posted interrupt requested */ + u32 control; /* bit 0 of control is outstanding notification bit */ + u32 rsvd[7]; +} __aligned(64); + struct vcpu_vmx { struct kvm_vcpu vcpu; unsigned long host_rsp; @@ -430,6 +439,9 @@ struct vcpu_vmx { bool rdtscp_enabled; + /* Posted interrupt descriptor */ + struct pi_desc pi_desc; + /* Support for a guest hypervisor (nested VMX) */ struct nested_vmx nested; }; @@ -785,6 +797,18 @@ static inline bool cpu_has_vmx_virtual_intr_delivery(void) SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY; } +static inline bool cpu_has_vmx_posted_intr(void) +{ + return vmcs_config.pin_based_exec_ctrl & PIN_BASED_POSTED_INTR; +} + +static inline bool cpu_has_vmx_apicv(void) +{ + return cpu_has_vmx_apic_register_virt() && + cpu_has_vmx_virtual_intr_delivery() && + cpu_has_vmx_posted_intr(); +} + static inline bool cpu_has_vmx_flexpriority(void) { return cpu_has_vmx_tpr_shadow() && @@ -2552,12 +2576,6 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) u32 _vmexit_control = 0; u32 _vmentry_control = 0; - min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING; - opt = PIN_BASED_VIRTUAL_NMIS; - if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS, - &_pin_based_exec_control) < 0) - return -EIO; - min = CPU_BASED_HLT_EXITING | #ifdef CONFIG_X86_64 CPU_BASED_CR8_LOAD_EXITING | @@ -2634,6 +2652,17 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) &_vmexit_control) < 0) return -EIO; + min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING; + opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR; + if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS, + &_pin_based_exec_control) < 0) + return -EIO; + + if (!(_cpu_based_2nd_exec_control & + SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) || + !(_vmexit_control & VM_EXIT_ACK_INTR_ON_EXIT)) + _pin_based_exec_control &= ~PIN_BASED_POSTED_INTR; + min = 0; opt = VM_ENTRY_LOAD_IA32_PAT; if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS, @@ -2812,11 +2841,10 @@ static __init int hardware_setup(void) if (!cpu_has_vmx_ple()) ple_gap = 0; - if (!cpu_has_vmx_apic_register_virt() || - !cpu_has_vmx_virtual_intr_delivery()) - enable_apicv_reg_vid = 0; + if (!cpu_has_vmx_apicv()) + enable_apicv = 0; - if (enable_apicv_reg_vid) + if (enable_apicv) kvm_x86_ops->update_cr8_intercept = NULL; else kvm_x86_ops->hwapic_irr_update = NULL; @@ -3875,6 +3903,11 @@ static void vmx_disable_intercept_msr_write_x2apic(u32 msr) msr, MSR_TYPE_W); } +static int vmx_vm_has_apicv(struct kvm *kvm) +{ + return enable_apicv && irqchip_in_kernel(kvm); +} + /* * Set up the vmcs's constant host-state fields, i.e., host-state fields that * will not change in the lifetime of the guest. @@ -3935,6 +3968,15 @@ static void set_cr4_guest_host_mask(struct vcpu_vmx *vmx) vmcs_writel(CR4_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr4_guest_owned_bits); } +static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx) +{ + u32 pin_based_exec_ctrl = vmcs_config.pin_based_exec_ctrl; + + if (!vmx_vm_has_apicv(vmx->vcpu.kvm)) + pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR; + return pin_based_exec_ctrl; +} + static u32 vmx_exec_control(struct vcpu_vmx *vmx) { u32 exec_control = vmcs_config.cpu_based_exec_ctrl; @@ -3952,11 +3994,6 @@ static u32 vmx_exec_control(struct vcpu_vmx *vmx) return exec_control; } -static int vmx_vm_has_apicv(struct kvm *kvm) -{ - return enable_apicv_reg_vid && irqchip_in_kernel(kvm); -} - static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) { u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl; @@ -4012,8 +4049,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */ /* Control */ - vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, - vmcs_config.pin_based_exec_ctrl); + vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_ctrl(vmx)); vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, vmx_exec_control(vmx)); @@ -4022,13 +4058,16 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) vmx_secondary_exec_control(vmx)); } - if (enable_apicv_reg_vid) { + if (vmx_vm_has_apicv(vmx->vcpu.kvm)) { vmcs_write64(EOI_EXIT_BITMAP0, 0); vmcs_write64(EOI_EXIT_BITMAP1, 0); vmcs_write64(EOI_EXIT_BITMAP2, 0); vmcs_write64(EOI_EXIT_BITMAP3, 0); vmcs_write16(GUEST_INTR_STATUS, 0); + + vmcs_write64(POSTED_INTR_NV, POSTED_INTR_VECTOR); + vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((&vmx->pi_desc))); } if (ple_gap) { @@ -4170,6 +4209,9 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu) vmcs_write64(APIC_ACCESS_ADDR, page_to_phys(vmx->vcpu.kvm->arch.apic_access_page)); + if (vmx_vm_has_apicv(vcpu->kvm)) + memset(&vmx->pi_desc, 0, sizeof(struct pi_desc)); + if (vmx->vpid != 0) vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); @@ -7842,7 +7884,7 @@ static int __init vmx_init(void) memcpy(vmx_msr_bitmap_longmode_x2apic, vmx_msr_bitmap_longmode, PAGE_SIZE); - if (enable_apicv_reg_vid) { + if (enable_apicv) { for (msr = 0x800; msr <= 0x8ff; msr++) vmx_disable_intercept_msr_read_x2apic(msr); -- cgit v1.2.3-70-g09d2 From a20ed54d6e470bf0d28921b7aadb6ca0da0ff0c3 Mon Sep 17 00:00:00 2001 From: Yang Zhang Date: Thu, 11 Apr 2013 19:25:15 +0800 Subject: KVM: VMX: Add the deliver posted interrupt algorithm Only deliver the posted interrupt when target vcpu is running and there is no previous interrupt pending in pir. Signed-off-by: Yang Zhang Reviewed-by: Gleb Natapov Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/kvm_host.h | 2 ++ arch/x86/kvm/lapic.c | 13 +++++++++ arch/x86/kvm/lapic.h | 1 + arch/x86/kvm/svm.c | 6 ++++ arch/x86/kvm/vmx.c | 64 ++++++++++++++++++++++++++++++++++++++++- virt/kvm/kvm_main.c | 1 + 6 files changed, 86 insertions(+), 1 deletion(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 68d438630dd..599f98b612d 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -701,6 +701,8 @@ struct kvm_x86_ops { void (*hwapic_isr_update)(struct kvm *kvm, int isr); void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set); + void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector); + void (*sync_pir_to_irr)(struct kvm_vcpu *vcpu); int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); int (*get_tdp_level)(void); u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index d197579435d..dbf74c922aa 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -318,6 +318,19 @@ static u8 count_vectors(void *bitmap) return count; } +void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir) +{ + u32 i, pir_val; + struct kvm_lapic *apic = vcpu->arch.apic; + + for (i = 0; i <= 7; i++) { + pir_val = xchg(&pir[i], 0); + if (pir_val) + *((u32 *)(apic->regs + APIC_IRR + i * 0x10)) |= pir_val; + } +} +EXPORT_SYMBOL_GPL(kvm_apic_update_irr); + static inline int apic_test_and_set_irr(int vec, struct kvm_lapic *apic) { apic->irr_pending = true; diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 7fe0c9180ea..c730ac9fe80 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -54,6 +54,7 @@ u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu); void kvm_apic_set_version(struct kvm_vcpu *vcpu); void kvm_apic_update_tmr(struct kvm_vcpu *vcpu, u32 *tmr); +void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir); int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest); int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda); int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq, diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 2f8fe3f0683..d6713e18bbc 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3577,6 +3577,11 @@ static void svm_hwapic_isr_update(struct kvm *kvm, int isr) return; } +static void svm_sync_pir_to_irr(struct kvm_vcpu *vcpu) +{ + return; +} + static int svm_nmi_allowed(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -4305,6 +4310,7 @@ static struct kvm_x86_ops svm_x86_ops = { .vm_has_apicv = svm_vm_has_apicv, .load_eoi_exitmap = svm_load_eoi_exitmap, .hwapic_isr_update = svm_hwapic_isr_update, + .sync_pir_to_irr = svm_sync_pir_to_irr, .set_tss_addr = svm_set_tss_addr, .get_tdp_level = get_npt_level, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 8535519d035..3a14d8a0ee4 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -375,6 +375,23 @@ struct pi_desc { u32 rsvd[7]; } __aligned(64); +static bool pi_test_and_set_on(struct pi_desc *pi_desc) +{ + return test_and_set_bit(POSTED_INTR_ON, + (unsigned long *)&pi_desc->control); +} + +static bool pi_test_and_clear_on(struct pi_desc *pi_desc) +{ + return test_and_clear_bit(POSTED_INTR_ON, + (unsigned long *)&pi_desc->control); +} + +static int pi_test_and_set_pir(int vector, struct pi_desc *pi_desc) +{ + return test_and_set_bit(vector, (unsigned long *)pi_desc->pir); +} + struct vcpu_vmx { struct kvm_vcpu vcpu; unsigned long host_rsp; @@ -639,6 +656,7 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); static bool guest_state_valid(struct kvm_vcpu *vcpu); static u32 vmx_segment_access_rights(struct kvm_segment *var); +static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu); static DEFINE_PER_CPU(struct vmcs *, vmxarea); static DEFINE_PER_CPU(struct vmcs *, current_vmcs); @@ -2846,8 +2864,11 @@ static __init int hardware_setup(void) if (enable_apicv) kvm_x86_ops->update_cr8_intercept = NULL; - else + else { kvm_x86_ops->hwapic_irr_update = NULL; + kvm_x86_ops->deliver_posted_interrupt = NULL; + kvm_x86_ops->sync_pir_to_irr = vmx_sync_pir_to_irr_dummy; + } if (nested) nested_vmx_setup_ctls_msrs(); @@ -3908,6 +3929,45 @@ static int vmx_vm_has_apicv(struct kvm *kvm) return enable_apicv && irqchip_in_kernel(kvm); } +/* + * Send interrupt to vcpu via posted interrupt way. + * 1. If target vcpu is running(non-root mode), send posted interrupt + * notification to vcpu and hardware will sync PIR to vIRR atomically. + * 2. If target vcpu isn't running(root mode), kick it to pick up the + * interrupt from PIR in next vmentry. + */ +static void vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + int r; + + if (pi_test_and_set_pir(vector, &vmx->pi_desc)) + return; + + r = pi_test_and_set_on(&vmx->pi_desc); + kvm_make_request(KVM_REQ_EVENT, vcpu); + if (!r && (vcpu->mode == IN_GUEST_MODE)) + apic->send_IPI_mask(get_cpu_mask(vcpu->cpu), + POSTED_INTR_VECTOR); + else + kvm_vcpu_kick(vcpu); +} + +static void vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + + if (!pi_test_and_clear_on(&vmx->pi_desc)) + return; + + kvm_apic_update_irr(vcpu, vmx->pi_desc.pir); +} + +static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu) +{ + return; +} + /* * Set up the vmcs's constant host-state fields, i.e., host-state fields that * will not change in the lifetime of the guest. @@ -7784,6 +7844,8 @@ static struct kvm_x86_ops vmx_x86_ops = { .load_eoi_exitmap = vmx_load_eoi_exitmap, .hwapic_irr_update = vmx_hwapic_irr_update, .hwapic_isr_update = vmx_hwapic_isr_update, + .sync_pir_to_irr = vmx_sync_pir_to_irr, + .deliver_posted_interrupt = vmx_deliver_posted_interrupt, .set_tss_addr = vmx_set_tss_addr, .get_tdp_level = get_ept_level, diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 65a9e0716a8..aaac1a7a9ea 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1671,6 +1671,7 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu) smp_send_reschedule(cpu); put_cpu(); } +EXPORT_SYMBOL_GPL(kvm_vcpu_kick); #endif /* !CONFIG_S390 */ void kvm_resched(struct kvm_vcpu *vcpu) -- cgit v1.2.3-70-g09d2 From 89662e566c73230dab9f1d99dbe2f6d9c8c189a8 Mon Sep 17 00:00:00 2001 From: Abel Gordon Date: Thu, 18 Apr 2013 14:34:55 +0300 Subject: KVM: nVMX: Shadow-vmcs control fields/bits Add definitions for all the vmcs control fields/bits required to enable vmcs-shadowing Signed-off-by: Abel Gordon Reviewed-by: Orit Wasserman Signed-off-by: Gleb Natapov --- arch/x86/include/asm/vmx.h | 3 +++ arch/x86/include/uapi/asm/msr-index.h | 2 ++ 2 files changed, 5 insertions(+) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 6f07f199913..f3e01a2cbaa 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -65,6 +65,7 @@ #define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY 0x00000200 #define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 #define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000 +#define SECONDARY_EXEC_SHADOW_VMCS 0x00004000 #define PIN_BASED_EXT_INTR_MASK 0x00000001 @@ -150,6 +151,8 @@ enum vmcs_field { EOI_EXIT_BITMAP2_HIGH = 0x00002021, EOI_EXIT_BITMAP3 = 0x00002022, EOI_EXIT_BITMAP3_HIGH = 0x00002023, + VMREAD_BITMAP = 0x00002026, + VMWRITE_BITMAP = 0x00002028, GUEST_PHYSICAL_ADDRESS = 0x00002400, GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401, VMCS_LINK_POINTER = 0x00002800, diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h index 892ce40a747..dc469e3e708 100644 --- a/arch/x86/include/uapi/asm/msr-index.h +++ b/arch/x86/include/uapi/asm/msr-index.h @@ -522,6 +522,8 @@ #define VMX_BASIC_MEM_TYPE_WB 6LLU #define VMX_BASIC_INOUT 0x0040000000000000LLU +/* MSR_IA32_VMX_MISC bits */ +#define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29) /* AMD-V MSRs */ #define MSR_VM_CR 0xc0010114 -- cgit v1.2.3-70-g09d2 From 384bb783275145b70d769acf4c687957d1c61802 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Sat, 20 Apr 2013 10:52:36 +0200 Subject: KVM: nVMX: Validate EFER values for VM_ENTRY/EXIT_LOAD_IA32_EFER As we may emulate the loading of EFER on VM-entry and VM-exit, implement the checks that VMX performs on the guest and host values on vmlaunch/ vmresume. Factor out kvm_valid_efer for this purpose which checks for set reserved bits. Signed-off-by: Jan Kiszka Reviewed-by: Paolo Bonzini Signed-off-by: Gleb Natapov --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/vmx.c | 40 ++++++++++++++++++++++++++++++++++++++++ arch/x86/kvm/x86.c | 29 +++++++++++++++++++---------- 3 files changed, 60 insertions(+), 10 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 599f98b612d..18635ae42a8 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -809,6 +809,7 @@ static inline int emulate_instruction(struct kvm_vcpu *vcpu, } void kvm_enable_efer_bits(u64); +bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer); int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *data); int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 11ea3cbbb78..d7ef5568622 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -7558,6 +7558,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) struct vcpu_vmx *vmx = to_vmx(vcpu); int cpu; struct loaded_vmcs *vmcs02; + bool ia32e; if (!nested_vmx_check_permission(vcpu) || !nested_vmx_check_vmcs12(vcpu)) @@ -7648,6 +7649,45 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) return 1; } + /* + * If the “load IA32_EFER” VM-entry control is 1, the following checks + * are performed on the field for the IA32_EFER MSR: + * - Bits reserved in the IA32_EFER MSR must be 0. + * - Bit 10 (corresponding to IA32_EFER.LMA) must equal the value of + * the IA-32e mode guest VM-exit control. It must also be identical + * to bit 8 (LME) if bit 31 in the CR0 field (corresponding to + * CR0.PG) is 1. + */ + if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER) { + ia32e = (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) != 0; + if (!kvm_valid_efer(vcpu, vmcs12->guest_ia32_efer) || + ia32e != !!(vmcs12->guest_ia32_efer & EFER_LMA) || + ((vmcs12->guest_cr0 & X86_CR0_PG) && + ia32e != !!(vmcs12->guest_ia32_efer & EFER_LME))) { + nested_vmx_entry_failure(vcpu, vmcs12, + EXIT_REASON_INVALID_STATE, ENTRY_FAIL_DEFAULT); + return 1; + } + } + + /* + * If the load IA32_EFER VM-exit control is 1, bits reserved in the + * IA32_EFER MSR must be 0 in the field for that register. In addition, + * the values of the LMA and LME bits in the field must each be that of + * the host address-space size VM-exit control. + */ + if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) { + ia32e = (vmcs12->vm_exit_controls & + VM_EXIT_HOST_ADDR_SPACE_SIZE) != 0; + if (!kvm_valid_efer(vcpu, vmcs12->host_ia32_efer) || + ia32e != !!(vmcs12->host_ia32_efer & EFER_LMA) || + ia32e != !!(vmcs12->host_ia32_efer & EFER_LME)) { + nested_vmx_entry_failure(vcpu, vmcs12, + EXIT_REASON_INVALID_STATE, ENTRY_FAIL_DEFAULT); + return 1; + } + } + /* * We're finally done with prerequisite checking, and can start with * the nested entry. diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e730a462ed0..2a434bf3918 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -845,23 +845,17 @@ static const u32 emulated_msrs[] = { MSR_IA32_MCG_CTL, }; -static int set_efer(struct kvm_vcpu *vcpu, u64 efer) +bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer) { - u64 old_efer = vcpu->arch.efer; - if (efer & efer_reserved_bits) - return 1; - - if (is_paging(vcpu) - && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME)) - return 1; + return false; if (efer & EFER_FFXSR) { struct kvm_cpuid_entry2 *feat; feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT))) - return 1; + return false; } if (efer & EFER_SVME) { @@ -869,9 +863,24 @@ static int set_efer(struct kvm_vcpu *vcpu, u64 efer) feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM))) - return 1; + return false; } + return true; +} +EXPORT_SYMBOL_GPL(kvm_valid_efer); + +static int set_efer(struct kvm_vcpu *vcpu, u64 efer) +{ + u64 old_efer = vcpu->arch.efer; + + if (!kvm_valid_efer(vcpu, efer)) + return 1; + + if (is_paging(vcpu) + && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME)) + return 1; + efer &= ~EFER_LMA; efer |= vcpu->arch.efer & EFER_LMA; -- cgit v1.2.3-70-g09d2 From 8175e5b79c38a1d85225da516fa1a0ecbf2fdbca Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 15 Apr 2013 10:42:33 +0200 Subject: KVM: Add KVM_IRQCHIP_NUM_PINS in addition to KVM_IOAPIC_NUM_PINS The concept of routing interrupt lines to an irqchip is nothing that is IOAPIC specific. Every irqchip has a maximum number of pins that can be linked to irq lines. So let's add a new define that allows us to reuse generic code for non-IOAPIC platforms. Signed-off-by: Alexander Graf Acked-by: Michael S. Tsirkin --- arch/x86/include/asm/kvm_host.h | 2 ++ include/linux/kvm_host.h | 2 +- virt/kvm/irq_comm.c | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 18635ae42a8..14337fa464b 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -43,6 +43,8 @@ #define KVM_PIO_PAGE_OFFSET 1 #define KVM_COALESCED_MMIO_PAGE_OFFSET 2 +#define KVM_IRQCHIP_NUM_PINS KVM_IOAPIC_NUM_PINS + #define CR0_RESERVED_BITS \ (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \ | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \ diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 93a50054d46..bf3b1dcb8b3 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -307,7 +307,7 @@ struct kvm_kernel_irq_routing_entry { #ifdef __KVM_HAVE_IOAPIC struct kvm_irq_routing_table { - int chip[KVM_NR_IRQCHIPS][KVM_IOAPIC_NUM_PINS]; + int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS]; struct kvm_kernel_irq_routing_entry *rt_entries; u32 nr_rt_entries; /* diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index 25ab48007ad..7c0071de9e8 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -480,7 +480,7 @@ int kvm_set_irq_routing(struct kvm *kvm, new->nr_rt_entries = nr_rt_entries; for (i = 0; i < 3; i++) - for (j = 0; j < KVM_IOAPIC_NUM_PINS; j++) + for (j = 0; j < KVM_IRQCHIP_NUM_PINS; j++) new->chip[i][j] = -1; for (i = 0; i < nr; ++i) { -- cgit v1.2.3-70-g09d2 From 730dca42c1d363c939da18c1499c7327c66e2b37 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Sun, 28 Apr 2013 10:50:52 +0200 Subject: KVM: x86: Rework request for immediate exit The VMX implementation of enable_irq_window raised KVM_REQ_IMMEDIATE_EXIT after we checked it in vcpu_enter_guest. This caused infinite loops on vmentry. Fix it by letting enable_irq_window signal the need for an immediate exit via its return value and drop KVM_REQ_IMMEDIATE_EXIT. This issue only affects nested VMX scenarios. Signed-off-by: Jan Kiszka Signed-off-by: Gleb Natapov --- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/svm.c | 3 ++- arch/x86/kvm/vmx.c | 15 ++++++++------- arch/x86/kvm/x86.c | 7 +++---- include/linux/kvm_host.h | 15 +++++++-------- 5 files changed, 21 insertions(+), 21 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 18635ae42a8..111b4a0c390 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -694,7 +694,7 @@ struct kvm_x86_ops { bool (*get_nmi_mask)(struct kvm_vcpu *vcpu); void (*set_nmi_mask)(struct kvm_vcpu *vcpu, bool masked); void (*enable_nmi_window)(struct kvm_vcpu *vcpu); - void (*enable_irq_window)(struct kvm_vcpu *vcpu); + int (*enable_irq_window)(struct kvm_vcpu *vcpu); void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); int (*vm_has_apicv)(struct kvm *kvm); void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 15c9cccd716..7f896cbe717 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3632,7 +3632,7 @@ static int svm_interrupt_allowed(struct kvm_vcpu *vcpu) return ret; } -static void enable_irq_window(struct kvm_vcpu *vcpu) +static int enable_irq_window(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -3646,6 +3646,7 @@ static void enable_irq_window(struct kvm_vcpu *vcpu) svm_set_vintr(svm); svm_inject_irq(svm, 0x0); } + return 0; } static void enable_nmi_window(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 0f0cb311062..74c525e2c60 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -4398,22 +4398,23 @@ static bool nested_exit_on_nmi(struct kvm_vcpu *vcpu) PIN_BASED_NMI_EXITING; } -static void enable_irq_window(struct kvm_vcpu *vcpu) +static int enable_irq_window(struct kvm_vcpu *vcpu) { u32 cpu_based_vm_exec_control; - if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) { + + if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) /* * We get here if vmx_interrupt_allowed() said we can't - * inject to L1 now because L2 must run. Ask L2 to exit - * right after entry, so we can inject to L1 more promptly. + * inject to L1 now because L2 must run. The caller will have + * to make L2 exit right after entry, so we can inject to L1 + * more promptly. */ - kvm_make_request(KVM_REQ_IMMEDIATE_EXIT, vcpu); - return; - } + return -EBUSY; cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING; vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); + return 0; } static void enable_nmi_window(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 2a434bf3918..c522260b5bb 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5692,7 +5692,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) int r; bool req_int_win = !irqchip_in_kernel(vcpu->kvm) && vcpu->run->request_interrupt_window; - bool req_immediate_exit = 0; + bool req_immediate_exit = false; if (vcpu->requests) { if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) @@ -5734,8 +5734,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) record_steal_time(vcpu); if (kvm_check_request(KVM_REQ_NMI, vcpu)) process_nmi(vcpu); - req_immediate_exit = - kvm_check_request(KVM_REQ_IMMEDIATE_EXIT, vcpu); if (kvm_check_request(KVM_REQ_PMU, vcpu)) kvm_handle_pmu_event(vcpu); if (kvm_check_request(KVM_REQ_PMI, vcpu)) @@ -5757,7 +5755,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) if (vcpu->arch.nmi_pending) kvm_x86_ops->enable_nmi_window(vcpu); else if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win) - kvm_x86_ops->enable_irq_window(vcpu); + req_immediate_exit = + kvm_x86_ops->enable_irq_window(vcpu) != 0; if (kvm_lapic_enabled(vcpu)) { /* diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 93a50054d46..7bde42470e3 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -119,14 +119,13 @@ static inline bool is_error_page(struct page *page) #define KVM_REQ_APF_HALT 12 #define KVM_REQ_STEAL_UPDATE 13 #define KVM_REQ_NMI 14 -#define KVM_REQ_IMMEDIATE_EXIT 15 -#define KVM_REQ_PMU 16 -#define KVM_REQ_PMI 17 -#define KVM_REQ_WATCHDOG 18 -#define KVM_REQ_MASTERCLOCK_UPDATE 19 -#define KVM_REQ_MCLOCK_INPROGRESS 20 -#define KVM_REQ_EPR_EXIT 21 -#define KVM_REQ_SCAN_IOAPIC 22 +#define KVM_REQ_PMU 15 +#define KVM_REQ_PMI 16 +#define KVM_REQ_WATCHDOG 17 +#define KVM_REQ_MASTERCLOCK_UPDATE 18 +#define KVM_REQ_MCLOCK_INPROGRESS 19 +#define KVM_REQ_EPR_EXIT 20 +#define KVM_REQ_SCAN_IOAPIC 21 #define KVM_USERSPACE_IRQ_SOURCE_ID 0 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 -- cgit v1.2.3-70-g09d2 From cbf64358588ae45dcf0207dbc97fba783577d64a Mon Sep 17 00:00:00 2001 From: Chegu Vinod Date: Sat, 27 Apr 2013 18:31:04 -0700 Subject: KVM: x86: Increase the "hard" max VCPU limit KVM guests today use 8bit APIC ids allowing for 256 ID's. Reserving one ID for Broadcast interrupts should leave 255 ID's. In case of KVM there is no need for reserving another ID for IO-APIC so the hard max limit for VCPUS can be increased from 254 to 255. (This was confirmed by Gleb Natapov http://article.gmane.org/gmane.comp.emulators.kvm.devel/99713 ) Signed-off-by: Chegu Vinod Signed-off-by: Gleb Natapov --- arch/x86/include/asm/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 38f2cc0a5a2..ec14b7245a4 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -31,7 +31,7 @@ #include #include -#define KVM_MAX_VCPUS 254 +#define KVM_MAX_VCPUS 255 #define KVM_SOFT_MAX_VCPUS 160 #define KVM_USER_MEM_SLOTS 125 /* memory slots that are not exposed to userspace */ -- cgit v1.2.3-70-g09d2 From 03b28f8133165dbe4cd922054d599e26b8119508 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 29 Apr 2013 16:46:42 +0200 Subject: KVM: x86: Account for failing enable_irq_window for NMI window request With VMX, enable_irq_window can now return -EBUSY, in which case an immediate exit shall be requested before entering the guest. Account for this also in enable_nmi_window which uses enable_irq_window in absence of vnmi support, e.g. Reviewed-by: Paolo Bonzini Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/svm.c | 5 +++-- arch/x86/kvm/vmx.c | 16 +++++++--------- arch/x86/kvm/x86.c | 3 ++- 4 files changed, 13 insertions(+), 13 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index ec14b7245a4..3741c653767 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -695,7 +695,7 @@ struct kvm_x86_ops { int (*nmi_allowed)(struct kvm_vcpu *vcpu); bool (*get_nmi_mask)(struct kvm_vcpu *vcpu); void (*set_nmi_mask)(struct kvm_vcpu *vcpu, bool masked); - void (*enable_nmi_window)(struct kvm_vcpu *vcpu); + int (*enable_nmi_window)(struct kvm_vcpu *vcpu); int (*enable_irq_window)(struct kvm_vcpu *vcpu); void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); int (*vm_has_apicv)(struct kvm *kvm); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 7f896cbe717..3421d5a92c0 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3649,13 +3649,13 @@ static int enable_irq_window(struct kvm_vcpu *vcpu) return 0; } -static void enable_nmi_window(struct kvm_vcpu *vcpu) +static int enable_nmi_window(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); if ((svm->vcpu.arch.hflags & (HF_NMI_MASK | HF_IRET_MASK)) == HF_NMI_MASK) - return; /* IRET will cause a vm exit */ + return 0; /* IRET will cause a vm exit */ /* * Something prevents NMI from been injected. Single step over possible @@ -3664,6 +3664,7 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu) svm->nmi_singlestep = true; svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF); update_db_bp_intercept(vcpu); + return 0; } static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index e10217e99d2..e53a5f7aa8a 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -4417,22 +4417,20 @@ static int enable_irq_window(struct kvm_vcpu *vcpu) return 0; } -static void enable_nmi_window(struct kvm_vcpu *vcpu) +static int enable_nmi_window(struct kvm_vcpu *vcpu) { u32 cpu_based_vm_exec_control; - if (!cpu_has_virtual_nmis()) { - enable_irq_window(vcpu); - return; - } + if (!cpu_has_virtual_nmis()) + return enable_irq_window(vcpu); + + if (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) + return enable_irq_window(vcpu); - if (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) { - enable_irq_window(vcpu); - return; - } cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING; vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); + return 0; } static void vmx_inject_irq(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 96f914e828d..94f35d22c5f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5756,7 +5756,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) /* enable NMI/IRQ window open exits if needed */ if (vcpu->arch.nmi_pending) - kvm_x86_ops->enable_nmi_window(vcpu); + req_immediate_exit = + kvm_x86_ops->enable_nmi_window(vcpu) != 0; else if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win) req_immediate_exit = kvm_x86_ops->enable_irq_window(vcpu) != 0; -- cgit v1.2.3-70-g09d2