diff options
Diffstat (limited to 'arch/x86/kvm/svm.c')
-rw-r--r-- | arch/x86/kvm/svm.c | 867 |
1 files changed, 560 insertions, 307 deletions
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index b81a9b7c2ca..63fec1531e8 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -31,6 +31,7 @@ #include <asm/tlbflush.h> #include <asm/desc.h> +#include <asm/kvm_para.h> #include <asm/virtext.h> #include "trace.h" @@ -50,6 +51,10 @@ MODULE_LICENSE("GPL"); #define SVM_FEATURE_LBRV (1 << 1) #define SVM_FEATURE_SVML (1 << 2) #define SVM_FEATURE_NRIP (1 << 3) +#define SVM_FEATURE_TSC_RATE (1 << 4) +#define SVM_FEATURE_VMCB_CLEAN (1 << 5) +#define SVM_FEATURE_FLUSH_ASID (1 << 6) +#define SVM_FEATURE_DECODE_ASSIST (1 << 7) #define SVM_FEATURE_PAUSE_FILTER (1 << 10) #define NESTED_EXIT_HOST 0 /* Exit handled on host level */ @@ -97,10 +102,8 @@ struct nested_state { unsigned long vmexit_rax; /* cache for intercepts of the guest */ - u16 intercept_cr_read; - u16 intercept_cr_write; - u16 intercept_dr_read; - u16 intercept_dr_write; + u32 intercept_cr; + u32 intercept_dr; u32 intercept_exceptions; u64 intercept; @@ -123,7 +126,12 @@ struct vcpu_svm { u64 next_rip; u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS]; - u64 host_gs_base; + struct { + u16 fs; + u16 gs; + u16 ldt; + u64 gs_base; + } host; u32 *msrpm; @@ -133,6 +141,7 @@ struct vcpu_svm { unsigned int3_injected; unsigned long int3_rip; + u32 apf_reason; }; #define MSR_INVALID 0xffffffffU @@ -180,14 +189,151 @@ static int nested_svm_vmexit(struct vcpu_svm *svm); static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, bool has_error_code, u32 error_code); +enum { + VMCB_INTERCEPTS, /* Intercept vectors, TSC offset, + pause filter count */ + VMCB_PERM_MAP, /* IOPM Base and MSRPM Base */ + VMCB_ASID, /* ASID */ + VMCB_INTR, /* int_ctl, int_vector */ + VMCB_NPT, /* npt_en, nCR3, gPAT */ + VMCB_CR, /* CR0, CR3, CR4, EFER */ + VMCB_DR, /* DR6, DR7 */ + VMCB_DT, /* GDT, IDT */ + VMCB_SEG, /* CS, DS, SS, ES, CPL */ + VMCB_CR2, /* CR2 only */ + VMCB_LBR, /* DBGCTL, BR_FROM, BR_TO, LAST_EX_FROM, LAST_EX_TO */ + VMCB_DIRTY_MAX, +}; + +/* TPR and CR2 are always written before VMRUN */ +#define VMCB_ALWAYS_DIRTY_MASK ((1U << VMCB_INTR) | (1U << VMCB_CR2)) + +static inline void mark_all_dirty(struct vmcb *vmcb) +{ + vmcb->control.clean = 0; +} + +static inline void mark_all_clean(struct vmcb *vmcb) +{ + vmcb->control.clean = ((1 << VMCB_DIRTY_MAX) - 1) + & ~VMCB_ALWAYS_DIRTY_MASK; +} + +static inline void mark_dirty(struct vmcb *vmcb, int bit) +{ + vmcb->control.clean &= ~(1 << bit); +} + static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu) { return container_of(vcpu, struct vcpu_svm, vcpu); } -static inline bool is_nested(struct vcpu_svm *svm) +static void recalc_intercepts(struct vcpu_svm *svm) +{ + struct vmcb_control_area *c, *h; + struct nested_state *g; + + mark_dirty(svm->vmcb, VMCB_INTERCEPTS); + + if (!is_guest_mode(&svm->vcpu)) + return; + + c = &svm->vmcb->control; + h = &svm->nested.hsave->control; + g = &svm->nested; + + c->intercept_cr = h->intercept_cr | g->intercept_cr; + c->intercept_dr = h->intercept_dr | g->intercept_dr; + c->intercept_exceptions = h->intercept_exceptions | g->intercept_exceptions; + c->intercept = h->intercept | g->intercept; +} + +static inline struct vmcb *get_host_vmcb(struct vcpu_svm *svm) +{ + if (is_guest_mode(&svm->vcpu)) + return svm->nested.hsave; + else + return svm->vmcb; +} + +static inline void set_cr_intercept(struct vcpu_svm *svm, int bit) +{ + struct vmcb *vmcb = get_host_vmcb(svm); + + vmcb->control.intercept_cr |= (1U << bit); + + recalc_intercepts(svm); +} + +static inline void clr_cr_intercept(struct vcpu_svm *svm, int bit) +{ + struct vmcb *vmcb = get_host_vmcb(svm); + + vmcb->control.intercept_cr &= ~(1U << bit); + + recalc_intercepts(svm); +} + +static inline bool is_cr_intercept(struct vcpu_svm *svm, int bit) { - return svm->nested.vmcb; + struct vmcb *vmcb = get_host_vmcb(svm); + + return vmcb->control.intercept_cr & (1U << bit); +} + +static inline void set_dr_intercept(struct vcpu_svm *svm, int bit) +{ + struct vmcb *vmcb = get_host_vmcb(svm); + + vmcb->control.intercept_dr |= (1U << bit); + + recalc_intercepts(svm); +} + +static inline void clr_dr_intercept(struct vcpu_svm *svm, int bit) +{ + struct vmcb *vmcb = get_host_vmcb(svm); + + vmcb->control.intercept_dr &= ~(1U << bit); + + recalc_intercepts(svm); +} + +static inline void set_exception_intercept(struct vcpu_svm *svm, int bit) +{ + struct vmcb *vmcb = get_host_vmcb(svm); + + vmcb->control.intercept_exceptions |= (1U << bit); + + recalc_intercepts(svm); +} + +static inline void clr_exception_intercept(struct vcpu_svm *svm, int bit) +{ + struct vmcb *vmcb = get_host_vmcb(svm); + + vmcb->control.intercept_exceptions &= ~(1U << bit); + + recalc_intercepts(svm); +} + +static inline void set_intercept(struct vcpu_svm *svm, int bit) +{ + struct vmcb *vmcb = get_host_vmcb(svm); + + vmcb->control.intercept |= (1ULL << bit); + + recalc_intercepts(svm); +} + +static inline void clr_intercept(struct vcpu_svm *svm, int bit) +{ + struct vmcb *vmcb = get_host_vmcb(svm); + + vmcb->control.intercept &= ~(1ULL << bit); + + recalc_intercepts(svm); } static inline void enable_gif(struct vcpu_svm *svm) @@ -264,11 +410,6 @@ static u32 svm_msrpm_offset(u32 msr) #define MAX_INST_SIZE 15 -static inline u32 svm_has(u32 feat) -{ - return svm_features & feat; -} - static inline void clgi(void) { asm volatile (__ex(SVM_CLGI)); @@ -284,16 +425,6 @@ static inline void invlpga(unsigned long addr, u32 asid) asm volatile (__ex(SVM_INVLPGA) : : "a"(addr), "c"(asid)); } -static inline void force_new_asid(struct kvm_vcpu *vcpu) -{ - to_svm(vcpu)->asid_generation--; -} - -static inline void flush_guest_tlb(struct kvm_vcpu *vcpu) -{ - force_new_asid(vcpu); -} - static int get_npt_level(void) { #ifdef CONFIG_X86_64 @@ -310,6 +441,7 @@ static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer) efer &= ~EFER_LME; to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME; + mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR); } static int is_external_interrupt(u32 info) @@ -347,7 +479,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) svm->next_rip = svm->vmcb->control.next_rip; if (!svm->next_rip) { - if (emulate_instruction(vcpu, 0, 0, EMULTYPE_SKIP) != + if (emulate_instruction(vcpu, EMULTYPE_SKIP) != EMULATE_DONE) printk(KERN_DEBUG "%s: NOP\n", __func__); return; @@ -374,7 +506,7 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, nested_svm_check_exception(svm, nr, has_error_code, error_code)) return; - if (nr == BP_VECTOR && !svm_has(SVM_FEATURE_NRIP)) { + if (nr == BP_VECTOR && !static_cpu_has(X86_FEATURE_NRIPS)) { unsigned long rip, old_rip = kvm_rip_read(&svm->vcpu); /* @@ -670,7 +802,7 @@ static __init int svm_hardware_setup(void) svm_features = cpuid_edx(SVM_CPUID_FUNC); - if (!svm_has(SVM_FEATURE_NPT)) + if (!boot_cpu_has(X86_FEATURE_NPT)) npt_enabled = false; if (npt_enabled && !npt) { @@ -725,13 +857,15 @@ static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) struct vcpu_svm *svm = to_svm(vcpu); u64 g_tsc_offset = 0; - if (is_nested(svm)) { + if (is_guest_mode(vcpu)) { g_tsc_offset = svm->vmcb->control.tsc_offset - svm->nested.hsave->control.tsc_offset; svm->nested.hsave->control.tsc_offset = offset; } svm->vmcb->control.tsc_offset = offset + g_tsc_offset; + + mark_dirty(svm->vmcb, VMCB_INTERCEPTS); } static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment) @@ -739,8 +873,9 @@ static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment) struct vcpu_svm *svm = to_svm(vcpu); svm->vmcb->control.tsc_offset += adjustment; - if (is_nested(svm)) + if (is_guest_mode(vcpu)) svm->nested.hsave->control.tsc_offset += adjustment; + mark_dirty(svm->vmcb, VMCB_INTERCEPTS); } static void init_vmcb(struct vcpu_svm *svm) @@ -749,62 +884,62 @@ static void init_vmcb(struct vcpu_svm *svm) struct vmcb_save_area *save = &svm->vmcb->save; svm->vcpu.fpu_active = 1; + svm->vcpu.arch.hflags = 0; - control->intercept_cr_read = INTERCEPT_CR0_MASK | - INTERCEPT_CR3_MASK | - INTERCEPT_CR4_MASK; - - control->intercept_cr_write = INTERCEPT_CR0_MASK | - INTERCEPT_CR3_MASK | - INTERCEPT_CR4_MASK | - INTERCEPT_CR8_MASK; - - control->intercept_dr_read = INTERCEPT_DR0_MASK | - INTERCEPT_DR1_MASK | - INTERCEPT_DR2_MASK | - INTERCEPT_DR3_MASK | - INTERCEPT_DR4_MASK | - INTERCEPT_DR5_MASK | - INTERCEPT_DR6_MASK | - INTERCEPT_DR7_MASK; - - control->intercept_dr_write = INTERCEPT_DR0_MASK | - INTERCEPT_DR1_MASK | - INTERCEPT_DR2_MASK | - INTERCEPT_DR3_MASK | - INTERCEPT_DR4_MASK | - INTERCEPT_DR5_MASK | - INTERCEPT_DR6_MASK | - INTERCEPT_DR7_MASK; - - control->intercept_exceptions = (1 << PF_VECTOR) | - (1 << UD_VECTOR) | - (1 << MC_VECTOR); - - - control->intercept = (1ULL << INTERCEPT_INTR) | - (1ULL << INTERCEPT_NMI) | - (1ULL << INTERCEPT_SMI) | - (1ULL << INTERCEPT_SELECTIVE_CR0) | - (1ULL << INTERCEPT_CPUID) | - (1ULL << INTERCEPT_INVD) | - (1ULL << INTERCEPT_HLT) | - (1ULL << INTERCEPT_INVLPG) | - (1ULL << INTERCEPT_INVLPGA) | - (1ULL << INTERCEPT_IOIO_PROT) | - (1ULL << INTERCEPT_MSR_PROT) | - (1ULL << INTERCEPT_TASK_SWITCH) | - (1ULL << INTERCEPT_SHUTDOWN) | - (1ULL << INTERCEPT_VMRUN) | - (1ULL << INTERCEPT_VMMCALL) | - (1ULL << INTERCEPT_VMLOAD) | - (1ULL << INTERCEPT_VMSAVE) | - (1ULL << INTERCEPT_STGI) | - (1ULL << INTERCEPT_CLGI) | - (1ULL << INTERCEPT_SKINIT) | - (1ULL << INTERCEPT_WBINVD) | - (1ULL << INTERCEPT_MONITOR) | - (1ULL << INTERCEPT_MWAIT); + set_cr_intercept(svm, INTERCEPT_CR0_READ); + set_cr_intercept(svm, INTERCEPT_CR3_READ); + set_cr_intercept(svm, INTERCEPT_CR4_READ); + set_cr_intercept(svm, INTERCEPT_CR0_WRITE); + set_cr_intercept(svm, INTERCEPT_CR3_WRITE); + set_cr_intercept(svm, INTERCEPT_CR4_WRITE); + set_cr_intercept(svm, INTERCEPT_CR8_WRITE); + + set_dr_intercept(svm, INTERCEPT_DR0_READ); + set_dr_intercept(svm, INTERCEPT_DR1_READ); + set_dr_intercept(svm, INTERCEPT_DR2_READ); + set_dr_intercept(svm, INTERCEPT_DR3_READ); + set_dr_intercept(svm, INTERCEPT_DR4_READ); + set_dr_intercept(svm, INTERCEPT_DR5_READ); + set_dr_intercept(svm, INTERCEPT_DR6_READ); + set_dr_intercept(svm, INTERCEPT_DR7_READ); + + set_dr_intercept(svm, INTERCEPT_DR0_WRITE); + set_dr_intercept(svm, INTERCEPT_DR1_WRITE); + set_dr_intercept(svm, INTERCEPT_DR2_WRITE); + set_dr_intercept(svm, INTERCEPT_DR3_WRITE); + set_dr_intercept(svm, INTERCEPT_DR4_WRITE); + set_dr_intercept(svm, INTERCEPT_DR5_WRITE); + set_dr_intercept(svm, INTERCEPT_DR6_WRITE); + set_dr_intercept(svm, INTERCEPT_DR7_WRITE); + + set_exception_intercept(svm, PF_VECTOR); + set_exception_intercept(svm, UD_VECTOR); + set_exception_intercept(svm, MC_VECTOR); + + set_intercept(svm, INTERCEPT_INTR); + set_intercept(svm, INTERCEPT_NMI); + set_intercept(svm, INTERCEPT_SMI); + set_intercept(svm, INTERCEPT_SELECTIVE_CR0); + set_intercept(svm, INTERCEPT_CPUID); + set_intercept(svm, INTERCEPT_INVD); + set_intercept(svm, INTERCEPT_HLT); + set_intercept(svm, INTERCEPT_INVLPG); + set_intercept(svm, INTERCEPT_INVLPGA); + set_intercept(svm, INTERCEPT_IOIO_PROT); + set_intercept(svm, INTERCEPT_MSR_PROT); + set_intercept(svm, INTERCEPT_TASK_SWITCH); + set_intercept(svm, INTERCEPT_SHUTDOWN); + set_intercept(svm, INTERCEPT_VMRUN); + set_intercept(svm, INTERCEPT_VMMCALL); + set_intercept(svm, INTERCEPT_VMLOAD); + set_intercept(svm, INTERCEPT_VMSAVE); + set_intercept(svm, INTERCEPT_STGI); + set_intercept(svm, INTERCEPT_CLGI); + set_intercept(svm, INTERCEPT_SKINIT); + set_intercept(svm, INTERCEPT_WBINVD); + set_intercept(svm, INTERCEPT_MONITOR); + set_intercept(svm, INTERCEPT_MWAIT); + set_intercept(svm, INTERCEPT_XSETBV); control->iopm_base_pa = iopm_base; control->msrpm_base_pa = __pa(svm->msrpm); @@ -855,25 +990,27 @@ static void init_vmcb(struct vcpu_svm *svm) if (npt_enabled) { /* Setup VMCB for Nested Paging */ control->nested_ctl = 1; - control->intercept &= ~((1ULL << INTERCEPT_TASK_SWITCH) | - (1ULL << INTERCEPT_INVLPG)); - control->intercept_exceptions &= ~(1 << PF_VECTOR); - control->intercept_cr_read &= ~INTERCEPT_CR3_MASK; - control->intercept_cr_write &= ~INTERCEPT_CR3_MASK; + clr_intercept(svm, INTERCEPT_TASK_SWITCH); + clr_intercept(svm, INTERCEPT_INVLPG); + clr_exception_intercept(svm, PF_VECTOR); + clr_cr_intercept(svm, INTERCEPT_CR3_READ); + clr_cr_intercept(svm, INTERCEPT_CR3_WRITE); save->g_pat = 0x0007040600070406ULL; save->cr3 = 0; save->cr4 = 0; } - force_new_asid(&svm->vcpu); + svm->asid_generation = 0; svm->nested.vmcb = 0; svm->vcpu.arch.hflags = 0; - if (svm_has(SVM_FEATURE_PAUSE_FILTER)) { + if (boot_cpu_has(X86_FEATURE_PAUSEFILTER)) { control->pause_filter_count = 3000; - control->intercept |= (1ULL << INTERCEPT_PAUSE); + set_intercept(svm, INTERCEPT_PAUSE); } + mark_all_dirty(svm->vmcb); + enable_gif(svm); } @@ -990,8 +1127,16 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) if (unlikely(cpu != vcpu->cpu)) { svm->asid_generation = 0; + mark_all_dirty(svm->vmcb); } +#ifdef CONFIG_X86_64 + rdmsrl(MSR_GS_BASE, to_svm(vcpu)->host.gs_base); +#endif + savesegment(fs, svm->host.fs); + savesegment(gs, svm->host.gs); + svm->host.ldt = kvm_read_ldt(); + for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) rdmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]); } @@ -1002,6 +1147,14 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu) int i; ++vcpu->stat.host_state_reload; + kvm_load_ldt(svm->host.ldt); +#ifdef CONFIG_X86_64 + loadsegment(fs, svm->host.fs); + wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gs); + load_gs_index(svm->host.gs); +#else + loadsegment(gs, svm->host.gs); +#endif for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]); } @@ -1021,7 +1174,7 @@ static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) switch (reg) { case VCPU_EXREG_PDPTR: BUG_ON(!npt_enabled); - load_pdptrs(vcpu, vcpu->arch.walk_mmu, vcpu->arch.cr3); + load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu)); break; default: BUG(); @@ -1030,12 +1183,12 @@ static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) static void svm_set_vintr(struct vcpu_svm *svm) { - svm->vmcb->control.intercept |= 1ULL << INTERCEPT_VINTR; + set_intercept(svm, INTERCEPT_VINTR); } static void svm_clear_vintr(struct vcpu_svm *svm) { - svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_VINTR); + clr_intercept(svm, INTERCEPT_VINTR); } static struct vmcb_seg *svm_seg(struct kvm_vcpu *vcpu, int seg) @@ -1150,6 +1303,7 @@ static void svm_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) svm->vmcb->save.idtr.limit = dt->size; svm->vmcb->save.idtr.base = dt->address ; + mark_dirty(svm->vmcb, VMCB_DT); } static void svm_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) @@ -1166,19 +1320,23 @@ static void svm_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) svm->vmcb->save.gdtr.limit = dt->size; svm->vmcb->save.gdtr.base = dt->address ; + mark_dirty(svm->vmcb, VMCB_DT); } static void svm_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) { } +static void svm_decache_cr3(struct kvm_vcpu *vcpu) +{ +} + static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) { } static void update_cr0_intercept(struct vcpu_svm *svm) { - struct vmcb *vmcb = svm->vmcb; ulong gcr0 = svm->vcpu.arch.cr0; u64 *hcr0 = &svm->vmcb->save.cr0; @@ -1188,27 +1346,14 @@ static void update_cr0_intercept(struct vcpu_svm *svm) *hcr0 = (*hcr0 & ~SVM_CR0_SELECTIVE_MASK) | (gcr0 & SVM_CR0_SELECTIVE_MASK); + mark_dirty(svm->vmcb, VMCB_CR); if (gcr0 == *hcr0 && svm->vcpu.fpu_active) { - vmcb->control.intercept_cr_read &= ~INTERCEPT_CR0_MASK; - vmcb->control.intercept_cr_write &= ~INTERCEPT_CR0_MASK; - if (is_nested(svm)) { - struct vmcb *hsave = svm->nested.hsave; - - hsave->control.intercept_cr_read &= ~INTERCEPT_CR0_MASK; - hsave->control.intercept_cr_write &= ~INTERCEPT_CR0_MASK; - vmcb->control.intercept_cr_read |= svm->nested.intercept_cr_read; - vmcb->control.intercept_cr_write |= svm->nested.intercept_cr_write; - } + clr_cr_intercept(svm, INTERCEPT_CR0_READ); + clr_cr_intercept(svm, INTERCEPT_CR0_WRITE); } else { - svm->vmcb->control.intercept_cr_read |= INTERCEPT_CR0_MASK; - svm->vmcb->control.intercept_cr_write |= INTERCEPT_CR0_MASK; - if (is_nested(svm)) { - struct vmcb *hsave = svm->nested.hsave; - - hsave->control.intercept_cr_read |= INTERCEPT_CR0_MASK; - hsave->control.intercept_cr_write |= INTERCEPT_CR0_MASK; - } + set_cr_intercept(svm, INTERCEPT_CR0_READ); + set_cr_intercept(svm, INTERCEPT_CR0_WRITE); } } @@ -1216,7 +1361,7 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) { struct vcpu_svm *svm = to_svm(vcpu); - if (is_nested(svm)) { + if (is_guest_mode(vcpu)) { /* * We are here because we run in nested mode, the host kvm * intercepts cr0 writes but the l1 hypervisor does not. @@ -1268,6 +1413,7 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) */ cr0 &= ~(X86_CR0_CD | X86_CR0_NW); svm->vmcb->save.cr0 = cr0; + mark_dirty(svm->vmcb, VMCB_CR); update_cr0_intercept(svm); } @@ -1277,13 +1423,14 @@ static void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) unsigned long old_cr4 = to_svm(vcpu)->vmcb->save.cr4; if (npt_enabled && ((old_cr4 ^ cr4) & X86_CR4_PGE)) - force_new_asid(vcpu); + svm_flush_tlb(vcpu); vcpu->arch.cr4 = cr4; if (!npt_enabled) cr4 |= X86_CR4_PAE; cr4 |= host_cr4_mce; to_svm(vcpu)->vmcb->save.cr4 = cr4; + mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR); } static void svm_set_segment(struct kvm_vcpu *vcpu, @@ -1312,26 +1459,25 @@ static void svm_set_segment(struct kvm_vcpu *vcpu, = (svm->vmcb->save.cs.attrib >> SVM_SELECTOR_DPL_SHIFT) & 3; + mark_dirty(svm->vmcb, VMCB_SEG); } static void update_db_intercept(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); - svm->vmcb->control.intercept_exceptions &= - ~((1 << DB_VECTOR) | (1 << BP_VECTOR)); + clr_exception_intercept(svm, DB_VECTOR); + clr_exception_intercept(svm, BP_VECTOR); if (svm->nmi_singlestep) - svm->vmcb->control.intercept_exceptions |= (1 << DB_VECTOR); + set_exception_intercept(svm, DB_VECTOR); if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) { if (vcpu->guest_debug & (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) - svm->vmcb->control.intercept_exceptions |= - 1 << DB_VECTOR; + set_exception_intercept(svm, DB_VECTOR); if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) - svm->vmcb->control.intercept_exceptions |= - 1 << BP_VECTOR; + set_exception_intercept(svm, BP_VECTOR); } else vcpu->guest_debug = 0; } @@ -1345,21 +1491,9 @@ static void svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) else svm->vmcb->save.dr7 = vcpu->arch.dr7; - update_db_intercept(vcpu); -} - -static void load_host_msrs(struct kvm_vcpu *vcpu) -{ -#ifdef CONFIG_X86_64 - wrmsrl(MSR_GS_BASE, to_svm(vcpu)->host_gs_base); -#endif -} + mark_dirty(svm->vmcb, VMCB_DR); -static void save_host_msrs(struct kvm_vcpu *vcpu) -{ -#ifdef CONFIG_X86_64 - rdmsrl(MSR_GS_BASE, to_svm(vcpu)->host_gs_base); -#endif + update_db_intercept(vcpu); } static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd) @@ -1372,6 +1506,8 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd) svm->asid_generation = sd->asid_generation; svm->vmcb->control.asid = sd->next_asid++; + + mark_dirty(svm->vmcb, VMCB_ASID); } static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value) @@ -1379,20 +1515,40 @@ static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value) struct vcpu_svm *svm = to_svm(vcpu); svm->vmcb->save.dr7 = value; + mark_dirty(svm->vmcb, VMCB_DR); } static int pf_interception(struct vcpu_svm *svm) { - u64 fault_address; + u64 fault_address = svm->vmcb->control.exit_info_2; u32 error_code; + int r = 1; - fault_address = svm->vmcb->control.exit_info_2; - error_code = svm->vmcb->control.exit_info_1; - - trace_kvm_page_fault(fault_address, error_code); - if (!npt_enabled && kvm_event_needs_reinjection(&svm->vcpu)) - kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address); - return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code); + switch (svm->apf_reason) { + default: + error_code = svm->vmcb->control.exit_info_1; + + trace_kvm_page_fault(fault_address, error_code); + if (!npt_enabled && kvm_event_needs_reinjection(&svm->vcpu)) + kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address); + r = kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code, + svm->vmcb->control.insn_bytes, + svm->vmcb->control.insn_len); + break; + case KVM_PV_REASON_PAGE_NOT_PRESENT: + svm->apf_reason = 0; + local_irq_disable(); + kvm_async_pf_task_wait(fault_address); + local_irq_enable(); + break; + case KVM_PV_REASON_PAGE_READY: + svm->apf_reason = 0; + local_irq_disable(); + kvm_async_pf_task_wake(fault_address); + local_irq_enable(); + break; + } + return r; } static int db_interception(struct vcpu_svm *svm) @@ -1440,7 +1596,7 @@ static int ud_interception(struct vcpu_svm *svm) { int er; - er = emulate_instruction(&svm->vcpu, 0, 0, EMULTYPE_TRAP_UD); + er = emulate_instruction(&svm->vcpu, EMULTYPE_TRAP_UD); if (er != EMULATE_DONE) kvm_queue_exception(&svm->vcpu, UD_VECTOR); return 1; @@ -1449,21 +1605,8 @@ static int ud_interception(struct vcpu_svm *svm) static void svm_fpu_activate(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); - u32 excp; - - if (is_nested(svm)) { - u32 h_excp, n_excp; - - h_excp = svm->nested.hsave->control.intercept_exceptions; - n_excp = svm->nested.intercept_exceptions; - h_excp &= ~(1 << NM_VECTOR); - excp = h_excp | n_excp; - } else { - excp = svm->vmcb->control.intercept_exceptions; - excp &= ~(1 << NM_VECTOR); - } - svm->vmcb->control.intercept_exceptions = excp; + clr_exception_intercept(svm, NM_VECTOR); svm->vcpu.fpu_active = 1; update_cr0_intercept(svm); @@ -1570,7 +1713,7 @@ static int io_interception(struct vcpu_svm *svm) string = (io_info & SVM_IOIO_STR_MASK) != 0; in = (io_info & SVM_IOIO_TYPE_MASK) != 0; if (string || in) - return emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DONE; + return emulate_instruction(vcpu, 0) == EMULATE_DONE; port = io_info >> 16; size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; @@ -1624,17 +1767,19 @@ static void nested_svm_set_tdp_cr3(struct kvm_vcpu *vcpu, struct vcpu_svm *svm = to_svm(vcpu); svm->vmcb->control.nested_cr3 = root; - force_new_asid(vcpu); + mark_dirty(svm->vmcb, VMCB_NPT); + svm_flush_tlb(vcpu); } -static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu) +static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu, + struct x86_exception *fault) { struct vcpu_svm *svm = to_svm(vcpu); svm->vmcb->control.exit_code = SVM_EXIT_NPF; svm->vmcb->control.exit_code_hi = 0; - svm->vmcb->control.exit_info_1 = vcpu->arch.fault.error_code; - svm->vmcb->control.exit_info_2 = vcpu->arch.fault.address; + svm->vmcb->control.exit_info_1 = fault->error_code; + svm->vmcb->control.exit_info_2 = fault->address; nested_svm_vmexit(svm); } @@ -1680,7 +1825,7 @@ static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, { int vmexit; - if (!is_nested(svm)) + if (!is_guest_mode(&svm->vcpu)) return 0; svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr; @@ -1698,7 +1843,7 @@ static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, /* This function returns true if it is save to enable the irq window */ static inline bool nested_svm_intr(struct vcpu_svm *svm) { - if (!is_nested(svm)) + if (!is_guest_mode(&svm->vcpu)) return true; if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK)) @@ -1737,7 +1882,7 @@ static inline bool nested_svm_intr(struct vcpu_svm *svm) /* This function returns true if it is save to enable the nmi window */ static inline bool nested_svm_nmi(struct vcpu_svm *svm) { - if (!is_nested(svm)) + if (!is_guest_mode(&svm->vcpu)) return true; if (!(svm->nested.intercept & (1ULL << INTERCEPT_NMI))) @@ -1836,8 +1981,8 @@ static int nested_svm_exit_special(struct vcpu_svm *svm) return NESTED_EXIT_HOST; break; case SVM_EXIT_EXCP_BASE + PF_VECTOR: - /* When we're shadowing, trap PFs */ - if (!npt_enabled) + /* When we're shadowing, trap PFs, but not async PF */ + if (!npt_enabled && svm->apf_reason == 0) return NESTED_EXIT_HOST; break; case SVM_EXIT_EXCP_BASE + NM_VECTOR: @@ -1865,27 +2010,15 @@ static int nested_svm_intercept(struct vcpu_svm *svm) case SVM_EXIT_IOIO: vmexit = nested_svm_intercept_ioio(svm); break; - case SVM_EXIT_READ_CR0 ... SVM_EXIT_READ_CR8: { - u32 cr_bits = 1 << (exit_code - SVM_EXIT_READ_CR0); - if (svm->nested.intercept_cr_read & cr_bits) - vmexit = NESTED_EXIT_DONE; - break; - } - case SVM_EXIT_WRITE_CR0 ... SVM_EXIT_WRITE_CR8: { - u32 cr_bits = 1 << (exit_code - SVM_EXIT_WRITE_CR0); - if (svm->nested.intercept_cr_write & cr_bits) - vmexit = NESTED_EXIT_DONE; - break; - } - case SVM_EXIT_READ_DR0 ... SVM_EXIT_READ_DR7: { - u32 dr_bits = 1 << (exit_code - SVM_EXIT_READ_DR0); - if (svm->nested.intercept_dr_read & dr_bits) + case SVM_EXIT_READ_CR0 ... SVM_EXIT_WRITE_CR8: { + u32 bit = 1U << (exit_code - SVM_EXIT_READ_CR0); + if (svm->nested.intercept_cr & bit) vmexit = NESTED_EXIT_DONE; break; } - case SVM_EXIT_WRITE_DR0 ... SVM_EXIT_WRITE_DR7: { - u32 dr_bits = 1 << (exit_code - SVM_EXIT_WRITE_DR0); - if (svm->nested.intercept_dr_write & dr_bits) + case SVM_EXIT_READ_DR0 ... SVM_EXIT_WRITE_DR7: { + u32 bit = 1U << (exit_code - SVM_EXIT_READ_DR0); + if (svm->nested.intercept_dr & bit) vmexit = NESTED_EXIT_DONE; break; } @@ -1893,6 +2026,10 @@ static int nested_svm_intercept(struct vcpu_svm *svm) u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE); if (svm->nested.intercept_exceptions & excp_bits) vmexit = NESTED_EXIT_DONE; + /* async page fault always cause vmexit */ + else if ((exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR) && + svm->apf_reason != 0) + vmexit = NESTED_EXIT_DONE; break; } case SVM_EXIT_ERR: { @@ -1926,10 +2063,8 @@ static inline void copy_vmcb_control_area(struct vmcb *dst_vmcb, struct vmcb *fr struct vmcb_control_area *dst = &dst_vmcb->control; struct vmcb_control_area *from = &from_vmcb->control; - dst->intercept_cr_read = from->intercept_cr_read; - dst->intercept_cr_write = from->intercept_cr_write; - dst->intercept_dr_read = from->intercept_dr_read; - dst->intercept_dr_write = from->intercept_dr_write; + dst->intercept_cr = from->intercept_cr; + dst->intercept_dr = from->intercept_dr; dst->intercept_exceptions = from->intercept_exceptions; dst->intercept = from->intercept; dst->iopm_base_pa = from->iopm_base_pa; @@ -1970,7 +2105,8 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) if (!nested_vmcb) return 1; - /* Exit nested SVM mode */ + /* Exit Guest-Mode */ + leave_guest_mode(&svm->vcpu); svm->nested.vmcb = 0; /* Give the current vmcb to the guest */ @@ -1984,7 +2120,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) nested_vmcb->save.idtr = vmcb->save.idtr; nested_vmcb->save.efer = svm->vcpu.arch.efer; nested_vmcb->save.cr0 = kvm_read_cr0(&svm->vcpu); - nested_vmcb->save.cr3 = svm->vcpu.arch.cr3; + nested_vmcb->save.cr3 = kvm_read_cr3(&svm->vcpu); nested_vmcb->save.cr2 = vmcb->save.cr2; nested_vmcb->save.cr4 = svm->vcpu.arch.cr4; nested_vmcb->save.rflags = vmcb->save.rflags; @@ -2061,6 +2197,8 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) svm->vmcb->save.cpl = 0; svm->vmcb->control.exit_int_info = 0; + mark_all_dirty(svm->vmcb); + nested_svm_unmap(page); nested_svm_uninit_mmu_context(&svm->vcpu); @@ -2148,8 +2286,8 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) nested_vmcb->control.event_inj, nested_vmcb->control.nested_ctl); - trace_kvm_nested_intercepts(nested_vmcb->control.intercept_cr_read, - nested_vmcb->control.intercept_cr_write, + trace_kvm_nested_intercepts(nested_vmcb->control.intercept_cr & 0xffff, + nested_vmcb->control.intercept_cr >> 16, nested_vmcb->control.intercept_exceptions, nested_vmcb->control.intercept); @@ -2177,7 +2315,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) if (npt_enabled) hsave->save.cr3 = vmcb->save.cr3; else - hsave->save.cr3 = svm->vcpu.arch.cr3; + hsave->save.cr3 = kvm_read_cr3(&svm->vcpu); copy_vmcb_control_area(hsave, vmcb); @@ -2229,14 +2367,12 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) svm->nested.vmcb_iopm = nested_vmcb->control.iopm_base_pa & ~0x0fffULL; /* cache intercepts */ - svm->nested.intercept_cr_read = nested_vmcb->control.intercept_cr_read; - svm->nested.intercept_cr_write = nested_vmcb->control.intercept_cr_write; - svm->nested.intercept_dr_read = nested_vmcb->control.intercept_dr_read; - svm->nested.intercept_dr_write = nested_vmcb->control.intercept_dr_write; + svm->nested.intercept_cr = nested_vmcb->control.intercept_cr; + svm->nested.intercept_dr = nested_vmcb->control.intercept_dr; svm->nested.intercept_exceptions = nested_vmcb->control.intercept_exceptions; svm->nested.intercept = nested_vmcb->control.intercept; - force_new_asid(&svm->vcpu); + svm_flush_tlb(&svm->vcpu); svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK; if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK) svm->vcpu.arch.hflags |= HF_VINTR_MASK; @@ -2245,29 +2381,12 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) if (svm->vcpu.arch.hflags & HF_VINTR_MASK) { /* We only want the cr8 intercept bits of the guest */ - svm->vmcb->control.intercept_cr_read &= ~INTERCEPT_CR8_MASK; - svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK; + clr_cr_intercept(svm, INTERCEPT_CR8_READ); + clr_cr_intercept(svm, INTERCEPT_CR8_WRITE); } /* We don't want to see VMMCALLs from a nested guest */ - svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_VMMCALL); - - /* - * We don't want a nested guest to be more powerful than the guest, so - * all intercepts are ORed - */ - svm->vmcb->control.intercept_cr_read |= - nested_vmcb->control.intercept_cr_read; - svm->vmcb->control.intercept_cr_write |= - nested_vmcb->control.intercept_cr_write; - svm->vmcb->control.intercept_dr_read |= - nested_vmcb->control.intercept_dr_read; - svm->vmcb->control.intercept_dr_write |= - nested_vmcb->control.intercept_dr_write; - svm->vmcb->control.intercept_exceptions |= - nested_vmcb->control.intercept_exceptions; - - svm->vmcb->control.intercept |= nested_vmcb->control.intercept; + clr_intercept(svm, INTERCEPT_VMMCALL); svm->vmcb->control.lbr_ctl = nested_vmcb->control.lbr_ctl; svm->vmcb->control.int_vector = nested_vmcb->control.int_vector; @@ -2278,11 +2397,21 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) nested_svm_unmap(page); - /* nested_vmcb is our indicator if nested SVM is activated */ + /* Enter Guest-Mode */ + enter_guest_mode(&svm->vcpu); + + /* + * Merge guest and host intercepts - must be called with vcpu in + * guest-mode to take affect here + */ + recalc_intercepts(svm); + svm->nested.vmcb = vmcb_gpa; enable_gif(svm); + mark_all_dirty(svm->vmcb); + return true; } @@ -2400,6 +2529,8 @@ static int clgi_interception(struct vcpu_svm *svm) svm_clear_vintr(svm); svm->vmcb->control.int_ctl &= ~V_IRQ_MASK; + mark_dirty(svm->vmcb, VMCB_INTR); + return 1; } @@ -2426,6 +2557,19 @@ static int skinit_interception(struct vcpu_svm *svm) return 1; } +static int xsetbv_interception(struct vcpu_svm *svm) +{ + u64 new_bv = kvm_read_edx_eax(&svm->vcpu); + u32 index = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX); + + if (kvm_set_xcr(&svm->vcpu, index, new_bv) == 0) { + svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; + skip_emulated_instruction(&svm->vcpu); + } + + return 1; +} + static int invalid_op_interception(struct vcpu_svm *svm) { kvm_queue_exception(&svm->vcpu, UD_VECTOR); @@ -2507,19 +2651,92 @@ static int cpuid_interception(struct vcpu_svm *svm) static int iret_interception(struct vcpu_svm *svm) { ++svm->vcpu.stat.nmi_window_exits; - svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_IRET); + clr_intercept(svm, INTERCEPT_IRET); svm->vcpu.arch.hflags |= HF_IRET_MASK; return 1; } static int invlpg_interception(struct vcpu_svm *svm) { - return emulate_instruction(&svm->vcpu, 0, 0, 0) == EMULATE_DONE; + if (!static_cpu_has(X86_FEATURE_DECODEASSISTS)) + return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE; + + kvm_mmu_invlpg(&svm->vcpu, svm->vmcb->control.exit_info_1); + skip_emulated_instruction(&svm->vcpu); + return 1; } static int emulate_on_interception(struct vcpu_svm *svm) { - return emulate_instruction(&svm->vcpu, 0, 0, 0) == EMULATE_DONE; + return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE; +} + +#define CR_VALID (1ULL << 63) + +static int cr_interception(struct vcpu_svm *svm) +{ + int reg, cr; + unsigned long val; + int err; + + if (!static_cpu_has(X86_FEATURE_DECODEASSISTS)) + return emulate_on_interception(svm); + + if (unlikely((svm->vmcb->control.exit_info_1 & CR_VALID) == 0)) + return emulate_on_interception(svm); + + reg = svm->vmcb->control.exit_info_1 & SVM_EXITINFO_REG_MASK; + cr = svm->vmcb->control.exit_code - SVM_EXIT_READ_CR0; + + err = 0; + if (cr >= 16) { /* mov to cr */ + cr -= 16; + val = kvm_register_read(&svm->vcpu, reg); + switch (cr) { + case 0: + err = kvm_set_cr0(&svm->vcpu, val); + break; + case 3: + err = kvm_set_cr3(&svm->vcpu, val); + break; + case 4: + err = kvm_set_cr4(&svm->vcpu, val); + break; + case 8: + err = kvm_set_cr8(&svm->vcpu, val); + break; + default: + WARN(1, "unhandled write to CR%d", cr); + kvm_queue_exception(&svm->vcpu, UD_VECTOR); + return 1; + } + } else { /* mov from cr */ + switch (cr) { + case 0: + val = kvm_read_cr0(&svm->vcpu); + break; + case 2: + val = svm->vcpu.arch.cr2; + break; + case 3: + val = kvm_read_cr3(&svm->vcpu); + break; + case 4: + val = kvm_read_cr4(&svm->vcpu); + break; + case 8: + val = kvm_get_cr8(&svm->vcpu); + break; + default: + WARN(1, "unhandled read from CR%d", cr); + kvm_queue_exception(&svm->vcpu, UD_VECTOR); + return 1; + } + kvm_register_write(&svm->vcpu, reg, val); + } + kvm_complete_insn_gp(&svm->vcpu, err); + + return 1; } static int cr0_write_interception(struct vcpu_svm *svm) @@ -2527,7 +2744,7 @@ static int cr0_write_interception(struct vcpu_svm *svm) struct kvm_vcpu *vcpu = &svm->vcpu; int r; - r = emulate_instruction(&svm->vcpu, 0, 0, 0); + r = cr_interception(svm); if (svm->nested.vmexit_rip) { kvm_register_write(vcpu, VCPU_REGS_RIP, svm->nested.vmexit_rip); @@ -2536,22 +2753,49 @@ static int cr0_write_interception(struct vcpu_svm *svm) svm->nested.vmexit_rip = 0; } - return r == EMULATE_DONE; + return r; +} + +static int dr_interception(struct vcpu_svm *svm) +{ + int reg, dr; + unsigned long val; + int err; + + if (!boot_cpu_has(X86_FEATURE_DECODEASSISTS)) + return emulate_on_interception(svm); + + reg = svm->vmcb->control.exit_info_1 & SVM_EXITINFO_REG_MASK; + dr = svm->vmcb->control.exit_code - SVM_EXIT_READ_DR0; + + if (dr >= 16) { /* mov to DRn */ + val = kvm_register_read(&svm->vcpu, reg); + kvm_set_dr(&svm->vcpu, dr - 16, val); + } else { + err = kvm_get_dr(&svm->vcpu, dr, &val); + if (!err) + kvm_register_write(&svm->vcpu, reg, val); + } + + skip_emulated_instruction(&svm->vcpu); + + return 1; } static int cr8_write_interception(struct vcpu_svm *svm) { struct kvm_run *kvm_run = svm->vcpu.run; + int r; u8 cr8_prev = kvm_get_cr8(&svm->vcpu); /* instruction emulation calls kvm_set_cr8() */ - emulate_instruction(&svm->vcpu, 0, 0, 0); + r = cr_interception(svm); if (irqchip_in_kernel(svm->vcpu.kvm)) { - svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK; - return 1; + clr_cr_intercept(svm, INTERCEPT_CR8_WRITE); + return r; } if (cr8_prev <= kvm_get_cr8(&svm->vcpu)) - return 1; + return r; kvm_run->exit_reason = KVM_EXIT_SET_TPR; return 0; } @@ -2562,14 +2806,9 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) switch (ecx) { case MSR_IA32_TSC: { - u64 tsc_offset; + struct vmcb *vmcb = get_host_vmcb(svm); - if (is_nested(svm)) - tsc_offset = svm->nested.hsave->control.tsc_offset; - else - tsc_offset = svm->vmcb->control.tsc_offset; - - *data = tsc_offset + native_read_tsc(); + *data = vmcb->control.tsc_offset + native_read_tsc(); break; } case MSR_STAR: @@ -2714,7 +2953,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) svm->vmcb->save.sysenter_esp = data; break; case MSR_IA32_DEBUGCTLMSR: - if (!svm_has(SVM_FEATURE_LBRV)) { + if (!boot_cpu_has(X86_FEATURE_LBRV)) { pr_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTL 0x%llx, nop\n", __func__, data); break; @@ -2723,6 +2962,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) return 1; svm->vmcb->save.dbgctl = data; + mark_dirty(svm->vmcb, VMCB_LBR); if (data & (1ULL<<0)) svm_enable_lbrv(svm); else @@ -2775,6 +3015,7 @@ static int interrupt_window_interception(struct vcpu_svm *svm) kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); svm_clear_vintr(svm); svm->vmcb->control.int_ctl &= ~V_IRQ_MASK; + mark_dirty(svm->vmcb, VMCB_INTR); /* * If the user space waits to inject interrupts, exit as soon as * possible @@ -2797,31 +3038,31 @@ static int pause_interception(struct vcpu_svm *svm) } static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { - [SVM_EXIT_READ_CR0] = emulate_on_interception, - [SVM_EXIT_READ_CR3] = emulate_on_interception, - [SVM_EXIT_READ_CR4] = emulate_on_interception, - [SVM_EXIT_READ_CR8] = emulate_on_interception, + [SVM_EXIT_READ_CR0] = cr_interception, + [SVM_EXIT_READ_CR3] = cr_interception, + [SVM_EXIT_READ_CR4] = cr_interception, + [SVM_EXIT_READ_CR8] = cr_interception, [SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception, [SVM_EXIT_WRITE_CR0] = cr0_write_interception, - [SVM_EXIT_WRITE_CR3] = emulate_on_interception, - [SVM_EXIT_WRITE_CR4] = emulate_on_interception, + [SVM_EXIT_WRITE_CR3] = cr_interception, + [SVM_EXIT_WRITE_CR4] = cr_interception, [SVM_EXIT_WRITE_CR8] = cr8_write_interception, - [SVM_EXIT_READ_DR0] = emulate_on_interception, - [SVM_EXIT_READ_DR1] = emulate_on_interception, - [SVM_EXIT_READ_DR2] = emulate_on_interception, - [SVM_EXIT_READ_DR3] = emulate_on_interception, - [SVM_EXIT_READ_DR4] = emulate_on_interception, - [SVM_EXIT_READ_DR5] = emulate_on_interception, - [SVM_EXIT_READ_DR6] = emulate_on_interception, - [SVM_EXIT_READ_DR7] = emulate_on_interception, - [SVM_EXIT_WRITE_DR0] = emulate_on_interception, - [SVM_EXIT_WRITE_DR1] = emulate_on_interception, - [SVM_EXIT_WRITE_DR2] = emulate_on_interception, - [SVM_EXIT_WRITE_DR3] = emulate_on_interception, - [SVM_EXIT_WRITE_DR4] = emulate_on_interception, - [SVM_EXIT_WRITE_DR5] = emulate_on_interception, - [SVM_EXIT_WRITE_DR6] = emulate_on_interception, - [SVM_EXIT_WRITE_DR7] = emulate_on_interception, + [SVM_EXIT_READ_DR0] = dr_interception, + [SVM_EXIT_READ_DR1] = dr_interception, + [SVM_EXIT_READ_DR2] = dr_interception, + [SVM_EXIT_READ_DR3] = dr_interception, + [SVM_EXIT_READ_DR4] = dr_interception, + [SVM_EXIT_READ_DR5] = dr_interception, + [SVM_EXIT_READ_DR6] = dr_interception, + [SVM_EXIT_READ_DR7] = dr_interception, + [SVM_EXIT_WRITE_DR0] = dr_interception, + [SVM_EXIT_WRITE_DR1] = dr_interception, + [SVM_EXIT_WRITE_DR2] = dr_interception, + [SVM_EXIT_WRITE_DR3] = dr_interception, + [SVM_EXIT_WRITE_DR4] = dr_interception, + [SVM_EXIT_WRITE_DR5] = dr_interception, + [SVM_EXIT_WRITE_DR6] = dr_interception, + [SVM_EXIT_WRITE_DR7] = dr_interception, [SVM_EXIT_EXCP_BASE + DB_VECTOR] = db_interception, [SVM_EXIT_EXCP_BASE + BP_VECTOR] = bp_interception, [SVM_EXIT_EXCP_BASE + UD_VECTOR] = ud_interception, @@ -2854,6 +3095,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { [SVM_EXIT_WBINVD] = emulate_on_interception, [SVM_EXIT_MONITOR] = invalid_op_interception, [SVM_EXIT_MWAIT] = invalid_op_interception, + [SVM_EXIT_XSETBV] = xsetbv_interception, [SVM_EXIT_NPF] = pf_interception, }; @@ -2864,10 +3106,10 @@ void dump_vmcb(struct kvm_vcpu *vcpu) struct vmcb_save_area *save = &svm->vmcb->save; pr_err("VMCB Control Area:\n"); - pr_err("cr_read: %04x\n", control->intercept_cr_read); - pr_err("cr_write: %04x\n", control->intercept_cr_write); - pr_err("dr_read: %04x\n", control->intercept_dr_read); - pr_err("dr_write: %04x\n", control->intercept_dr_write); + pr_err("cr_read: %04x\n", control->intercept_cr & 0xffff); + pr_err("cr_write: %04x\n", control->intercept_cr >> 16); + pr_err("dr_read: %04x\n", control->intercept_dr & 0xffff); + pr_err("dr_write: %04x\n", control->intercept_dr >> 16); pr_err("exceptions: %08x\n", control->intercept_exceptions); pr_err("intercepts: %016llx\n", control->intercept); pr_err("pause filter count: %d\n", control->pause_filter_count); @@ -2950,15 +3192,23 @@ void dump_vmcb(struct kvm_vcpu *vcpu) } +static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2) +{ + struct vmcb_control_area *control = &to_svm(vcpu)->vmcb->control; + + *info1 = control->exit_info_1; + *info2 = control->exit_info_2; +} + static int handle_exit(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); struct kvm_run *kvm_run = vcpu->run; u32 exit_code = svm->vmcb->control.exit_code; - trace_kvm_exit(exit_code, vcpu); + trace_kvm_exit(exit_code, vcpu, KVM_ISA_SVM); - if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR0_MASK)) + if (!is_cr_intercept(svm, INTERCEPT_CR0_WRITE)) vcpu->arch.cr0 = svm->vmcb->save.cr0; if (npt_enabled) vcpu->arch.cr3 = svm->vmcb->save.cr3; @@ -2970,7 +3220,7 @@ static int handle_exit(struct kvm_vcpu *vcpu) return 1; } - if (is_nested(svm)) { + if (is_guest_mode(vcpu)) { int vmexit; trace_kvm_nested_vmexit(svm->vmcb->save.rip, exit_code, @@ -3033,7 +3283,6 @@ static void pre_svm_run(struct vcpu_svm *svm) struct svm_cpu_data *sd = per_cpu(svm_data, cpu); - svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING; /* FIXME: handle wraparound of asid_generation */ if (svm->asid_generation != sd->asid_generation) new_asid(svm, sd); @@ -3045,7 +3294,7 @@ static void svm_inject_nmi(struct kvm_vcpu *vcpu) svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI; vcpu->arch.hflags |= HF_NMI_MASK; - svm->vmcb->control.intercept |= (1ULL << INTERCEPT_IRET); + set_intercept(svm, INTERCEPT_IRET); ++vcpu->stat.nmi_injections; } @@ -3058,6 +3307,7 @@ static inline void svm_inject_irq(struct vcpu_svm *svm, int irq) control->int_ctl &= ~V_INTR_PRIO_MASK; control->int_ctl |= V_IRQ_MASK | ((/*control->int_vector >> 4*/ 0xf) << V_INTR_PRIO_SHIFT); + mark_dirty(svm->vmcb, VMCB_INTR); } static void svm_set_irq(struct kvm_vcpu *vcpu) @@ -3077,14 +3327,14 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) { struct vcpu_svm *svm = to_svm(vcpu); - if (is_nested(svm) && (vcpu->arch.hflags & HF_VINTR_MASK)) + if (is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK)) return; if (irr == -1) return; if (tpr >= irr) - svm->vmcb->control.intercept_cr_write |= INTERCEPT_CR8_MASK; + set_cr_intercept(svm, INTERCEPT_CR8_WRITE); } static int svm_nmi_allowed(struct kvm_vcpu *vcpu) @@ -3112,10 +3362,10 @@ static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) if (masked) { svm->vcpu.arch.hflags |= HF_NMI_MASK; - svm->vmcb->control.intercept |= (1ULL << INTERCEPT_IRET); + set_intercept(svm, INTERCEPT_IRET); } else { svm->vcpu.arch.hflags &= ~HF_NMI_MASK; - svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_IRET); + clr_intercept(svm, INTERCEPT_IRET); } } @@ -3131,7 +3381,7 @@ static int svm_interrupt_allowed(struct kvm_vcpu *vcpu) ret = !!(vmcb->save.rflags & X86_EFLAGS_IF); - if (is_nested(svm)) + if (is_guest_mode(vcpu)) return ret && !(svm->vcpu.arch.hflags & HF_VINTR_MASK); return ret; @@ -3177,7 +3427,12 @@ static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr) static void svm_flush_tlb(struct kvm_vcpu *vcpu) { - force_new_asid(vcpu); + struct vcpu_svm *svm = to_svm(vcpu); + + if (static_cpu_has(X86_FEATURE_FLUSHBYASID)) + svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID; + else + svm->asid_generation--; } static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu) @@ -3188,10 +3443,10 @@ static inline void sync_cr8_to_lapic(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); - if (is_nested(svm) && (vcpu->arch.hflags & HF_VINTR_MASK)) + if (is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK)) return; - if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR8_MASK)) { + if (!is_cr_intercept(svm, INTERCEPT_CR8_WRITE)) { int cr8 = svm->vmcb->control.int_ctl & V_TPR_MASK; kvm_set_cr8(vcpu, cr8); } @@ -3202,7 +3457,7 @@ static inline void sync_lapic_to_cr8(struct kvm_vcpu *vcpu) struct vcpu_svm *svm = to_svm(vcpu); u64 cr8; - if (is_nested(svm) && (vcpu->arch.hflags & HF_VINTR_MASK)) + if (is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK)) return; cr8 = kvm_get_cr8(vcpu); @@ -3289,9 +3544,6 @@ static void svm_cancel_injection(struct kvm_vcpu *vcpu) static void svm_vcpu_run(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); - u16 fs_selector; - u16 gs_selector; - u16 ldt_selector; svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX]; svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP]; @@ -3308,10 +3560,6 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) sync_lapic_to_cr8(vcpu); - save_host_msrs(vcpu); - savesegment(fs, fs_selector); - savesegment(gs, gs_selector); - ldt_selector = kvm_read_ldt(); svm->vmcb->save.cr2 = vcpu->arch.cr2; clgi(); @@ -3389,19 +3637,10 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) #endif ); - vcpu->arch.cr2 = svm->vmcb->save.cr2; - vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax; - vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp; - vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip; - - load_host_msrs(vcpu); - kvm_load_ldt(ldt_selector); - loadsegment(fs, fs_selector); #ifdef CONFIG_X86_64 - load_gs_index(gs_selector); - wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gs); + wrmsrl(MSR_GS_BASE, svm->host.gs_base); #else - loadsegment(gs, gs_selector); + loadsegment(fs, svm->host.fs); #endif reload_tss(vcpu); @@ -3410,10 +3649,21 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) stgi(); + vcpu->arch.cr2 = svm->vmcb->save.cr2; + vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax; + vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp; + vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip; + sync_cr8_to_lapic(vcpu); svm->next_rip = 0; + svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING; + + /* if exit due to PF check for async PF */ + if (svm->vmcb->control.exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR) + svm->apf_reason = kvm_read_and_reset_pf_reason(); + if (npt_enabled) { vcpu->arch.regs_avail &= ~(1 << VCPU_EXREG_PDPTR); vcpu->arch.regs_dirty &= ~(1 << VCPU_EXREG_PDPTR); @@ -3426,6 +3676,8 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_EXCP_BASE + MC_VECTOR)) svm_handle_mce(svm); + + mark_all_clean(svm->vmcb); } #undef R @@ -3435,7 +3687,8 @@ static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root) struct vcpu_svm *svm = to_svm(vcpu); svm->vmcb->save.cr3 = root; - force_new_asid(vcpu); + mark_dirty(svm->vmcb, VMCB_CR); + svm_flush_tlb(vcpu); } static void set_tdp_cr3(struct kvm_vcpu *vcpu, unsigned long root) @@ -3443,11 +3696,13 @@ static void set_tdp_cr3(struct kvm_vcpu *vcpu, unsigned long root) struct vcpu_svm *svm = to_svm(vcpu); svm->vmcb->control.nested_cr3 = root; + mark_dirty(svm->vmcb, VMCB_NPT); /* Also sync guest cr3 here in case we live migrate */ - svm->vmcb->save.cr3 = vcpu->arch.cr3; + svm->vmcb->save.cr3 = kvm_read_cr3(vcpu); + mark_dirty(svm->vmcb, VMCB_CR); - force_new_asid(vcpu); + svm_flush_tlb(vcpu); } static int is_disabled(void) @@ -3494,10 +3749,6 @@ static void svm_cpuid_update(struct kvm_vcpu *vcpu) static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) { switch (func) { - case 0x00000001: - /* Mask out xsave bit as long as it is not supported by SVM */ - entry->ecx &= ~(bit(X86_FEATURE_XSAVE)); - break; case 0x80000001: if (nested) entry->ecx |= (1 << 2); /* Set SVM bit */ @@ -3511,7 +3762,7 @@ static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) additional features */ /* Support next_rip if host supports it */ - if (svm_has(SVM_FEATURE_NRIP)) + if (boot_cpu_has(X86_FEATURE_NRIPS)) entry->edx |= SVM_FEATURE_NRIP; /* Support NPT for the guest if enabled */ @@ -3571,6 +3822,7 @@ static const struct trace_print_flags svm_exit_reasons_str[] = { { SVM_EXIT_WBINVD, "wbinvd" }, { SVM_EXIT_MONITOR, "monitor" }, { SVM_EXIT_MWAIT, "mwait" }, + { SVM_EXIT_XSETBV, "xsetbv" }, { SVM_EXIT_NPF, "npf" }, { -1, NULL } }; @@ -3594,9 +3846,7 @@ static void svm_fpu_deactivate(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); - svm->vmcb->control.intercept_exceptions |= 1 << NM_VECTOR; - if (is_nested(svm)) - svm->nested.hsave->control.intercept_exceptions |= 1 << NM_VECTOR; + set_exception_intercept(svm, NM_VECTOR); update_cr0_intercept(svm); } @@ -3627,6 +3877,7 @@ static struct kvm_x86_ops svm_x86_ops = { .get_cpl = svm_get_cpl, .get_cs_db_l_bits = kvm_get_cs_db_l_bits, .decache_cr0_guest_bits = svm_decache_cr0_guest_bits, + .decache_cr3 = svm_decache_cr3, .decache_cr4_guest_bits = svm_decache_cr4_guest_bits, .set_cr0 = svm_set_cr0, .set_cr3 = svm_set_cr3, @@ -3667,7 +3918,9 @@ static struct kvm_x86_ops svm_x86_ops = { .get_tdp_level = get_npt_level, .get_mt_mask = svm_get_mt_mask, + .get_exit_info = svm_get_exit_info, .exit_reasons_str = svm_exit_reasons_str, + .get_lpage_level = svm_get_lpage_level, .cpuid_update = svm_cpuid_update, |