From fdef3ad1b38660d74a29abc990940b5dbaaf3fc9 Mon Sep 17 00:00:00 2001 From: "He, Qing" Date: Mon, 30 Apr 2007 09:45:24 +0300 Subject: KVM: VMX: Enable io bitmaps to avoid IO port 0x80 VMEXITs This patch enables IO bitmaps control on vmx and unmasks the 0x80 port to avoid VMEXITs caused by accessing port 0x80. 0x80 is used for delays (see include/asm/io.h), and handling VMEXITs on its access is unnecessary and only slows things down. This patch improves a kernel build test by around 3%~5%. Because every VM uses the same io bitmap, it is shared between all VMs rather than a per-VM data structure. Signed-off-by: Qing He Signed-off-by: Avi Kivity --- drivers/kvm/vmx.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 46 insertions(+), 4 deletions(-) (limited to 'drivers/kvm/vmx.c') diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index c1ac106ace8..52bd5f079df 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -34,6 +34,9 @@ MODULE_LICENSE("GPL"); static DEFINE_PER_CPU(struct vmcs *, vmxarea); static DEFINE_PER_CPU(struct vmcs *, current_vmcs); +static struct page *vmx_io_bitmap_a; +static struct page *vmx_io_bitmap_b; + #ifdef CONFIG_X86_64 #define HOST_IS_64 1 #else @@ -1129,8 +1132,8 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu) vmcs_write32(GUEST_PENDING_DBG_EXCEPTIONS, 0); /* I/O */ - vmcs_write64(IO_BITMAP_A, 0); - vmcs_write64(IO_BITMAP_B, 0); + vmcs_write64(IO_BITMAP_A, page_to_phys(vmx_io_bitmap_a)); + vmcs_write64(IO_BITMAP_B, page_to_phys(vmx_io_bitmap_b)); guest_write_tsc(0); @@ -1150,7 +1153,7 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu) CPU_BASED_HLT_EXITING /* 20.6.2 */ | CPU_BASED_CR8_LOAD_EXITING /* 20.6.2 */ | CPU_BASED_CR8_STORE_EXITING /* 20.6.2 */ - | CPU_BASED_UNCOND_IO_EXITING /* 20.6.2 */ + | CPU_BASED_ACTIVATE_IO_BITMAP /* 20.6.2 */ | CPU_BASED_MOV_DR_EXITING | CPU_BASED_USE_TSC_OFFSETING /* 21.3 */ ); @@ -2188,11 +2191,50 @@ static struct kvm_arch_ops vmx_arch_ops = { static int __init vmx_init(void) { - 
return kvm_init_arch(&vmx_arch_ops, THIS_MODULE); + void *iova; + int r; + + vmx_io_bitmap_a = alloc_page(GFP_KERNEL | __GFP_HIGHMEM); + if (!vmx_io_bitmap_a) + return -ENOMEM; + + vmx_io_bitmap_b = alloc_page(GFP_KERNEL | __GFP_HIGHMEM); + if (!vmx_io_bitmap_b) { + r = -ENOMEM; + goto out; + } + + /* + * Allow direct access to the PC debug port (it is often used for I/O + * delays, but the vmexits simply slow things down). + */ + iova = kmap(vmx_io_bitmap_a); + memset(iova, 0xff, PAGE_SIZE); + clear_bit(0x80, iova); + kunmap(iova); + + iova = kmap(vmx_io_bitmap_b); + memset(iova, 0xff, PAGE_SIZE); + kunmap(iova); + + r = kvm_init_arch(&vmx_arch_ops, THIS_MODULE); + if (r) + goto out1; + + return 0; + +out1: + __free_page(vmx_io_bitmap_b); +out: + __free_page(vmx_io_bitmap_a); + return r; } static void __exit vmx_exit(void) { + __free_page(vmx_io_bitmap_b); + __free_page(vmx_io_bitmap_a); + kvm_exit_arch(); } -- cgit v1.2.3-70-g09d2 From e6adf28365b2fca0b5235cabff00c9f3d1e7bdf4 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 30 Apr 2007 16:07:54 +0300 Subject: KVM: Avoid saving and restoring some host CPU state on lightweight vmexit Many msrs and the like will only be used by the host if we schedule() or return to userspace. Therefore, we avoid saving them if we handle the exit within the kernel, and if a reschedule is not requested. Based on a patch from Eddie Dong with a couple of fixes by me. 
Signed-off-by: Yaozu(Eddie) Dong Signed-off-by: Avi Kivity --- drivers/kvm/kvm.h | 1 + drivers/kvm/kvm_main.c | 1 + drivers/kvm/vmx.c | 105 ++++++++++++++++++++++++++++--------------------- 3 files changed, 62 insertions(+), 45 deletions(-) (limited to 'drivers/kvm/vmx.c') diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index 152312c1faf..7facebd1911 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -252,6 +252,7 @@ struct kvm_stat { u32 halt_exits; u32 request_irq_exits; u32 irq_exits; + u32 light_exits; }; struct kvm_vcpu { diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 8f1f07adb04..7d682586423 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -72,6 +72,7 @@ static struct kvm_stats_debugfs_item { { "halt_exits", STAT_OFFSET(halt_exits) }, { "request_irq", STAT_OFFSET(request_irq_exits) }, { "irq_exits", STAT_OFFSET(irq_exits) }, + { "light_exits", STAT_OFFSET(light_exits) }, { NULL } }; diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index 52bd5f079df..84ce0c0930a 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -483,6 +483,13 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) case MSR_GS_BASE: vmcs_writel(GUEST_GS_BASE, data); break; + case MSR_LSTAR: + case MSR_SYSCALL_MASK: + msr = find_msr_entry(vcpu, msr_index); + if (msr) + msr->data = data; + load_msrs(vcpu->guest_msrs, NR_BAD_MSRS); + break; #endif case MSR_IA32_SYSENTER_CS: vmcs_write32(GUEST_SYSENTER_CS, data); @@ -1820,7 +1827,7 @@ static int vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) int fs_gs_ldt_reload_needed; int r; -again: +preempted: /* * Set host fs and gs selectors. Unfortunately, 22.2.3 does not * allow segment selectors with cpl > 0 or ti == 1. 
@@ -1851,13 +1858,6 @@ again: if (vcpu->guest_debug.enabled) kvm_guest_debug_pre(vcpu); - kvm_load_guest_fpu(vcpu); - - /* - * Loading guest fpu may have cleared host cr0.ts - */ - vmcs_writel(HOST_CR0, read_cr0()); - #ifdef CONFIG_X86_64 if (is_long_mode(vcpu)) { save_msrs(vcpu->host_msrs + msr_offset_kernel_gs_base, 1); @@ -1865,6 +1865,14 @@ again: } #endif +again: + kvm_load_guest_fpu(vcpu); + + /* + * Loading guest fpu may have cleared host cr0.ts + */ + vmcs_writel(HOST_CR0, read_cr0()); + asm ( /* Store host registers */ "pushf \n\t" @@ -1984,36 +1992,8 @@ again: [cr2]"i"(offsetof(struct kvm_vcpu, cr2)) : "cc", "memory" ); - /* - * Reload segment selectors ASAP. (it's needed for a functional - * kernel: x86 relies on having __KERNEL_PDA in %fs and x86_64 - * relies on having 0 in %gs for the CPU PDA to work.) - */ - if (fs_gs_ldt_reload_needed) { - load_ldt(ldt_sel); - load_fs(fs_sel); - /* - * If we have to reload gs, we must take care to - * preserve our gs base. - */ - local_irq_disable(); - load_gs(gs_sel); -#ifdef CONFIG_X86_64 - wrmsrl(MSR_GS_BASE, vmcs_readl(HOST_GS_BASE)); -#endif - local_irq_enable(); - - reload_tss(); - } ++vcpu->stat.exits; -#ifdef CONFIG_X86_64 - if (is_long_mode(vcpu)) { - save_msrs(vcpu->guest_msrs, NR_BAD_MSRS); - load_msrs(vcpu->host_msrs, NR_BAD_MSRS); - } -#endif - vcpu->interrupt_window_open = (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0; asm ("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS)); @@ -2035,24 +2015,59 @@ again: if (r > 0) { /* Give scheduler a change to reschedule. 
*/ if (signal_pending(current)) { - ++vcpu->stat.signal_exits; - post_kvm_run_save(vcpu, kvm_run); + r = -EINTR; kvm_run->exit_reason = KVM_EXIT_INTR; - return -EINTR; + ++vcpu->stat.signal_exits; + goto out; } if (dm_request_for_irq_injection(vcpu, kvm_run)) { - ++vcpu->stat.request_irq_exits; - post_kvm_run_save(vcpu, kvm_run); + r = -EINTR; kvm_run->exit_reason = KVM_EXIT_INTR; - return -EINTR; + ++vcpu->stat.request_irq_exits; + goto out; + } + if (!need_resched()) { + ++vcpu->stat.light_exits; + goto again; } - - kvm_resched(vcpu); - goto again; } } +out: + /* + * Reload segment selectors ASAP. (it's needed for a functional + * kernel: x86 relies on having __KERNEL_PDA in %fs and x86_64 + * relies on having 0 in %gs for the CPU PDA to work.) + */ + if (fs_gs_ldt_reload_needed) { + load_ldt(ldt_sel); + load_fs(fs_sel); + /* + * If we have to reload gs, we must take care to + * preserve our gs base. + */ + local_irq_disable(); + load_gs(gs_sel); +#ifdef CONFIG_X86_64 + wrmsrl(MSR_GS_BASE, vmcs_readl(HOST_GS_BASE)); +#endif + local_irq_enable(); + + reload_tss(); + } +#ifdef CONFIG_X86_64 + if (is_long_mode(vcpu)) { + save_msrs(vcpu->guest_msrs, NR_BAD_MSRS); + load_msrs(vcpu->host_msrs, NR_BAD_MSRS); + } +#endif + + if (r > 0) { + kvm_resched(vcpu); + goto preempted; + } + post_kvm_run_save(vcpu, kvm_run); return r; } -- cgit v1.2.3-70-g09d2 From 05e0c8c344dd356b42e81bdf0d47d2b884bf49b5 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 30 Apr 2007 16:15:58 +0300 Subject: KVM: Unindent some code Signed-off-by: Avi Kivity --- drivers/kvm/vmx.c | 58 +++++++++++++++++++++++++++---------------------------- 1 file changed, 29 insertions(+), 29 deletions(-) (limited to 'drivers/kvm/vmx.c') diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index 84ce0c0930a..9ebb18d07bd 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -1998,39 +1998,39 @@ again: asm ("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS)); - if (fail) { + if (unlikely(fail)) { kvm_run->exit_reason 
= KVM_EXIT_FAIL_ENTRY; kvm_run->fail_entry.hardware_entry_failure_reason = vmcs_read32(VM_INSTRUCTION_ERROR); r = 0; - } else { - /* - * Profile KVM exit RIPs: - */ - if (unlikely(prof_on == KVM_PROFILING)) - profile_hit(KVM_PROFILING, (void *)vmcs_readl(GUEST_RIP)); - - vcpu->launched = 1; - r = kvm_handle_exit(kvm_run, vcpu); - if (r > 0) { - /* Give scheduler a change to reschedule. */ - if (signal_pending(current)) { - r = -EINTR; - kvm_run->exit_reason = KVM_EXIT_INTR; - ++vcpu->stat.signal_exits; - goto out; - } - - if (dm_request_for_irq_injection(vcpu, kvm_run)) { - r = -EINTR; - kvm_run->exit_reason = KVM_EXIT_INTR; - ++vcpu->stat.request_irq_exits; - goto out; - } - if (!need_resched()) { - ++vcpu->stat.light_exits; - goto again; - } + goto out; + } + /* + * Profile KVM exit RIPs: + */ + if (unlikely(prof_on == KVM_PROFILING)) + profile_hit(KVM_PROFILING, (void *)vmcs_readl(GUEST_RIP)); + + vcpu->launched = 1; + r = kvm_handle_exit(kvm_run, vcpu); + if (r > 0) { + /* Give scheduler a change to reschedule. */ + if (signal_pending(current)) { + r = -EINTR; + kvm_run->exit_reason = KVM_EXIT_INTR; + ++vcpu->stat.signal_exits; + goto out; + } + + if (dm_request_for_irq_injection(vcpu, kvm_run)) { + r = -EINTR; + kvm_run->exit_reason = KVM_EXIT_INTR; + ++vcpu->stat.request_irq_exits; + goto out; + } + if (!need_resched()) { + ++vcpu->stat.light_exits; + goto again; } } -- cgit v1.2.3-70-g09d2 From 621358455ae043ab39bc3481f13b101bd6016c8d Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 1 May 2007 11:32:28 +0300 Subject: KVM: Be more careful restoring fs on lightweight vmexit i386 wants fs for accessing the pda even on a lightweight exit, so ensure we can always restore it. This fixes a regression on i386 introduced by the lightweight vmexit patch. 
Signed-off-by: Avi Kivity --- drivers/kvm/vmx.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'drivers/kvm/vmx.c') diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index 9ebb18d07bd..49cadd31120 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -1832,16 +1832,21 @@ preempted: * Set host fs and gs selectors. Unfortunately, 22.2.3 does not * allow segment selectors with cpl > 0 or ti == 1. */ - fs_sel = read_fs(); - gs_sel = read_gs(); ldt_sel = read_ldt(); - fs_gs_ldt_reload_needed = (fs_sel & 7) | (gs_sel & 7) | ldt_sel; - if (!fs_gs_ldt_reload_needed) { + fs_gs_ldt_reload_needed = ldt_sel; + fs_sel = read_fs(); + if (!(fs_sel & 7)) vmcs_write16(HOST_FS_SELECTOR, fs_sel); - vmcs_write16(HOST_GS_SELECTOR, gs_sel); - } else { + else { vmcs_write16(HOST_FS_SELECTOR, 0); + fs_gs_ldt_reload_needed = 1; + } + gs_sel = read_gs(); + if (!(gs_sel & 7)) + vmcs_write16(HOST_GS_SELECTOR, gs_sel); + else { vmcs_write16(HOST_GS_SELECTOR, 0); + fs_gs_ldt_reload_needed = 1; } #ifdef CONFIG_X86_64 @@ -2035,11 +2040,6 @@ again: } out: - /* - * Reload segment selectors ASAP. (it's needed for a functional - * kernel: x86 relies on having __KERNEL_PDA in %fs and x86_64 - * relies on having 0 in %gs for the CPU PDA to work.) - */ if (fs_gs_ldt_reload_needed) { load_ldt(ldt_sel); load_fs(fs_sel); -- cgit v1.2.3-70-g09d2 From 33ed6329210f3ad0638306bfa46cd3aaf5a5f929 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 2 May 2007 16:54:03 +0300 Subject: KVM: Fix potential guest state leak into host The lightweight vmexit path avoids saving and reloading certain host state. However, in certain cases lightweight vmexit handling can schedule(), which requires reloading the host state. So we store the host state in the vcpu structure, and reload it if we relinquish the vcpu.
Signed-off-by: Avi Kivity --- drivers/kvm/kvm.h | 5 ++ drivers/kvm/vmx.c | 160 ++++++++++++++++++++++++++++++------------------------ 2 files changed, 94 insertions(+), 71 deletions(-) (limited to 'drivers/kvm/vmx.c') diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index f6ee1892872..bb32383ddff 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -306,6 +306,11 @@ struct kvm_vcpu { char *guest_fx_image; int fpu_active; int guest_fpu_loaded; + struct vmx_host_state { + int loaded; + u16 fs_sel, gs_sel, ldt_sel; + int fs_gs_ldt_reload_needed; + } vmx_host_state; int mmio_needed; int mmio_read_completed; diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index 49cadd31120..677b38c4444 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -237,6 +237,93 @@ static void vmcs_set_bits(unsigned long field, u32 mask) vmcs_writel(field, vmcs_readl(field) | mask); } +static void reload_tss(void) +{ +#ifndef CONFIG_X86_64 + + /* + * VT restores TR but not its size. Useless. + */ + struct descriptor_table gdt; + struct segment_descriptor *descs; + + get_gdt(&gdt); + descs = (void *)gdt.base; + descs[GDT_ENTRY_TSS].type = 9; /* available TSS */ + load_TR_desc(); +#endif +} + +static void vmx_save_host_state(struct kvm_vcpu *vcpu) +{ + struct vmx_host_state *hs = &vcpu->vmx_host_state; + + if (hs->loaded) + return; + + hs->loaded = 1; + /* + * Set host fs and gs selectors. Unfortunately, 22.2.3 does not + * allow segment selectors with cpl > 0 or ti == 1. 
+ */ + hs->ldt_sel = read_ldt(); + hs->fs_gs_ldt_reload_needed = hs->ldt_sel; + hs->fs_sel = read_fs(); + if (!(hs->fs_sel & 7)) + vmcs_write16(HOST_FS_SELECTOR, hs->fs_sel); + else { + vmcs_write16(HOST_FS_SELECTOR, 0); + hs->fs_gs_ldt_reload_needed = 1; + } + hs->gs_sel = read_gs(); + if (!(hs->gs_sel & 7)) + vmcs_write16(HOST_GS_SELECTOR, hs->gs_sel); + else { + vmcs_write16(HOST_GS_SELECTOR, 0); + hs->fs_gs_ldt_reload_needed = 1; + } + +#ifdef CONFIG_X86_64 + vmcs_writel(HOST_FS_BASE, read_msr(MSR_FS_BASE)); + vmcs_writel(HOST_GS_BASE, read_msr(MSR_GS_BASE)); +#else + vmcs_writel(HOST_FS_BASE, segment_base(hs->fs_sel)); + vmcs_writel(HOST_GS_BASE, segment_base(hs->gs_sel)); +#endif +} + +static void vmx_load_host_state(struct kvm_vcpu *vcpu) +{ + struct vmx_host_state *hs = &vcpu->vmx_host_state; + + if (!hs->loaded) + return; + + hs->loaded = 0; + if (hs->fs_gs_ldt_reload_needed) { + load_ldt(hs->ldt_sel); + load_fs(hs->fs_sel); + /* + * If we have to reload gs, we must take care to + * preserve our gs base. + */ + local_irq_disable(); + load_gs(hs->gs_sel); +#ifdef CONFIG_X86_64 + wrmsrl(MSR_GS_BASE, vmcs_readl(HOST_GS_BASE)); +#endif + local_irq_enable(); + + reload_tss(); + } +#ifdef CONFIG_X86_64 + if (is_long_mode(vcpu)) { + save_msrs(vcpu->guest_msrs, NR_BAD_MSRS); + load_msrs(vcpu->host_msrs, NR_BAD_MSRS); + } +#endif +} + /* * Switches to specified vcpu, until a matching vcpu_put(), but assumes * vcpu mutex is already taken. @@ -283,6 +370,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu) static void vmx_vcpu_put(struct kvm_vcpu *vcpu) { + vmx_load_host_state(vcpu); kvm_put_guest_fpu(vcpu); put_cpu(); } @@ -397,23 +485,6 @@ static void guest_write_tsc(u64 guest_tsc) vmcs_write64(TSC_OFFSET, guest_tsc - host_tsc); } -static void reload_tss(void) -{ -#ifndef CONFIG_X86_64 - - /* - * VT restores TR but not its size. Useless. 
- */ - struct descriptor_table gdt; - struct segment_descriptor *descs; - - get_gdt(&gdt); - descs = (void *)gdt.base; - descs[GDT_ENTRY_TSS].type = 9; /* available TSS */ - load_TR_desc(); -#endif -} - /* * Reads an msr value (of 'msr_index') into 'pdata'. * Returns 0 on success, non-0 otherwise. @@ -1823,40 +1894,9 @@ static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu, static int vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { u8 fail; - u16 fs_sel, gs_sel, ldt_sel; - int fs_gs_ldt_reload_needed; int r; preempted: - /* - * Set host fs and gs selectors. Unfortunately, 22.2.3 does not - * allow segment selectors with cpl > 0 or ti == 1. - */ - ldt_sel = read_ldt(); - fs_gs_ldt_reload_needed = ldt_sel; - fs_sel = read_fs(); - if (!(fs_sel & 7)) - vmcs_write16(HOST_FS_SELECTOR, fs_sel); - else { - vmcs_write16(HOST_FS_SELECTOR, 0); - fs_gs_ldt_reload_needed = 1; - } - gs_sel = read_gs(); - if (!(gs_sel & 7)) - vmcs_write16(HOST_GS_SELECTOR, gs_sel); - else { - vmcs_write16(HOST_GS_SELECTOR, 0); - fs_gs_ldt_reload_needed = 1; - } - -#ifdef CONFIG_X86_64 - vmcs_writel(HOST_FS_BASE, read_msr(MSR_FS_BASE)); - vmcs_writel(HOST_GS_BASE, read_msr(MSR_GS_BASE)); -#else - vmcs_writel(HOST_FS_BASE, segment_base(fs_sel)); - vmcs_writel(HOST_GS_BASE, segment_base(gs_sel)); -#endif - if (!vcpu->mmio_read_completed) do_interrupt_requests(vcpu, kvm_run); @@ -1871,6 +1911,7 @@ preempted: #endif again: + vmx_save_host_state(vcpu); kvm_load_guest_fpu(vcpu); /* @@ -2040,29 +2081,6 @@ again: } out: - if (fs_gs_ldt_reload_needed) { - load_ldt(ldt_sel); - load_fs(fs_sel); - /* - * If we have to reload gs, we must take care to - * preserve our gs base. 
- */ - local_irq_disable(); - load_gs(gs_sel); -#ifdef CONFIG_X86_64 - wrmsrl(MSR_GS_BASE, vmcs_readl(HOST_GS_BASE)); -#endif - local_irq_enable(); - - reload_tss(); - } -#ifdef CONFIG_X86_64 - if (is_long_mode(vcpu)) { - save_msrs(vcpu->guest_msrs, NR_BAD_MSRS); - load_msrs(vcpu->host_msrs, NR_BAD_MSRS); - } -#endif - if (r > 0) { kvm_resched(vcpu); goto preempted; -- cgit v1.2.3-70-g09d2 From 707c08743060b6721b08df68f4fd546b106e7510 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 2 May 2007 17:33:43 +0300 Subject: KVM: Move some more msr mangling into vmx_save_host_state() Signed-off-by: Avi Kivity --- drivers/kvm/vmx.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'drivers/kvm/vmx.c') diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index 677b38c4444..93c3abfc1e0 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -290,6 +290,13 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu) vmcs_writel(HOST_FS_BASE, segment_base(hs->fs_sel)); vmcs_writel(HOST_GS_BASE, segment_base(hs->gs_sel)); #endif + +#ifdef CONFIG_X86_64 + if (is_long_mode(vcpu)) { + save_msrs(vcpu->host_msrs + msr_offset_kernel_gs_base, 1); + load_msrs(vcpu->guest_msrs, NR_BAD_MSRS); + } +#endif } static void vmx_load_host_state(struct kvm_vcpu *vcpu) @@ -1903,13 +1910,6 @@ preempted: if (vcpu->guest_debug.enabled) kvm_guest_debug_pre(vcpu); -#ifdef CONFIG_X86_64 - if (is_long_mode(vcpu)) { - save_msrs(vcpu->host_msrs + msr_offset_kernel_gs_base, 1); - load_msrs(vcpu->guest_msrs, NR_BAD_MSRS); - } -#endif - again: vmx_save_host_state(vcpu); kvm_load_guest_fpu(vcpu); -- cgit v1.2.3-70-g09d2 From abd3f2d622a810b7f6687f7ddb405e90e4cfb7ab Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 2 May 2007 17:57:40 +0300 Subject: KVM: Rationalize exception bitmap usage Everyone owns a piece of the exception bitmap, but they happily write to the entire thing like there's no tomorrow. 
Centralize handling in update_exception_bitmap() and have everyone call that. Signed-off-by: Avi Kivity --- drivers/kvm/vmx.c | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) (limited to 'drivers/kvm/vmx.c') diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index 93c3abfc1e0..2190020e055 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -237,6 +237,20 @@ static void vmcs_set_bits(unsigned long field, u32 mask) vmcs_writel(field, vmcs_readl(field) | mask); } +static void update_exception_bitmap(struct kvm_vcpu *vcpu) +{ + u32 eb; + + eb = 1u << PF_VECTOR; + if (!vcpu->fpu_active) + eb |= 1u << NM_VECTOR; + if (vcpu->guest_debug.enabled) + eb |= 1u << 1; + if (vcpu->rmode.active) + eb = ~0; + vmcs_write32(EXCEPTION_BITMAP, eb); +} + static void reload_tss(void) { #ifndef CONFIG_X86_64 @@ -618,10 +632,8 @@ static void vcpu_put_rsp_rip(struct kvm_vcpu *vcpu) static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg) { unsigned long dr7 = 0x400; - u32 exception_bitmap; int old_singlestep; - exception_bitmap = vmcs_read32(EXCEPTION_BITMAP); old_singlestep = vcpu->guest_debug.singlestep; vcpu->guest_debug.enabled = dbg->enabled; @@ -637,13 +649,9 @@ static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg) dr7 |= 0 << (i*4+16); /* execution breakpoint */ } - exception_bitmap |= (1u << 1); /* Trap debug exceptions */ - vcpu->guest_debug.singlestep = dbg->singlestep; - } else { - exception_bitmap &= ~(1u << 1); /* Ignore debug exceptions */ + } else vcpu->guest_debug.singlestep = 0; - } if (old_singlestep && !vcpu->guest_debug.singlestep) { unsigned long flags; @@ -653,7 +661,7 @@ static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg) vmcs_writel(GUEST_RFLAGS, flags); } - vmcs_write32(EXCEPTION_BITMAP, exception_bitmap); + update_exception_bitmap(vcpu); vmcs_writel(GUEST_DR7, dr7); return 0; @@ -767,14 +775,6 @@ static __exit void hardware_unsetup(void) 
free_kvm_area(); } -static void update_exception_bitmap(struct kvm_vcpu *vcpu) -{ - if (vcpu->rmode.active) - vmcs_write32(EXCEPTION_BITMAP, ~0); - else - vmcs_write32(EXCEPTION_BITMAP, 1 << PF_VECTOR); -} - static void fix_pmode_dataseg(int seg, struct kvm_save_segment *save) { struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; @@ -942,7 +942,7 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) if (!(cr0 & CR0_TS_MASK)) { vcpu->fpu_active = 1; - vmcs_clear_bits(EXCEPTION_BITMAP, CR0_TS_MASK); + update_exception_bitmap(vcpu); } vmcs_writel(CR0_READ_SHADOW, cr0); @@ -958,7 +958,7 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) if (!(vcpu->cr0 & CR0_TS_MASK)) { vcpu->fpu_active = 0; vmcs_set_bits(GUEST_CR0, CR0_TS_MASK); - vmcs_set_bits(EXCEPTION_BITMAP, 1 << NM_VECTOR); + update_exception_bitmap(vcpu); } } @@ -1243,7 +1243,6 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu) | CPU_BASED_USE_TSC_OFFSETING /* 21.3 */ ); - vmcs_write32(EXCEPTION_BITMAP, 1 << PF_VECTOR); vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, 0); vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, 0); vmcs_write32(CR3_TARGET_COUNT, 0); /* 22.2.1 */ @@ -1329,6 +1328,7 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu) #ifdef CONFIG_X86_64 vmx_set_efer(vcpu, 0); #endif + update_exception_bitmap(vcpu); return 0; @@ -1489,7 +1489,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) if (is_no_device(intr_info)) { vcpu->fpu_active = 1; - vmcs_clear_bits(EXCEPTION_BITMAP, 1 << NM_VECTOR); + update_exception_bitmap(vcpu); if (!(vcpu->cr0 & CR0_TS_MASK)) vmcs_clear_bits(GUEST_CR0, CR0_TS_MASK); return 1; @@ -1684,7 +1684,7 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) case 2: /* clts */ vcpu_load_rsp_rip(vcpu); vcpu->fpu_active = 1; - vmcs_clear_bits(EXCEPTION_BITMAP, 1 << NM_VECTOR); + update_exception_bitmap(vcpu); vmcs_clear_bits(GUEST_CR0, CR0_TS_MASK); vcpu->cr0 &= ~CR0_TS_MASK; vmcs_writel(CR0_READ_SHADOW, 
vcpu->cr0); -- cgit v1.2.3-70-g09d2 From 5fd86fcfc0dbdd42296b1182945f7a0a05578211 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 2 May 2007 20:40:00 +0300 Subject: KVM: Consolidate guest fpu activation and deactivation Easier to keep track of where the fpu is this way. Signed-off-by: Avi Kivity --- drivers/kvm/kvm.h | 2 +- drivers/kvm/vmx.c | 50 +++++++++++++++++++++++++++++++------------------- 2 files changed, 32 insertions(+), 20 deletions(-) (limited to 'drivers/kvm/vmx.c') diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index bb32383ddff..472408743d7 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -42,7 +42,7 @@ (CR0_PG_MASK | CR0_PE_MASK | CR0_WP_MASK | CR0_NE_MASK \ | CR0_NW_MASK | CR0_CD_MASK) #define KVM_VM_CR0_ALWAYS_ON \ - (CR0_PG_MASK | CR0_PE_MASK | CR0_WP_MASK | CR0_NE_MASK) + (CR0_PG_MASK | CR0_PE_MASK | CR0_WP_MASK | CR0_NE_MASK | CR0_TS_MASK) #define KVM_GUEST_CR4_MASK \ (CR4_PSE_MASK | CR4_PAE_MASK | CR4_PGE_MASK | CR4_VMXE_MASK | CR4_VME_MASK) #define KVM_PMODE_VM_CR4_ALWAYS_ON (CR4_VMXE_MASK | CR4_PAE_MASK) diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index 2190020e055..096cb6a1e89 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -396,6 +396,26 @@ static void vmx_vcpu_put(struct kvm_vcpu *vcpu) put_cpu(); } +static void vmx_fpu_activate(struct kvm_vcpu *vcpu) +{ + if (vcpu->fpu_active) + return; + vcpu->fpu_active = 1; + vmcs_clear_bits(GUEST_CR0, CR0_TS_MASK); + if (vcpu->cr0 & CR0_TS_MASK) + vmcs_set_bits(GUEST_CR0, CR0_TS_MASK); + update_exception_bitmap(vcpu); +} + +static void vmx_fpu_deactivate(struct kvm_vcpu *vcpu) +{ + if (!vcpu->fpu_active) + return; + vcpu->fpu_active = 0; + vmcs_set_bits(GUEST_CR0, CR0_TS_MASK); + update_exception_bitmap(vcpu); +} + static void vmx_vcpu_decache(struct kvm_vcpu *vcpu) { vcpu_clear(vcpu); @@ -925,6 +945,8 @@ static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) { + vmx_fpu_deactivate(vcpu); + if 
(vcpu->rmode.active && (cr0 & CR0_PE_MASK)) enter_pmode(vcpu); @@ -940,26 +962,20 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) } #endif - if (!(cr0 & CR0_TS_MASK)) { - vcpu->fpu_active = 1; - update_exception_bitmap(vcpu); - } - vmcs_writel(CR0_READ_SHADOW, cr0); vmcs_writel(GUEST_CR0, (cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON); vcpu->cr0 = cr0; + + if (!(cr0 & CR0_TS_MASK) || !(cr0 & CR0_PE_MASK)) + vmx_fpu_activate(vcpu); } static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) { vmcs_writel(GUEST_CR3, cr3); - - if (!(vcpu->cr0 & CR0_TS_MASK)) { - vcpu->fpu_active = 0; - vmcs_set_bits(GUEST_CR0, CR0_TS_MASK); - update_exception_bitmap(vcpu); - } + if (vcpu->cr0 & CR0_PE_MASK) + vmx_fpu_deactivate(vcpu); } static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) @@ -1328,6 +1344,7 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu) #ifdef CONFIG_X86_64 vmx_set_efer(vcpu, 0); #endif + vmx_fpu_activate(vcpu); update_exception_bitmap(vcpu); return 0; @@ -1488,10 +1505,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) } if (is_no_device(intr_info)) { - vcpu->fpu_active = 1; - update_exception_bitmap(vcpu); - if (!(vcpu->cr0 & CR0_TS_MASK)) - vmcs_clear_bits(GUEST_CR0, CR0_TS_MASK); + vmx_fpu_activate(vcpu); return 1; } @@ -1683,11 +1697,10 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) break; case 2: /* clts */ vcpu_load_rsp_rip(vcpu); - vcpu->fpu_active = 1; - update_exception_bitmap(vcpu); - vmcs_clear_bits(GUEST_CR0, CR0_TS_MASK); + vmx_fpu_deactivate(vcpu); vcpu->cr0 &= ~CR0_TS_MASK; vmcs_writel(CR0_READ_SHADOW, vcpu->cr0); + vmx_fpu_activate(vcpu); skip_emulated_instruction(vcpu); return 1; case 1: /*mov from cr*/ @@ -2158,7 +2171,6 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu) vmcs_clear(vmcs); vcpu->vmcs = vmcs; vcpu->launched = 0; - vcpu->fpu_active = 1; return 0; -- cgit v1.2.3-70-g09d2 From eff708bc2bacd4f22cf844871341bef341bd096a Mon Sep 17 
00:00:00 2001 From: Avi Kivity Date: Sun, 6 May 2007 16:10:01 +0300 Subject: KVM: VMX: Only reload guest msrs if they are already loaded If we set an msr via an ioctl() instead of by handling a guest exit, we have the host state loaded, so reloading the msrs would clobber host state instead of guest state. This fixes a host oops (and loss of a cpu) on a guest reboot. Signed-off-by: Avi Kivity --- drivers/kvm/vmx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/kvm/vmx.c') diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index 096cb6a1e89..b353eaa0a44 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -600,7 +600,8 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) msr = find_msr_entry(vcpu, msr_index); if (msr) msr->data = data; - load_msrs(vcpu->guest_msrs, NR_BAD_MSRS); + if (vcpu->vmx_host_state.loaded) + load_msrs(vcpu->guest_msrs, NR_BAD_MSRS); break; #endif case MSR_IA32_SYSENTER_CS: -- cgit v1.2.3-70-g09d2 From 653e3108b7d6097d25089d25ab4e99bc58b28962 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 7 May 2007 10:55:37 +0300 Subject: KVM: Avoid corrupting tr in real mode The real mode tr needs to be set to a specific tss so that I/O instructions can function. Divert the new tr values to the real mode save area from where they will be restored on transition to protected mode. This fixes some crashes on reboot when the bios accesses an I/O instruction. 
Signed-off-by: Avi Kivity --- drivers/kvm/vmx.c | 45 +++++++++++++++++++++++++++++++-------------- 1 file changed, 31 insertions(+), 14 deletions(-) (limited to 'drivers/kvm/vmx.c') diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index b353eaa0a44..e39ebe0b695 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -1042,23 +1042,11 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu, var->unusable = (ar >> 16) & 1; } -static void vmx_set_segment(struct kvm_vcpu *vcpu, - struct kvm_segment *var, int seg) +static u32 vmx_segment_access_rights(struct kvm_segment *var) { - struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; u32 ar; - vmcs_writel(sf->base, var->base); - vmcs_write32(sf->limit, var->limit); - vmcs_write16(sf->selector, var->selector); - if (vcpu->rmode.active && var->s) { - /* - * Hack real-mode segments into vm86 compatibility. - */ - if (var->base == 0xffff0000 && var->selector == 0xf000) - vmcs_writel(sf->base, 0xf0000); - ar = 0xf3; - } else if (var->unusable) + if (var->unusable) ar = 1 << 16; else { ar = var->type & 15; @@ -1072,6 +1060,35 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, } if (ar == 0) /* a 0 value means unusable */ ar = AR_UNUSABLE_MASK; + + return ar; +} + +static void vmx_set_segment(struct kvm_vcpu *vcpu, + struct kvm_segment *var, int seg) +{ + struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; + u32 ar; + + if (vcpu->rmode.active && seg == VCPU_SREG_TR) { + vcpu->rmode.tr.selector = var->selector; + vcpu->rmode.tr.base = var->base; + vcpu->rmode.tr.limit = var->limit; + vcpu->rmode.tr.ar = vmx_segment_access_rights(var); + return; + } + vmcs_writel(sf->base, var->base); + vmcs_write32(sf->limit, var->limit); + vmcs_write16(sf->selector, var->selector); + if (vcpu->rmode.active && var->s) { + /* + * Hack real-mode segments into vm86 compatibility. 
+ */ + if (var->base == 0xffff0000 && var->selector == 0xf000) + vmcs_writel(sf->base, 0xf0000); + ar = 0xf3; + } else + ar = vmx_segment_access_rights(var); vmcs_write32(sf->ar_bytes, ar); } -- cgit v1.2.3-70-g09d2 From cd0536d7cb4d5d5c5aa37ccd3edd71c4b0524add Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 8 May 2007 11:34:07 +0300 Subject: KVM: Fix vmx I/O bitmap initialization on highmem systems kunmap() expects a struct page, not a virtual address. Fixes an oops loading kvm-intel.ko on i386 with CONFIG_HIGHMEM. Thanks to Michael Ivanov for reporting. Signed-off-by: Avi Kivity --- drivers/kvm/vmx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/kvm/vmx.c') diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index e39ebe0b695..34171d9008f 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -2274,11 +2274,11 @@ static int __init vmx_init(void) iova = kmap(vmx_io_bitmap_a); memset(iova, 0xff, PAGE_SIZE); clear_bit(0x80, iova); - kunmap(iova); + kunmap(vmx_io_bitmap_a); iova = kmap(vmx_io_bitmap_b); memset(iova, 0xff, PAGE_SIZE); - kunmap(iova); + kunmap(vmx_io_bitmap_b); r = kvm_init_arch(&vmx_arch_ops, THIS_MODULE); if (r) -- cgit v1.2.3-70-g09d2 From cd2276a795b013d1416c96b38eec90a66cdd10c4 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 14 May 2007 20:41:13 +0300 Subject: KVM: VMX: Use local labels in inline assembly This makes oprofile dumps and disassembly easier to read.
Signed-off-by: Avi Kivity --- drivers/kvm/vmx.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'drivers/kvm/vmx.c') diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index 34171d9008f..c4c553588a2 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -1188,7 +1188,7 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu) struct descriptor_table dt; int i; int ret = 0; - extern asmlinkage void kvm_vmx_return(void); + unsigned long kvm_vmx_return; if (!init_rmode_tss(vcpu->kvm)) { ret = -ENOMEM; @@ -1306,8 +1306,8 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu) get_idt(&dt); vmcs_writel(HOST_IDTR_BASE, dt.base); /* 22.2.4 */ - - vmcs_writel(HOST_RIP, (unsigned long)kvm_vmx_return); /* 22.2.5 */ + asm ("mov $.Lkvm_vmx_return, %0" : "=r"(kvm_vmx_return)); + vmcs_writel(HOST_RIP, kvm_vmx_return); /* 22.2.5 */ rdmsr(MSR_IA32_SYSENTER_CS, host_sysenter_cs, junk); vmcs_write32(HOST_IA32_SYSENTER_CS, host_sysenter_cs); @@ -1997,12 +1997,11 @@ again: "mov %c[rcx](%3), %%ecx \n\t" /* kills %3 (ecx) */ #endif /* Enter guest mode */ - "jne launched \n\t" + "jne .Llaunched \n\t" ASM_VMX_VMLAUNCH "\n\t" - "jmp kvm_vmx_return \n\t" - "launched: " ASM_VMX_VMRESUME "\n\t" - ".globl kvm_vmx_return \n\t" - "kvm_vmx_return: " + "jmp .Lkvm_vmx_return \n\t" + ".Llaunched: " ASM_VMX_VMRESUME "\n\t" + ".Lkvm_vmx_return: " /* Save guest registers, load host registers, keep flags */ #ifdef CONFIG_X86_64 "xchg %3, (%%rsp) \n\t" -- cgit v1.2.3-70-g09d2 From b3f37707b05e9ce82d5bec660e9d0b15452ee9a0 Mon Sep 17 00:00:00 2001 From: Nitin A Kamble Date: Thu, 17 May 2007 15:50:34 +0300 Subject: KVM: VMX: Handle #SS faults from real mode Instructions with address size override prefix opcode 0x67 Cause the #SS fault with 0 error code in VM86 mode. Forward them to the emulator. 
Signed-Off-By: Nitin A Kamble Signed-off-by: Avi Kivity --- drivers/kvm/vmx.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers/kvm/vmx.c') diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index c4c553588a2..a05bfa08587 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -1488,7 +1488,11 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu, if (!vcpu->rmode.active) return 0; - if (vec == GP_VECTOR && err_code == 0) + /* + * Instruction with address size override prefix opcode 0x67 + * Cause the #SS fault with 0 error code in VM86 mode. + */ + if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) if (emulate_instruction(vcpu, NULL, 0, 0) == EMULATE_DONE) return 1; return 0; -- cgit v1.2.3-70-g09d2 From a75beee6e4f5d2f0ae6e28cd626b2f157e93afd2 Mon Sep 17 00:00:00 2001 From: Eddie Dong Date: Thu, 17 May 2007 18:55:15 +0300 Subject: KVM: VMX: Avoid saving and restoring msrs on lightweight vmexit In a lightweight exit (where we exit and reenter the guest without scheduling or exiting to userspace in between), we don't need various msrs on the host, and avoiding shuffling them around reduces raw exit time by 8%. i386 compile fix by Daniel Hecken . 
Signed-off-by: Yaozu (Eddie) Dong Signed-off-by: Avi Kivity --- drivers/kvm/kvm.h | 4 ++ drivers/kvm/vmx.c | 128 ++++++++++++++++++++++++++++++------------------------ 2 files changed, 76 insertions(+), 56 deletions(-) (limited to 'drivers/kvm/vmx.c') diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index fc4a6c1235f..c252efed49d 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -288,6 +288,10 @@ struct kvm_vcpu { u64 apic_base; u64 ia32_misc_enable_msr; int nmsrs; + int save_nmsrs; +#ifdef CONFIG_X86_64 + int msr_offset_kernel_gs_base; +#endif struct vmx_msr_entry *guest_msrs; struct vmx_msr_entry *host_msrs; diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index a05bfa08587..872ca0381fb 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -85,19 +85,6 @@ static const u32 vmx_msr_index[] = { }; #define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index) -#ifdef CONFIG_X86_64 -static unsigned msr_offset_kernel_gs_base; -#define NR_64BIT_MSRS 4 -/* - * avoid save/load MSR_SYSCALL_MASK and MSR_LSTAR by std vt - * mechanism (cpu bug AA24) - */ -#define NR_BAD_MSRS 2 -#else -#define NR_64BIT_MSRS 0 -#define NR_BAD_MSRS 0 -#endif - static inline int is_page_fault(u32 intr_info) { return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | @@ -118,13 +105,23 @@ static inline int is_external_interrupt(u32 intr_info) == (INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK); } -static struct vmx_msr_entry *find_msr_entry(struct kvm_vcpu *vcpu, u32 msr) +static int __find_msr_index(struct kvm_vcpu *vcpu, u32 msr) { int i; for (i = 0; i < vcpu->nmsrs; ++i) if (vcpu->guest_msrs[i].index == msr) - return &vcpu->guest_msrs[i]; + return i; + return -1; +} + +static struct vmx_msr_entry *find_msr_entry(struct kvm_vcpu *vcpu, u32 msr) +{ + int i; + + i = __find_msr_index(vcpu, msr); + if (i >= 0) + return &vcpu->guest_msrs[i]; return NULL; } @@ -307,10 +304,10 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu) #ifdef CONFIG_X86_64 if (is_long_mode(vcpu)) { - 
save_msrs(vcpu->host_msrs + msr_offset_kernel_gs_base, 1); - load_msrs(vcpu->guest_msrs, NR_BAD_MSRS); + save_msrs(vcpu->host_msrs + vcpu->msr_offset_kernel_gs_base, 1); } #endif + load_msrs(vcpu->guest_msrs, vcpu->save_nmsrs); } static void vmx_load_host_state(struct kvm_vcpu *vcpu) @@ -337,12 +334,8 @@ static void vmx_load_host_state(struct kvm_vcpu *vcpu) reload_tss(); } -#ifdef CONFIG_X86_64 - if (is_long_mode(vcpu)) { - save_msrs(vcpu->guest_msrs, NR_BAD_MSRS); - load_msrs(vcpu->host_msrs, NR_BAD_MSRS); - } -#endif + save_msrs(vcpu->guest_msrs, vcpu->save_nmsrs); + load_msrs(vcpu->host_msrs, vcpu->save_nmsrs); } /* @@ -463,6 +456,20 @@ static void vmx_inject_gp(struct kvm_vcpu *vcpu, unsigned error_code) INTR_INFO_VALID_MASK); } +/* + * Swap MSR entry in host/guest MSR entry array. + */ +void move_msr_up(struct kvm_vcpu *vcpu, int from, int to) +{ + struct vmx_msr_entry tmp; + tmp = vcpu->guest_msrs[to]; + vcpu->guest_msrs[to] = vcpu->guest_msrs[from]; + vcpu->guest_msrs[from] = tmp; + tmp = vcpu->host_msrs[to]; + vcpu->host_msrs[to] = vcpu->host_msrs[from]; + vcpu->host_msrs[from] = tmp; +} + /* * Set up the vmcs to automatically save and restore system * msrs. 
Don't touch the 64-bit msrs if the guest is in legacy @@ -470,35 +477,54 @@ static void vmx_inject_gp(struct kvm_vcpu *vcpu, unsigned error_code) */ static void setup_msrs(struct kvm_vcpu *vcpu) { - int nr_skip, nr_good_msrs; + int index, save_nmsrs; - if (is_long_mode(vcpu)) - nr_skip = NR_BAD_MSRS; - else - nr_skip = NR_64BIT_MSRS; - nr_good_msrs = vcpu->nmsrs - nr_skip; + save_nmsrs = 0; +#ifdef CONFIG_X86_64 + if (is_long_mode(vcpu)) { + index = __find_msr_index(vcpu, MSR_SYSCALL_MASK); + if (index >= 0) + move_msr_up(vcpu, index, save_nmsrs++); + index = __find_msr_index(vcpu, MSR_LSTAR); + if (index >= 0) + move_msr_up(vcpu, index, save_nmsrs++); + index = __find_msr_index(vcpu, MSR_CSTAR); + if (index >= 0) + move_msr_up(vcpu, index, save_nmsrs++); + index = __find_msr_index(vcpu, MSR_KERNEL_GS_BASE); + if (index >= 0) + move_msr_up(vcpu, index, save_nmsrs++); + /* + * MSR_K6_STAR is only needed on long mode guests, and only + * if efer.sce is enabled. + */ + index = __find_msr_index(vcpu, MSR_K6_STAR); + if ((index >= 0) && (vcpu->shadow_efer & EFER_SCE)) + move_msr_up(vcpu, index, save_nmsrs++); + } +#endif + vcpu->save_nmsrs = save_nmsrs; - /* - * MSR_K6_STAR is only needed on long mode guests, and only - * if efer.sce is enabled. 
- */ - if (find_msr_entry(vcpu, MSR_K6_STAR)) { - --nr_good_msrs; #ifdef CONFIG_X86_64 - if (is_long_mode(vcpu) && (vcpu->shadow_efer & EFER_SCE)) - ++nr_good_msrs; + vcpu->msr_offset_kernel_gs_base = + __find_msr_index(vcpu, MSR_KERNEL_GS_BASE); #endif + index = __find_msr_index(vcpu, MSR_EFER); + if (index >= 0) + save_nmsrs = 1; + else { + save_nmsrs = 0; + index = 0; } - vmcs_writel(VM_ENTRY_MSR_LOAD_ADDR, - virt_to_phys(vcpu->guest_msrs + nr_skip)); + virt_to_phys(vcpu->guest_msrs + index)); vmcs_writel(VM_EXIT_MSR_STORE_ADDR, - virt_to_phys(vcpu->guest_msrs + nr_skip)); + virt_to_phys(vcpu->guest_msrs + index)); vmcs_writel(VM_EXIT_MSR_LOAD_ADDR, - virt_to_phys(vcpu->host_msrs + nr_skip)); - vmcs_write32(VM_EXIT_MSR_STORE_COUNT, nr_good_msrs); /* 22.2.2 */ - vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, nr_good_msrs); /* 22.2.2 */ - vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, nr_good_msrs); /* 22.2.2 */ + virt_to_phys(vcpu->host_msrs + index)); + vmcs_write32(VM_EXIT_MSR_STORE_COUNT, save_nmsrs); + vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, save_nmsrs); + vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, save_nmsrs); } /* @@ -595,14 +621,6 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) case MSR_GS_BASE: vmcs_writel(GUEST_GS_BASE, data); break; - case MSR_LSTAR: - case MSR_SYSCALL_MASK: - msr = find_msr_entry(vcpu, msr_index); - if (msr) - msr->data = data; - if (vcpu->vmx_host_state.loaded) - load_msrs(vcpu->guest_msrs, NR_BAD_MSRS); - break; #endif case MSR_IA32_SYSENTER_CS: vmcs_write32(GUEST_SYSENTER_CS, data); @@ -620,6 +638,8 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) msr = find_msr_entry(vcpu, msr_index); if (msr) { msr->data = data; + if (vcpu->vmx_host_state.loaded) + load_msrs(vcpu->guest_msrs,vcpu->save_nmsrs); break; } return kvm_set_msr_common(vcpu, msr_index, data); @@ -1331,10 +1351,6 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu) vcpu->host_msrs[j].reserved = 0; vcpu->host_msrs[j].data = data; vcpu->guest_msrs[j] = 
vcpu->host_msrs[j]; -#ifdef CONFIG_X86_64 - if (index == MSR_KERNEL_GS_BASE) - msr_offset_kernel_gs_base = j; -#endif ++vcpu->nmsrs; } -- cgit v1.2.3-70-g09d2 From f2be4dd65437c60a4eb222bc40bc8caded62631a Mon Sep 17 00:00:00 2001 From: Eddie Dong Date: Sun, 20 May 2007 10:50:08 +0300 Subject: KVM: VMX: Cleanup redundant code in MSR set Signed-off-by: Yaozu (Eddie) Dong Signed-off-by: Avi Kivity --- drivers/kvm/vmx.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers/kvm/vmx.c') diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index 872ca0381fb..dc99191dbb4 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -643,8 +643,6 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) break; } return kvm_set_msr_common(vcpu, msr_index, data); - msr->data = data; - break; } return 0; -- cgit v1.2.3-70-g09d2 From 2cc51560aed0edb291341089d3475e1fbe8bfd04 Mon Sep 17 00:00:00 2001 From: Eddie Dong Date: Mon, 21 May 2007 07:28:09 +0300 Subject: KVM: VMX: Avoid saving and restoring msr_efer on lightweight vmexit MSR_EFER.LME/LMA bits are automatically saved/restored by VMX hardware, KVM only needs to save NX/SCE bits at time of heavy weight VM Exit. But clearing NX bits in host environment may cause system hang if the host page table is using EXB bits, thus we leave NX bits as it is. If Host NX=1 and guest NX=0, we can do guest page table EXB bits check before inserting a shadow pte (though no guest is expecting to see this kind of gp fault). If host NX=0, we present guest no Execute-Disable feature to guest, thus no host NX=0, guest NX=1 combination. This patch reduces raw vmexit time by ~27%. Me: fix compile warnings on i386.
Signed-off-by: Yaozu (Eddie) Dong Signed-off-by: Avi Kivity --- drivers/kvm/kvm.h | 2 ++ drivers/kvm/kvm_main.c | 23 +++++++++++++++++ drivers/kvm/vmx.c | 67 ++++++++++++++++++++++++++++++++++---------------- 3 files changed, 71 insertions(+), 21 deletions(-) (limited to 'drivers/kvm/vmx.c') diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index c252efed49d..db2bc6f168c 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -255,6 +255,7 @@ struct kvm_stat { u32 request_irq_exits; u32 irq_exits; u32 light_exits; + u32 efer_reload; }; struct kvm_vcpu { @@ -289,6 +290,7 @@ struct kvm_vcpu { u64 ia32_misc_enable_msr; int nmsrs; int save_nmsrs; + int msr_offset_efer; #ifdef CONFIG_X86_64 int msr_offset_kernel_gs_base; #endif diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 095d673b9ef..af07cd539bb 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -73,6 +73,7 @@ static struct kvm_stats_debugfs_item { { "request_irq", STAT_OFFSET(request_irq_exits) }, { "irq_exits", STAT_OFFSET(irq_exits) }, { "light_exits", STAT_OFFSET(light_exits) }, + { "efer_reload", STAT_OFFSET(efer_reload) }, { NULL } }; @@ -2378,6 +2379,27 @@ out: return r; } +static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu) +{ + u64 efer; + int i; + struct kvm_cpuid_entry *e, *entry; + + rdmsrl(MSR_EFER, efer); + entry = NULL; + for (i = 0; i < vcpu->cpuid_nent; ++i) { + e = &vcpu->cpuid_entries[i]; + if (e->function == 0x80000001) { + entry = e; + break; + } + } + if (entry && (entry->edx & EFER_NX) && !(efer & EFER_NX)) { + entry->edx &= ~(1 << 20); + printk(KERN_INFO ": guest NX capability removed\n"); + } +} + static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid *cpuid, struct kvm_cpuid_entry __user *entries) @@ -2392,6 +2414,7 @@ static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, cpuid->nent * sizeof(struct kvm_cpuid_entry))) goto out; vcpu->cpuid_nent = cpuid->nent; + cpuid_fix_nx_cap(vcpu); return 0; out: diff --git 
a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index dc99191dbb4..93e5bb2c40e 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -42,6 +42,7 @@ static struct page *vmx_io_bitmap_b; #else #define HOST_IS_64 0 #endif +#define EFER_SAVE_RESTORE_BITS ((u64)EFER_SCE) static struct vmcs_descriptor { int size; @@ -85,6 +86,18 @@ static const u32 vmx_msr_index[] = { }; #define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index) +static inline u64 msr_efer_save_restore_bits(struct vmx_msr_entry msr) +{ + return (u64)msr.data & EFER_SAVE_RESTORE_BITS; +} + +static inline int msr_efer_need_save_restore(struct kvm_vcpu *vcpu) +{ + int efer_offset = vcpu->msr_offset_efer; + return msr_efer_save_restore_bits(vcpu->host_msrs[efer_offset]) != + msr_efer_save_restore_bits(vcpu->guest_msrs[efer_offset]); +} + static inline int is_page_fault(u32 intr_info) { return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | @@ -265,6 +278,19 @@ static void reload_tss(void) #endif } +static void load_transition_efer(struct kvm_vcpu *vcpu) +{ + u64 trans_efer; + int efer_offset = vcpu->msr_offset_efer; + + trans_efer = vcpu->host_msrs[efer_offset].data; + trans_efer &= ~EFER_SAVE_RESTORE_BITS; + trans_efer |= msr_efer_save_restore_bits( + vcpu->guest_msrs[efer_offset]); + wrmsrl(MSR_EFER, trans_efer); + vcpu->stat.efer_reload++; +} + static void vmx_save_host_state(struct kvm_vcpu *vcpu) { struct vmx_host_state *hs = &vcpu->vmx_host_state; @@ -308,6 +334,8 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu) } #endif load_msrs(vcpu->guest_msrs, vcpu->save_nmsrs); + if (msr_efer_need_save_restore(vcpu)) + load_transition_efer(vcpu); } static void vmx_load_host_state(struct kvm_vcpu *vcpu) @@ -336,6 +364,8 @@ static void vmx_load_host_state(struct kvm_vcpu *vcpu) } save_msrs(vcpu->guest_msrs, vcpu->save_nmsrs); load_msrs(vcpu->host_msrs, vcpu->save_nmsrs); + if (msr_efer_need_save_restore(vcpu)) + load_msrs(vcpu->host_msrs + vcpu->msr_offset_efer, 1); } /* @@ -477,11 +507,13 @@ void 
move_msr_up(struct kvm_vcpu *vcpu, int from, int to) */ static void setup_msrs(struct kvm_vcpu *vcpu) { - int index, save_nmsrs; + int save_nmsrs; save_nmsrs = 0; #ifdef CONFIG_X86_64 if (is_long_mode(vcpu)) { + int index; + index = __find_msr_index(vcpu, MSR_SYSCALL_MASK); if (index >= 0) move_msr_up(vcpu, index, save_nmsrs++); @@ -509,22 +541,7 @@ static void setup_msrs(struct kvm_vcpu *vcpu) vcpu->msr_offset_kernel_gs_base = __find_msr_index(vcpu, MSR_KERNEL_GS_BASE); #endif - index = __find_msr_index(vcpu, MSR_EFER); - if (index >= 0) - save_nmsrs = 1; - else { - save_nmsrs = 0; - index = 0; - } - vmcs_writel(VM_ENTRY_MSR_LOAD_ADDR, - virt_to_phys(vcpu->guest_msrs + index)); - vmcs_writel(VM_EXIT_MSR_STORE_ADDR, - virt_to_phys(vcpu->guest_msrs + index)); - vmcs_writel(VM_EXIT_MSR_LOAD_ADDR, - virt_to_phys(vcpu->host_msrs + index)); - vmcs_write32(VM_EXIT_MSR_STORE_COUNT, save_nmsrs); - vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, save_nmsrs); - vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, save_nmsrs); + vcpu->msr_offset_efer = __find_msr_index(vcpu, MSR_EFER); } /* @@ -611,10 +628,15 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) { struct vmx_msr_entry *msr; + int ret = 0; + switch (msr_index) { #ifdef CONFIG_X86_64 case MSR_EFER: - return kvm_set_msr_common(vcpu, msr_index, data); + ret = kvm_set_msr_common(vcpu, msr_index, data); + if (vcpu->vmx_host_state.loaded) + load_transition_efer(vcpu); + break; case MSR_FS_BASE: vmcs_writel(GUEST_FS_BASE, data); break; @@ -639,13 +661,13 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) if (msr) { msr->data = data; if (vcpu->vmx_host_state.loaded) - load_msrs(vcpu->guest_msrs,vcpu->save_nmsrs); + load_msrs(vcpu->guest_msrs, vcpu->save_nmsrs); break; } - return kvm_set_msr_common(vcpu, msr_index, data); + ret = kvm_set_msr_common(vcpu, msr_index, data); } - return 0; + return ret; } /* @@ -1326,6 +1348,9 @@ 
static int vmx_vcpu_setup(struct kvm_vcpu *vcpu) asm ("mov $.Lkvm_vmx_return, %0" : "=r"(kvm_vmx_return)); vmcs_writel(HOST_RIP, kvm_vmx_return); /* 22.2.5 */ + vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0); + vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0); + vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0); rdmsr(MSR_IA32_SYSENTER_CS, host_sysenter_cs, junk); vmcs_write32(HOST_IA32_SYSENTER_CS, host_sysenter_cs); -- cgit v1.2.3-70-g09d2 From 17c3ba9d37dbda490792a2b52953f09d0dee30d6 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 4 Jun 2007 15:58:30 +0300 Subject: KVM: Lazy guest cr3 switching Switch guest paging context may require us to allocate memory, which might fail. Instead of wiring up error paths everywhere, make context switching lazy and actually do the switch before the next guest entry, where we can return an error if allocation fails. Signed-off-by: Avi Kivity --- drivers/kvm/kvm.h | 10 ++++++++++ drivers/kvm/mmu.c | 43 ++++++++++++++++++++++--------------------- drivers/kvm/svm.c | 4 ++++ drivers/kvm/vmx.c | 4 ++++ 4 files changed, 40 insertions(+), 21 deletions(-) (limited to 'drivers/kvm/vmx.c') diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index 199e1e9bae2..3ec4e26b9bd 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -544,6 +544,8 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *old, const u8 *new, int bytes); int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva); void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu); +int kvm_mmu_load(struct kvm_vcpu *vcpu); +void kvm_mmu_unload(struct kvm_vcpu *vcpu); int kvm_hypercall(struct kvm_vcpu *vcpu, struct kvm_run *run); @@ -555,6 +557,14 @@ static inline int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, return vcpu->mmu.page_fault(vcpu, gva, error_code); } +static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu) +{ + if (likely(vcpu->mmu.root_hpa != INVALID_PAGE)) + return 0; + + return kvm_mmu_load(vcpu); +} + static inline int is_long_mode(struct kvm_vcpu *vcpu) 
{ #ifdef CONFIG_X86_64 diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c index 283df031b03..5915d7a1c4f 100644 --- a/drivers/kvm/mmu.c +++ b/drivers/kvm/mmu.c @@ -949,9 +949,7 @@ static int nonpaging_init_context(struct kvm_vcpu *vcpu) context->free = nonpaging_free; context->root_level = 0; context->shadow_root_level = PT32E_ROOT_LEVEL; - mmu_alloc_roots(vcpu); - ASSERT(VALID_PAGE(context->root_hpa)); - kvm_arch_ops->set_cr3(vcpu, context->root_hpa); + context->root_hpa = INVALID_PAGE; return 0; } @@ -965,11 +963,6 @@ static void paging_new_cr3(struct kvm_vcpu *vcpu) { pgprintk("%s: cr3 %lx\n", __FUNCTION__, vcpu->cr3); mmu_free_roots(vcpu); - if (unlikely(vcpu->kvm->n_free_mmu_pages < KVM_MIN_FREE_MMU_PAGES)) - kvm_mmu_free_some_pages(vcpu); - mmu_alloc_roots(vcpu); - kvm_mmu_flush_tlb(vcpu); - kvm_arch_ops->set_cr3(vcpu, vcpu->mmu.root_hpa); } static void inject_page_fault(struct kvm_vcpu *vcpu, @@ -1003,10 +996,7 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu, int level) context->free = paging_free; context->root_level = level; context->shadow_root_level = level; - mmu_alloc_roots(vcpu); - ASSERT(VALID_PAGE(context->root_hpa)); - kvm_arch_ops->set_cr3(vcpu, context->root_hpa | - (vcpu->cr3 & (CR3_PCD_MASK | CR3_WPT_MASK))); + context->root_hpa = INVALID_PAGE; return 0; } @@ -1025,10 +1015,7 @@ static int paging32_init_context(struct kvm_vcpu *vcpu) context->free = paging_free; context->root_level = PT32_ROOT_LEVEL; context->shadow_root_level = PT32E_ROOT_LEVEL; - mmu_alloc_roots(vcpu); - ASSERT(VALID_PAGE(context->root_hpa)); - kvm_arch_ops->set_cr3(vcpu, context->root_hpa | - (vcpu->cr3 & (CR3_PCD_MASK | CR3_WPT_MASK))); + context->root_hpa = INVALID_PAGE; return 0; } @@ -1042,7 +1029,6 @@ static int init_kvm_mmu(struct kvm_vcpu *vcpu) ASSERT(vcpu); ASSERT(!VALID_PAGE(vcpu->mmu.root_hpa)); - mmu_topup_memory_caches(vcpu); if (!is_paging(vcpu)) return nonpaging_init_context(vcpu); else if (is_long_mode(vcpu)) @@ -1063,17 +1049,32 @@ static 
void destroy_kvm_mmu(struct kvm_vcpu *vcpu) } int kvm_mmu_reset_context(struct kvm_vcpu *vcpu) +{ + destroy_kvm_mmu(vcpu); + return init_kvm_mmu(vcpu); +} + +int kvm_mmu_load(struct kvm_vcpu *vcpu) { int r; - destroy_kvm_mmu(vcpu); - r = init_kvm_mmu(vcpu); - if (r < 0) - goto out; + spin_lock(&vcpu->kvm->lock); r = mmu_topup_memory_caches(vcpu); + if (r) + goto out; + mmu_alloc_roots(vcpu); + kvm_arch_ops->set_cr3(vcpu, vcpu->mmu.root_hpa); + kvm_mmu_flush_tlb(vcpu); out: + spin_unlock(&vcpu->kvm->lock); return r; } +EXPORT_SYMBOL_GPL(kvm_mmu_load); + +void kvm_mmu_unload(struct kvm_vcpu *vcpu) +{ + mmu_free_roots(vcpu); +} static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c index 6cd6a50a034..ec040e2f8c5 100644 --- a/drivers/kvm/svm.c +++ b/drivers/kvm/svm.c @@ -1483,6 +1483,10 @@ static int svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) int r; again: + r = kvm_mmu_reload(vcpu); + if (unlikely(r)) + return r; + if (!vcpu->mmio_read_completed) do_interrupt_requests(vcpu, kvm_run); diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index 93e5bb2c40e..4d255493a57 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -1988,6 +1988,10 @@ again: vmx_save_host_state(vcpu); kvm_load_guest_fpu(vcpu); + r = kvm_mmu_reload(vcpu); + if (unlikely(r)) + goto out; + /* * Loading guest fpu may have cleared host cr0.ts */ -- cgit v1.2.3-70-g09d2 From 50a3485c594d0d52196cde4d208b37cda779fbf3 Mon Sep 17 00:00:00 2001 From: "Robert P. J. Day" Date: Sun, 3 Jun 2007 13:35:29 -0400 Subject: KVM: Replace C code with call to ARRAY_SIZE() macro. Signed-off-by: Robert P. J. 
Day Signed-off-by: Avi Kivity --- drivers/kvm/vmx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/kvm/vmx.c') diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index 4d255493a57..a534e6fe818 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -1932,7 +1932,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu, }; static const int kvm_vmx_max_exit_handlers = - sizeof(kvm_vmx_exit_handlers) / sizeof(*kvm_vmx_exit_handlers); + ARRAY_SIZE(kvm_vmx_exit_handlers); /* * The guest has exited. See if we can fix it or if we need userspace -- cgit v1.2.3-70-g09d2 From d3bef15f84f91c73a5515ad4c6a1749f8f63afcf Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 5 Jun 2007 15:53:05 +0300 Subject: KVM: Move duplicate halt handling code into kvm_main.c Will soon have a third user. Signed-off-by: Avi Kivity --- drivers/kvm/kvm.h | 1 + drivers/kvm/kvm_main.c | 11 +++++++++++ drivers/kvm/svm.c | 7 +------ drivers/kvm/vmx.c | 7 +------ 4 files changed, 14 insertions(+), 12 deletions(-) (limited to 'drivers/kvm/vmx.c') diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index e665f550015..ac358b8d3de 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -504,6 +504,7 @@ int kvm_setup_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, int size, unsigned long count, int string, int down, gva_t address, int rep, unsigned port); void kvm_emulate_cpuid(struct kvm_vcpu *vcpu); +int kvm_emulate_halt(struct kvm_vcpu *vcpu); int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address); int emulate_clts(struct kvm_vcpu *vcpu); int emulator_get_dr(struct x86_emulate_ctxt* ctxt, int dr, diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 230b25aa469..55641696254 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -1285,6 +1285,17 @@ int emulate_instruction(struct kvm_vcpu *vcpu, } EXPORT_SYMBOL_GPL(emulate_instruction); +int kvm_emulate_halt(struct kvm_vcpu *vcpu) +{ + if (vcpu->irq_summary) + return 1; + +
vcpu->run->exit_reason = KVM_EXIT_HLT; + ++vcpu->stat.halt_exits; + return 0; +} +EXPORT_SYMBOL_GPL(kvm_emulate_halt); + int kvm_hypercall(struct kvm_vcpu *vcpu, struct kvm_run *run) { unsigned long nr, a0, a1, a2, a3, a4, a5, ret; diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c index ec040e2f8c5..70f386e04cb 100644 --- a/drivers/kvm/svm.c +++ b/drivers/kvm/svm.c @@ -1115,12 +1115,7 @@ static int halt_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { vcpu->svm->next_rip = vcpu->svm->vmcb->save.rip + 1; skip_emulated_instruction(vcpu); - if (vcpu->irq_summary) - return 1; - - kvm_run->exit_reason = KVM_EXIT_HLT; - ++vcpu->stat.halt_exits; - return 0; + return kvm_emulate_halt(vcpu); } static int vmmcall_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index a534e6fe818..90abd3c58c6 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -1896,12 +1896,7 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu, static int handle_halt(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { skip_emulated_instruction(vcpu); - if (vcpu->irq_summary) - return 1; - - kvm_run->exit_reason = KVM_EXIT_HLT; - ++vcpu->stat.halt_exits; - return 0; + return kvm_emulate_halt(vcpu); } static int handle_vmcall(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) -- cgit v1.2.3-70-g09d2 From 72d6e5a08a8ba2105b3f36e32285e8fbfbed1f71 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 5 Jun 2007 16:15:51 +0300 Subject: KVM: Emulate hlt on real mode for Intel This has two use cases: the bios can't boot from disk, and guest smp bootstrap. 
Signed-off-by: Avi Kivity --- drivers/kvm/kvm.h | 1 + drivers/kvm/vmx.c | 7 ++++++- drivers/kvm/x86_emulate.c | 6 +++++- 3 files changed, 12 insertions(+), 2 deletions(-) (limited to 'drivers/kvm/vmx.c') diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index ac358b8d3de..d49b16cae27 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -347,6 +347,7 @@ struct kvm_vcpu { u32 ar; } tr, es, ds, fs, gs; } rmode; + int halt_request; /* real mode on Intel only */ int cpuid_nent; struct kvm_cpuid_entry cpuid_entries[KVM_MAX_CPUID_ENTRIES]; diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index 90abd3c58c6..a1f51b9d482 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -1608,8 +1608,13 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) if (vcpu->rmode.active && handle_rmode_exception(vcpu, intr_info & INTR_INFO_VECTOR_MASK, - error_code)) + error_code)) { + if (vcpu->halt_request) { + vcpu->halt_request = 0; + return kvm_emulate_halt(vcpu); + } return 1; + } if ((intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK)) == (INTR_TYPE_EXCEPTION | 1)) { kvm_run->exit_reason = KVM_EXIT_DEBUG; diff --git a/drivers/kvm/x86_emulate.c b/drivers/kvm/x86_emulate.c index 6123c0292b2..a4a84817b27 100644 --- a/drivers/kvm/x86_emulate.c +++ b/drivers/kvm/x86_emulate.c @@ -143,7 +143,8 @@ static u8 opcode_table[256] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xF0 - 0xF7 */ 0, 0, 0, 0, - 0, 0, ByteOp | DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM, + ImplicitOps, 0, + ByteOp | DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM, /* 0xF8 - 0xFF */ 0, 0, 0, 0, 0, 0, ByteOp | DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM @@ -1149,6 +1150,9 @@ special_insn: case 0xae ... 0xaf: /* scas */ DPRINTF("Urk! 
I don't handle SCAS.\n"); goto cannot_emulate; + case 0xf4: /* hlt */ + ctxt->vcpu->halt_request = 1; + goto done; } goto writeback; -- cgit v1.2.3-70-g09d2 From d9e368d61263055eceac2966bb7ea31b89da3425 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 7 Jun 2007 19:18:30 +0300 Subject: KVM: Flush remote tlbs when reducing shadow pte permissions When a vcpu causes a shadow tlb entry to have reduced permissions, it must also clear the tlb on remote vcpus. We do that by: - setting a bit on the vcpu that requests a tlb flush before the next entry - if the vcpu is currently executing, we send an ipi to make sure it exits before we continue Signed-off-by: Avi Kivity --- drivers/kvm/kvm.h | 8 ++++++++ drivers/kvm/kvm_main.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ drivers/kvm/mmu.c | 8 +++++--- drivers/kvm/svm.c | 17 ++++++++++++----- drivers/kvm/vmx.c | 22 +++++++++++++++------- 5 files changed, 84 insertions(+), 15 deletions(-) (limited to 'drivers/kvm/vmx.c') diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index 528a56b1790..b08272bce21 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -83,6 +83,11 @@ #define KVM_PIO_PAGE_OFFSET 1 +/* + * vcpu->requests bit members + */ +#define KVM_TLB_FLUSH 0 + /* * Address types: * @@ -272,6 +277,8 @@ struct kvm_vcpu { u64 host_tsc; struct kvm_run *run; int interrupt_window_open; + int guest_mode; + unsigned long requests; unsigned long irq_summary; /* bit vector: 1 per word in irq_pending */ #define NR_IRQ_WORDS KVM_IRQ_BITMAP_SIZE(unsigned long) unsigned long irq_pending[NR_IRQ_WORDS]; @@ -530,6 +537,7 @@ void save_msrs(struct vmx_msr_entry *e, int n); void kvm_resched(struct kvm_vcpu *vcpu); void kvm_load_guest_fpu(struct kvm_vcpu *vcpu); void kvm_put_guest_fpu(struct kvm_vcpu *vcpu); +void kvm_flush_remote_tlbs(struct kvm *kvm); int kvm_read_guest(struct kvm_vcpu *vcpu, gva_t addr, diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 4e1a017f3db..633c2eded08 100644 --- 
a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -41,6 +41,8 @@ #include #include #include +#include +#include #include "x86_emulate.h" #include "segment_descriptor.h" @@ -309,6 +311,48 @@ static void vcpu_put(struct kvm_vcpu *vcpu) mutex_unlock(&vcpu->mutex); } +static void ack_flush(void *_completed) +{ + atomic_t *completed = _completed; + + atomic_inc(completed); +} + +void kvm_flush_remote_tlbs(struct kvm *kvm) +{ + int i, cpu, needed; + cpumask_t cpus; + struct kvm_vcpu *vcpu; + atomic_t completed; + + atomic_set(&completed, 0); + cpus_clear(cpus); + needed = 0; + for (i = 0; i < kvm->nvcpus; ++i) { + vcpu = &kvm->vcpus[i]; + if (test_and_set_bit(KVM_TLB_FLUSH, &vcpu->requests)) + continue; + cpu = vcpu->cpu; + if (cpu != -1 && cpu != raw_smp_processor_id()) + if (!cpu_isset(cpu, cpus)) { + cpu_set(cpu, cpus); + ++needed; + } + } + + /* + * We really want smp_call_function_mask() here. But that's not + * available, so ipi all cpus in parallel and wait for them + * to complete. 
+ */ + for (cpu = first_cpu(cpus); cpu != NR_CPUS; cpu = next_cpu(cpu, cpus)) + smp_call_function_single(cpu, ack_flush, &completed, 1, 0); + while (atomic_read(&completed) != needed) { + cpu_relax(); + barrier(); + } +} + static struct kvm *kvm_create_vm(void) { struct kvm *kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL); diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c index d4de988d182..ad50cfda5ac 100644 --- a/drivers/kvm/mmu.c +++ b/drivers/kvm/mmu.c @@ -441,7 +441,7 @@ static void rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn) BUG_ON(!(*spte & PT_WRITABLE_MASK)); rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte); rmap_remove(vcpu, spte); - kvm_arch_ops->tlb_flush(vcpu); + kvm_flush_remote_tlbs(vcpu->kvm); set_shadow_pte(spte, *spte & ~PT_WRITABLE_MASK); } } @@ -656,7 +656,7 @@ static void kvm_mmu_page_unlink_children(struct kvm_vcpu *vcpu, rmap_remove(vcpu, &pt[i]); pt[i] = 0; } - kvm_arch_ops->tlb_flush(vcpu); + kvm_flush_remote_tlbs(vcpu->kvm); return; } @@ -669,6 +669,7 @@ static void kvm_mmu_page_unlink_children(struct kvm_vcpu *vcpu, ent &= PT64_BASE_ADDR_MASK; mmu_page_remove_parent_pte(vcpu, page_header(ent), &pt[i]); } + kvm_flush_remote_tlbs(vcpu->kvm); } static void kvm_mmu_put_page(struct kvm_vcpu *vcpu, @@ -1093,6 +1094,7 @@ static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu, } } *spte = 0; + kvm_flush_remote_tlbs(vcpu->kvm); } static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu, @@ -1308,7 +1310,7 @@ void kvm_mmu_zap_all(struct kvm_vcpu *vcpu) } mmu_free_memory_caches(vcpu); - kvm_arch_ops->tlb_flush(vcpu); + kvm_flush_remote_tlbs(vcpu->kvm); init_kvm_mmu(vcpu); } diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c index 70f386e04cb..eb175c5cd49 100644 --- a/drivers/kvm/svm.c +++ b/drivers/kvm/svm.c @@ -1470,6 +1470,11 @@ static void load_db_regs(unsigned long *db_regs) asm volatile ("mov %0, %%dr3" : : "r"(db_regs[3])); } +static void svm_flush_tlb(struct kvm_vcpu *vcpu) +{ + force_new_asid(vcpu); +} + static int 
svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { u16 fs_selector; @@ -1487,6 +1492,11 @@ again: clgi(); + vcpu->guest_mode = 1; + if (vcpu->requests) + if (test_and_clear_bit(KVM_TLB_FLUSH, &vcpu->requests)) + svm_flush_tlb(vcpu); + pre_svm_run(vcpu); save_host_msrs(vcpu); @@ -1618,6 +1628,8 @@ again: #endif : "cc", "memory" ); + vcpu->guest_mode = 0; + if (vcpu->fpu_active) { fx_save(vcpu->guest_fx_image); fx_restore(vcpu->host_fx_image); @@ -1682,11 +1694,6 @@ again: return r; } -static void svm_flush_tlb(struct kvm_vcpu *vcpu) -{ - force_new_asid(vcpu); -} - static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root) { vcpu->svm->vmcb->save.cr3 = root; diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index a1f51b9d482..b969db1e083 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -1972,6 +1972,11 @@ static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu, (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF)); } +static void vmx_flush_tlb(struct kvm_vcpu *vcpu) +{ + vmcs_writel(GUEST_CR3, vmcs_readl(GUEST_CR3)); +} + static int vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { u8 fail; @@ -1997,9 +2002,15 @@ again: */ vmcs_writel(HOST_CR0, read_cr0()); + local_irq_disable(); + + vcpu->guest_mode = 1; + if (vcpu->requests) + if (test_and_clear_bit(KVM_TLB_FLUSH, &vcpu->requests)) + vmx_flush_tlb(vcpu); + asm ( /* Store host registers */ - "pushf \n\t" #ifdef CONFIG_X86_64 "push %%rax; push %%rbx; push %%rdx;" "push %%rsi; push %%rdi; push %%rbp;" @@ -2091,7 +2102,6 @@ again: "pop %%ecx; popa \n\t" #endif "setbe %0 \n\t" - "popf \n\t" : "=q" (fail) : "r"(vcpu->launched), "d"((unsigned long)HOST_RSP), "c"(vcpu), @@ -2115,6 +2125,9 @@ again: [cr2]"i"(offsetof(struct kvm_vcpu, cr2)) : "cc", "memory" ); + vcpu->guest_mode = 0; + local_irq_enable(); + ++vcpu->stat.exits; vcpu->interrupt_window_open = (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0; @@ -2167,11 +2180,6 @@ out: return r; } -static void vmx_flush_tlb(struct 
kvm_vcpu *vcpu) -{ - vmcs_writel(GUEST_CR3, vmcs_readl(GUEST_CR3)); -} - static void vmx_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr, u32 err_code) -- cgit v1.2.3-70-g09d2 From a3870c47891629dae1765358fbaba3c49460f47a Mon Sep 17 00:00:00 2001 From: Shani Moideen Date: Mon, 11 Jun 2007 09:31:33 +0530 Subject: KVM: VMX: Replace memset(, 0, PAGESIZE) with clear_page() Signed-off-by: Shani Moideen Signed-off-by: Avi Kivity --- drivers/kvm/vmx.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/kvm/vmx.c') diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index b969db1e083..b909b545567 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -1180,16 +1180,16 @@ static int init_rmode_tss(struct kvm* kvm) } page = kmap_atomic(p1, KM_USER0); - memset(page, 0, PAGE_SIZE); + clear_page(page); *(u16*)(page + 0x66) = TSS_BASE_SIZE + TSS_REDIRECTION_SIZE; kunmap_atomic(page, KM_USER0); page = kmap_atomic(p2, KM_USER0); - memset(page, 0, PAGE_SIZE); + clear_page(page); kunmap_atomic(page, KM_USER0); page = kmap_atomic(p3, KM_USER0); - memset(page, 0, PAGE_SIZE); + clear_page(page); *(page + RMODE_TSS_SIZE - 2 * PAGE_SIZE - 1) = ~0; kunmap_atomic(page, KM_USER0); -- cgit v1.2.3-70-g09d2 From 94cea1bb9d050c3200b36420cc03ba744dfd4338 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 13 Jun 2007 19:43:19 +0300 Subject: KVM: Initialize the BSP bit in the APIC_BASE msr correctly Needs to be set on vcpu 0 only. 
Signed-off-by: Avi Kivity --- drivers/kvm/svm.c | 6 +++--- drivers/kvm/vmx.c | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'drivers/kvm/vmx.c') diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c index 68841ef671b..62ec38c7027 100644 --- a/drivers/kvm/svm.c +++ b/drivers/kvm/svm.c @@ -589,9 +589,9 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu) fx_init(vcpu); vcpu->fpu_active = 1; - vcpu->apic_base = 0xfee00000 | - /*for vcpu 0*/ MSR_IA32_APICBASE_BSP | - MSR_IA32_APICBASE_ENABLE; + vcpu->apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; + if (vcpu == &vcpu->kvm->vcpus[0]) + vcpu->apic_base |= MSR_IA32_APICBASE_BSP; return 0; diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index b909b545567..0b2aace70ae 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -1238,9 +1238,9 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu) memset(vcpu->regs, 0, sizeof(vcpu->regs)); vcpu->regs[VCPU_REGS_RDX] = get_rdx_init_val(); vcpu->cr8 = 0; - vcpu->apic_base = 0xfee00000 | - /*for vcpu 0*/ MSR_IA32_APICBASE_BSP | - MSR_IA32_APICBASE_ENABLE; + vcpu->apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; + if (vcpu == &vcpu->kvm->vcpus[0]) + vcpu->apic_base |= MSR_IA32_APICBASE_BSP; fx_init(vcpu); -- cgit v1.2.3-70-g09d2 From 7700270ee3c1324c18f5b7c36ee5ba1a4165919a Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 13 Jun 2007 19:55:28 +0300 Subject: KVM: VMX: Ensure vcpu time stamp counter is monotonic If the time stamp counter goes backwards, a guest delay loop can become infinite. This can happen if a vcpu is migrated to another cpu, where the counter has a lower value than the first cpu. Since we're doing an IPI to the first cpu anyway, we can use that to pick up the old tsc, and use that to calculate the adjustment we need to make to the tsc offset.
Signed-off-by: Avi Kivity --- drivers/kvm/vmx.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'drivers/kvm/vmx.c') diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index 0b2aace70ae..d06c3627f64 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -160,6 +160,7 @@ static void __vcpu_clear(void *arg) vmcs_clear(vcpu->vmcs); if (per_cpu(current_vmcs, cpu) == vcpu->vmcs) per_cpu(current_vmcs, cpu) = NULL; + rdtscll(vcpu->host_tsc); } static void vcpu_clear(struct kvm_vcpu *vcpu) @@ -376,6 +377,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu) { u64 phys_addr = __pa(vcpu->vmcs); int cpu; + u64 tsc_this, delta; cpu = get_cpu(); @@ -409,6 +411,13 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu) rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp); vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */ + + /* + * Make sure the time stamp counter is monotonous. + */ + rdtscll(tsc_this); + delta = vcpu->host_tsc - tsc_this; + vmcs_write64(TSC_OFFSET, vmcs_read64(TSC_OFFSET) + delta); } } -- cgit v1.2.3-70-g09d2 From ff1dc7942ba8fa4a86619bcb37ed68afae1f69ca Mon Sep 17 00:00:00 2001 From: Gregory Haskins Date: Thu, 31 May 2007 14:08:58 -0400 Subject: KVM: VMX: Fix interrupt checking on lightweight exit With kernel-injected interrupts, we need to check for interrupts on lightweight exits too. 
Signed-off-by: Gregory Haskins Signed-off-by: Avi Kivity --- drivers/kvm/vmx.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/kvm/vmx.c') diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index d06c3627f64..b47ddccc7d7 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -1992,13 +1992,13 @@ static int vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) int r; preempted: - if (!vcpu->mmio_read_completed) - do_interrupt_requests(vcpu, kvm_run); - if (vcpu->guest_debug.enabled) kvm_guest_debug_pre(vcpu); again: + if (!vcpu->mmio_read_completed) + do_interrupt_requests(vcpu, kvm_run); + vmx_save_host_state(vcpu); kvm_load_guest_fpu(vcpu); -- cgit v1.2.3-70-g09d2 From 75880a01124c6aa5d428bdc14163039a87618be1 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 20 Jun 2007 11:20:04 +0300 Subject: KVM: VMX: Reinitialize the real-mode tss when entering real mode Protected mode code may have corrupted the real-mode tss, so re-initialize it when switching to real mode. 
Signed-off-by: Avi Kivity --- drivers/kvm/vmx.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/kvm/vmx.c') diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index b47ddccc7d7..42a916379ce 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -31,6 +31,8 @@ MODULE_AUTHOR("Qumranet"); MODULE_LICENSE("GPL"); +static int init_rmode_tss(struct kvm *kvm); + static DEFINE_PER_CPU(struct vmcs *, vmxarea); static DEFINE_PER_CPU(struct vmcs *, current_vmcs); @@ -951,6 +953,8 @@ static void enter_rmode(struct kvm_vcpu *vcpu) fix_rmode_seg(VCPU_SREG_DS, &vcpu->rmode.ds); fix_rmode_seg(VCPU_SREG_GS, &vcpu->rmode.gs); fix_rmode_seg(VCPU_SREG_FS, &vcpu->rmode.fs); + + init_rmode_tss(vcpu->kvm); } #ifdef CONFIG_X86_64 -- cgit v1.2.3-70-g09d2 From 796fd1b23e463e98b3e2fc86ed571db06dc945bb Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 21 Jun 2007 11:54:45 +0300 Subject: KVM: VMX: Remove unnecessary code in vmx_flush_tlb() A vmexit implicitly flushes the tlb; the code is bogus. Noted by Shaohua Li. Signed-off-by: Avi Kivity --- drivers/kvm/vmx.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/kvm/vmx.c') diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index 42a916379ce..7d04ffaaf94 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -1987,7 +1987,6 @@ static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu, static void vmx_flush_tlb(struct kvm_vcpu *vcpu) { - vmcs_writel(GUEST_CR3, vmcs_readl(GUEST_CR3)); } static int vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) -- cgit v1.2.3-70-g09d2 From e495606dd09d79f9fa496334ac3958f6ff179d82 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 28 Jun 2007 14:15:57 -0400 Subject: KVM: Clean up #includes Remove unnecessary ones, and rearrange the remaining in the standard order.
Signed-off-by: Avi Kivity --- drivers/kvm/kvm_main.c | 18 +++++++----------- drivers/kvm/mmu.c | 10 ++++++---- drivers/kvm/svm.c | 7 ++++--- drivers/kvm/vmx.c | 5 +++-- 4 files changed, 20 insertions(+), 20 deletions(-) (limited to 'drivers/kvm/vmx.c') diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 26ca90f74fc..ea027190a65 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -16,37 +16,33 @@ */ #include "kvm.h" +#include "x86_emulate.h" +#include "segment_descriptor.h" #include #include #include -#include -#include #include #include -#include #include #include #include -#include #include -#include #include #include #include -#include #include #include -#include -#include -#include #include #include #include #include -#include "x86_emulate.h" -#include "segment_descriptor.h" +#include +#include +#include +#include +#include MODULE_AUTHOR("Qumranet"); MODULE_LICENSE("GPL"); diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c index 49ffbd3da74..b297a6b111a 100644 --- a/drivers/kvm/mmu.c +++ b/drivers/kvm/mmu.c @@ -16,16 +16,18 @@ * the COPYING file in the top-level directory. 
* */ + +#include "vmx.h" +#include "kvm.h" + #include #include -#include #include #include #include -#include -#include "vmx.h" -#include "kvm.h" +#include +#include #undef MMU_DEBUG diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c index a0d442883e1..bc818cc126e 100644 --- a/drivers/kvm/svm.c +++ b/drivers/kvm/svm.c @@ -14,16 +14,17 @@ * */ +#include "kvm_svm.h" +#include "x86_emulate.h" + #include #include #include #include #include #include -#include -#include "kvm_svm.h" -#include "x86_emulate.h" +#include MODULE_AUTHOR("Qumranet"); MODULE_LICENSE("GPL"); diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index 7d04ffaaf94..80628f69916 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -17,17 +17,18 @@ #include "kvm.h" #include "vmx.h" +#include "segment_descriptor.h" + #include #include #include #include #include #include + #include #include -#include "segment_descriptor.h" - MODULE_AUTHOR("Qumranet"); MODULE_LICENSE("GPL"); -- cgit v1.2.3-70-g09d2