diff options
-rw-r--r-- | Documentation/virtual/kvm/cpuid.txt | 4 | ||||
-rw-r--r-- | Documentation/virtual/kvm/hypercalls.txt | 14 | ||||
-rw-r--r-- | arch/mips/kvm/kvm_locore.S | 969 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_64_mmu_hv.c | 2 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_64_vio.c | 2 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_hv.c | 2 | ||||
-rw-r--r-- | arch/x86/include/asm/kvm_host.h | 5 | ||||
-rw-r--r-- | arch/x86/include/uapi/asm/kvm_para.h | 1 | ||||
-rw-r--r-- | arch/x86/kvm/cpuid.c | 3 | ||||
-rw-r--r-- | arch/x86/kvm/lapic.c | 5 | ||||
-rw-r--r-- | arch/x86/kvm/mmu.c | 4 | ||||
-rw-r--r-- | arch/x86/kvm/vmx.c | 1 | ||||
-rw-r--r-- | arch/x86/kvm/x86.c | 85 | ||||
-rw-r--r-- | include/uapi/linux/kvm.h | 1 | ||||
-rw-r--r-- | include/uapi/linux/kvm_para.h | 1 | ||||
-rw-r--r-- | virt/kvm/kvm_main.c | 46 |
16 files changed, 592 insertions, 553 deletions
diff --git a/Documentation/virtual/kvm/cpuid.txt b/Documentation/virtual/kvm/cpuid.txt index 83afe65d496..22ff659bc0f 100644 --- a/Documentation/virtual/kvm/cpuid.txt +++ b/Documentation/virtual/kvm/cpuid.txt @@ -43,6 +43,10 @@ KVM_FEATURE_CLOCKSOURCE2 || 3 || kvmclock available at msrs KVM_FEATURE_ASYNC_PF || 4 || async pf can be enabled by || || writing to msr 0x4b564d02 ------------------------------------------------------------------------------ +KVM_FEATURE_PV_UNHALT || 7 || guest checks this feature bit + || || before enabling paravirtualized + || || spinlock support. +------------------------------------------------------------------------------ KVM_FEATURE_CLOCKSOURCE_STABLE_BIT || 24 || host will warn if no guest-side || || per-cpu warps are expected in || || kvmclock. diff --git a/Documentation/virtual/kvm/hypercalls.txt b/Documentation/virtual/kvm/hypercalls.txt index ea113b5d87a..022198e389d 100644 --- a/Documentation/virtual/kvm/hypercalls.txt +++ b/Documentation/virtual/kvm/hypercalls.txt @@ -64,3 +64,17 @@ Purpose: To enable communication between the hypervisor and guest there is a shared page that contains parts of supervisor visible register state. The guest can map this shared page to access its supervisor register through memory using this hypercall. + +5. KVM_HC_KICK_CPU +------------------------ +Architecture: x86 +Status: active +Purpose: Hypercall used to wakeup a vcpu from HLT state +Usage example : A vcpu of a paravirtualized guest that is busywaiting in guest +kernel mode for an event to occur (ex: a spinlock to become available) can +execute HLT instruction once it has busy-waited for more than a threshold +time-interval. Execution of HLT instruction would cause the hypervisor to put +the vcpu to sleep until occurence of an appropriate event. Another vcpu of the +same guest can wakeup the sleeping vcpu by issuing KVM_HC_KICK_CPU hypercall, +specifying APIC ID (a1) of the vcpu to be woken up. An additional argument (a0) +is used in the hypercall for future use. diff --git a/arch/mips/kvm/kvm_locore.S b/arch/mips/kvm/kvm_locore.S index dca2aa66599..bbace092ad0 100644 --- a/arch/mips/kvm/kvm_locore.S +++ b/arch/mips/kvm/kvm_locore.S @@ -1,13 +1,13 @@ /* -* This file is subject to the terms and conditions of the GNU General Public -* License. See the file "COPYING" in the main directory of this archive -* for more details. -* -* Main entry point for the guest, exception handling. -* -* Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. -* Authors: Sanjay Lal <sanjayl@kymasys.com> -*/ + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Main entry point for the guest, exception handling. + * + * Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. + * Authors: Sanjay Lal <sanjayl@kymasys.com> + */ #include <asm/asm.h> #include <asm/asmmacro.h> @@ -55,195 +55,193 @@ * a0: run * a1: vcpu */ + .set noreorder + .set noat FEXPORT(__kvm_mips_vcpu_run) - .set push - .set noreorder - .set noat - - /* k0/k1 not being used in host kernel context */ - addiu k1,sp, -PT_SIZE - LONG_S $0, PT_R0(k1) - LONG_S $1, PT_R1(k1) - LONG_S $2, PT_R2(k1) - LONG_S $3, PT_R3(k1) - - LONG_S $4, PT_R4(k1) - LONG_S $5, PT_R5(k1) - LONG_S $6, PT_R6(k1) - LONG_S $7, PT_R7(k1) - - LONG_S $8, PT_R8(k1) - LONG_S $9, PT_R9(k1) - LONG_S $10, PT_R10(k1) - LONG_S $11, PT_R11(k1) - LONG_S $12, PT_R12(k1) - LONG_S $13, PT_R13(k1) - LONG_S $14, PT_R14(k1) - LONG_S $15, PT_R15(k1) - LONG_S $16, PT_R16(k1) - LONG_S $17, PT_R17(k1) - - LONG_S $18, PT_R18(k1) - LONG_S $19, PT_R19(k1) - LONG_S $20, PT_R20(k1) - LONG_S $21, PT_R21(k1) - LONG_S $22, PT_R22(k1) - LONG_S $23, PT_R23(k1) - LONG_S $24, PT_R24(k1) - LONG_S $25, PT_R25(k1) + /* k0/k1 not being used in host kernel context */ + INT_ADDIU k1, sp, -PT_SIZE + LONG_S $0, PT_R0(k1) + LONG_S $1, PT_R1(k1) + LONG_S $2, PT_R2(k1) + LONG_S $3, PT_R3(k1) + + LONG_S $4, PT_R4(k1) + LONG_S $5, PT_R5(k1) + LONG_S $6, PT_R6(k1) + LONG_S $7, PT_R7(k1) + + LONG_S $8, PT_R8(k1) + LONG_S $9, PT_R9(k1) + LONG_S $10, PT_R10(k1) + LONG_S $11, PT_R11(k1) + LONG_S $12, PT_R12(k1) + LONG_S $13, PT_R13(k1) + LONG_S $14, PT_R14(k1) + LONG_S $15, PT_R15(k1) + LONG_S $16, PT_R16(k1) + LONG_S $17, PT_R17(k1) + + LONG_S $18, PT_R18(k1) + LONG_S $19, PT_R19(k1) + LONG_S $20, PT_R20(k1) + LONG_S $21, PT_R21(k1) + LONG_S $22, PT_R22(k1) + LONG_S $23, PT_R23(k1) + LONG_S $24, PT_R24(k1) + LONG_S $25, PT_R25(k1) /* XXXKYMA k0/k1 not saved, not being used if we got here through an ioctl() */ - LONG_S $28, PT_R28(k1) - LONG_S $29, PT_R29(k1) - LONG_S $30, PT_R30(k1) - LONG_S $31, PT_R31(k1) + LONG_S $28, PT_R28(k1) + LONG_S $29, PT_R29(k1) + LONG_S $30, PT_R30(k1) + LONG_S $31, PT_R31(k1) - /* Save hi/lo */ - mflo v0 - LONG_S v0, PT_LO(k1) - mfhi v1 - LONG_S v1, PT_HI(k1) + /* Save hi/lo */ + mflo v0 + LONG_S v0, PT_LO(k1) + mfhi v1 + LONG_S v1, PT_HI(k1) /* Save host status */ - mfc0 v0, CP0_STATUS - LONG_S v0, PT_STATUS(k1) + mfc0 v0, CP0_STATUS + LONG_S v0, PT_STATUS(k1) /* Save host ASID, shove it into the BVADDR location */ - mfc0 v1,CP0_ENTRYHI - andi v1, 0xff - LONG_S v1, PT_HOST_ASID(k1) + mfc0 v1, CP0_ENTRYHI + andi v1, 0xff + LONG_S v1, PT_HOST_ASID(k1) - /* Save DDATA_LO, will be used to store pointer to vcpu */ - mfc0 v1, CP0_DDATA_LO - LONG_S v1, PT_HOST_USERLOCAL(k1) + /* Save DDATA_LO, will be used to store pointer to vcpu */ + mfc0 v1, CP0_DDATA_LO + LONG_S v1, PT_HOST_USERLOCAL(k1) - /* DDATA_LO has pointer to vcpu */ - mtc0 a1,CP0_DDATA_LO + /* DDATA_LO has pointer to vcpu */ + mtc0 a1, CP0_DDATA_LO - /* Offset into vcpu->arch */ - addiu k1, a1, VCPU_HOST_ARCH + /* Offset into vcpu->arch */ + INT_ADDIU k1, a1, VCPU_HOST_ARCH - /* Save the host stack to VCPU, used for exception processing when we exit from the Guest */ - LONG_S sp, VCPU_HOST_STACK(k1) + /* + * Save the host stack to VCPU, used for exception processing + * when we exit from the Guest + */ + LONG_S sp, VCPU_HOST_STACK(k1) - /* Save the kernel gp as well */ - LONG_S gp, VCPU_HOST_GP(k1) + /* Save the kernel gp as well */ + LONG_S gp, VCPU_HOST_GP(k1) /* Setup status register for running the guest in UM, interrupts are disabled */ - li k0,(ST0_EXL | KSU_USER| ST0_BEV) - mtc0 k0,CP0_STATUS - ehb - - /* load up the new EBASE */ - LONG_L k0, VCPU_GUEST_EBASE(k1) - mtc0 k0,CP0_EBASE - - /* Now that the new EBASE has been loaded, unset BEV, set interrupt mask as it was - * but make sure that timer interrupts are enabled - */ - li k0,(ST0_EXL | KSU_USER | ST0_IE) - andi v0, v0, ST0_IM - or k0, k0, v0 - mtc0 k0,CP0_STATUS - ehb + li k0, (ST0_EXL | KSU_USER | ST0_BEV) + mtc0 k0, CP0_STATUS + ehb + + /* load up the new EBASE */ + LONG_L k0, VCPU_GUEST_EBASE(k1) + mtc0 k0, CP0_EBASE + + /* + * Now that the new EBASE has been loaded, unset BEV, set + * interrupt mask as it was but make sure that timer interrupts + * are enabled + */ + li k0, (ST0_EXL | KSU_USER | ST0_IE) + andi v0, v0, ST0_IM + or k0, k0, v0 + mtc0 k0, CP0_STATUS + ehb /* Set Guest EPC */ - LONG_L t0, VCPU_PC(k1) - mtc0 t0, CP0_EPC + LONG_L t0, VCPU_PC(k1) + mtc0 t0, CP0_EPC FEXPORT(__kvm_mips_load_asid) - /* Set the ASID for the Guest Kernel */ - sll t0, t0, 1 /* with kseg0 @ 0x40000000, kernel */ - /* addresses shift to 0x80000000 */ - bltz t0, 1f /* If kernel */ - addiu t1, k1, VCPU_GUEST_KERNEL_ASID /* (BD) */ - addiu t1, k1, VCPU_GUEST_USER_ASID /* else user */ + /* Set the ASID for the Guest Kernel */ + INT_SLL t0, t0, 1 /* with kseg0 @ 0x40000000, kernel */ + /* addresses shift to 0x80000000 */ + bltz t0, 1f /* If kernel */ + INT_ADDIU t1, k1, VCPU_GUEST_KERNEL_ASID /* (BD) */ + INT_ADDIU t1, k1, VCPU_GUEST_USER_ASID /* else user */ 1: - /* t1: contains the base of the ASID array, need to get the cpu id */ - LONG_L t2, TI_CPU($28) /* smp_processor_id */ - sll t2, t2, 2 /* x4 */ - addu t3, t1, t2 - LONG_L k0, (t3) - andi k0, k0, 0xff - mtc0 k0,CP0_ENTRYHI - ehb - - /* Disable RDHWR access */ - mtc0 zero, CP0_HWRENA - - /* Now load up the Guest Context from VCPU */ - LONG_L $1, VCPU_R1(k1) - LONG_L $2, VCPU_R2(k1) - LONG_L $3, VCPU_R3(k1) - - LONG_L $4, VCPU_R4(k1) - LONG_L $5, VCPU_R5(k1) - LONG_L $6, VCPU_R6(k1) - LONG_L $7, VCPU_R7(k1) - - LONG_L $8, VCPU_R8(k1) - LONG_L $9, VCPU_R9(k1) - LONG_L $10, VCPU_R10(k1) - LONG_L $11, VCPU_R11(k1) - LONG_L $12, VCPU_R12(k1) - LONG_L $13, VCPU_R13(k1) - LONG_L $14, VCPU_R14(k1) - LONG_L $15, VCPU_R15(k1) - LONG_L $16, VCPU_R16(k1) - LONG_L $17, VCPU_R17(k1) - LONG_L $18, VCPU_R18(k1) - LONG_L $19, VCPU_R19(k1) - LONG_L $20, VCPU_R20(k1) - LONG_L $21, VCPU_R21(k1) - LONG_L $22, VCPU_R22(k1) - LONG_L $23, VCPU_R23(k1) - LONG_L $24, VCPU_R24(k1) - LONG_L $25, VCPU_R25(k1) - - /* k0/k1 loaded up later */ - - LONG_L $28, VCPU_R28(k1) - LONG_L $29, VCPU_R29(k1) - LONG_L $30, VCPU_R30(k1) - LONG_L $31, VCPU_R31(k1) - - /* Restore hi/lo */ - LONG_L k0, VCPU_LO(k1) - mtlo k0 - - LONG_L k0, VCPU_HI(k1) - mthi k0 + /* t1: contains the base of the ASID array, need to get the cpu id */ + LONG_L t2, TI_CPU($28) /* smp_processor_id */ + INT_SLL t2, t2, 2 /* x4 */ + REG_ADDU t3, t1, t2 + LONG_L k0, (t3) + andi k0, k0, 0xff + mtc0 k0, CP0_ENTRYHI + ehb + + /* Disable RDHWR access */ + mtc0 zero, CP0_HWRENA + + /* Now load up the Guest Context from VCPU */ + LONG_L $1, VCPU_R1(k1) + LONG_L $2, VCPU_R2(k1) + LONG_L $3, VCPU_R3(k1) + + LONG_L $4, VCPU_R4(k1) + LONG_L $5, VCPU_R5(k1) + LONG_L $6, VCPU_R6(k1) + LONG_L $7, VCPU_R7(k1) + + LONG_L $8, VCPU_R8(k1) + LONG_L $9, VCPU_R9(k1) + LONG_L $10, VCPU_R10(k1) + LONG_L $11, VCPU_R11(k1) + LONG_L $12, VCPU_R12(k1) + LONG_L $13, VCPU_R13(k1) + LONG_L $14, VCPU_R14(k1) + LONG_L $15, VCPU_R15(k1) + LONG_L $16, VCPU_R16(k1) + LONG_L $17, VCPU_R17(k1) + LONG_L $18, VCPU_R18(k1) + LONG_L $19, VCPU_R19(k1) + LONG_L $20, VCPU_R20(k1) + LONG_L $21, VCPU_R21(k1) + LONG_L $22, VCPU_R22(k1) + LONG_L $23, VCPU_R23(k1) + LONG_L $24, VCPU_R24(k1) + LONG_L $25, VCPU_R25(k1) + + /* k0/k1 loaded up later */ + + LONG_L $28, VCPU_R28(k1) + LONG_L $29, VCPU_R29(k1) + LONG_L $30, VCPU_R30(k1) + LONG_L $31, VCPU_R31(k1) + + /* Restore hi/lo */ + LONG_L k0, VCPU_LO(k1) + mtlo k0 + + LONG_L k0, VCPU_HI(k1) + mthi k0 FEXPORT(__kvm_mips_load_k0k1) /* Restore the guest's k0/k1 registers */ - LONG_L k0, VCPU_R26(k1) - LONG_L k1, VCPU_R27(k1) + LONG_L k0, VCPU_R26(k1) + LONG_L k1, VCPU_R27(k1) - /* Jump to guest */ + /* Jump to guest */ eret - .set pop VECTOR(MIPSX(exception), unknown) /* * Find out what mode we came from and jump to the proper handler. */ - .set push - .set noat - .set noreorder - mtc0 k0, CP0_ERROREPC #01: Save guest k0 - ehb #02: - - mfc0 k0, CP0_EBASE #02: Get EBASE - srl k0, k0, 10 #03: Get rid of CPUNum - sll k0, k0, 10 #04 - LONG_S k1, 0x3000(k0) #05: Save k1 @ offset 0x3000 - addiu k0, k0, 0x2000 #06: Exception handler is installed @ offset 0x2000 - j k0 #07: jump to the function - nop #08: branch delay slot - .set push + mtc0 k0, CP0_ERROREPC #01: Save guest k0 + ehb #02: + + mfc0 k0, CP0_EBASE #02: Get EBASE + INT_SRL k0, k0, 10 #03: Get rid of CPUNum + INT_SLL k0, k0, 10 #04 + LONG_S k1, 0x3000(k0) #05: Save k1 @ offset 0x3000 + INT_ADDIU k0, k0, 0x2000 #06: Exception handler is installed @ offset 0x2000 + j k0 #07: jump to the function + nop #08: branch delay slot VECTOR_END(MIPSX(exceptionEnd)) .end MIPSX(exception) @@ -253,329 +251,327 @@ VECTOR_END(MIPSX(exceptionEnd)) * */ NESTED (MIPSX(GuestException), CALLFRAME_SIZ, ra) - .set push - .set noat - .set noreorder - - /* Get the VCPU pointer from DDTATA_LO */ - mfc0 k1, CP0_DDATA_LO - addiu k1, k1, VCPU_HOST_ARCH - - /* Start saving Guest context to VCPU */ - LONG_S $0, VCPU_R0(k1) - LONG_S $1, VCPU_R1(k1) - LONG_S $2, VCPU_R2(k1) - LONG_S $3, VCPU_R3(k1) - LONG_S $4, VCPU_R4(k1) - LONG_S $5, VCPU_R5(k1) - LONG_S $6, VCPU_R6(k1) - LONG_S $7, VCPU_R7(k1) - LONG_S $8, VCPU_R8(k1) - LONG_S $9, VCPU_R9(k1) - LONG_S $10, VCPU_R10(k1) - LONG_S $11, VCPU_R11(k1) - LONG_S $12, VCPU_R12(k1) - LONG_S $13, VCPU_R13(k1) - LONG_S $14, VCPU_R14(k1) - LONG_S $15, VCPU_R15(k1) - LONG_S $16, VCPU_R16(k1) - LONG_S $17,VCPU_R17(k1) - LONG_S $18, VCPU_R18(k1) - LONG_S $19, VCPU_R19(k1) - LONG_S $20, VCPU_R20(k1) - LONG_S $21, VCPU_R21(k1) - LONG_S $22, VCPU_R22(k1) - LONG_S $23, VCPU_R23(k1) - LONG_S $24, VCPU_R24(k1) - LONG_S $25, VCPU_R25(k1) - - /* Guest k0/k1 saved later */ - - LONG_S $28, VCPU_R28(k1) - LONG_S $29, VCPU_R29(k1) - LONG_S $30, VCPU_R30(k1) - LONG_S $31, VCPU_R31(k1) - - /* We need to save hi/lo and restore them on - * the way out - */ - mfhi t0 - LONG_S t0, VCPU_HI(k1) - - mflo t0 - LONG_S t0, VCPU_LO(k1) - - /* Finally save guest k0/k1 to VCPU */ - mfc0 t0, CP0_ERROREPC - LONG_S t0, VCPU_R26(k1) - - /* Get GUEST k1 and save it in VCPU */ - la t1, ~0x2ff - mfc0 t0, CP0_EBASE - and t0, t0, t1 - LONG_L t0, 0x3000(t0) - LONG_S t0, VCPU_R27(k1) - - /* Now that context has been saved, we can use other registers */ - - /* Restore vcpu */ - mfc0 a1, CP0_DDATA_LO - move s1, a1 - - /* Restore run (vcpu->run) */ - LONG_L a0, VCPU_RUN(a1) - /* Save pointer to run in s0, will be saved by the compiler */ - move s0, a0 - - - /* Save Host level EPC, BadVaddr and Cause to VCPU, useful to process the exception */ - mfc0 k0,CP0_EPC - LONG_S k0, VCPU_PC(k1) - - mfc0 k0, CP0_BADVADDR - LONG_S k0, VCPU_HOST_CP0_BADVADDR(k1) - - mfc0 k0, CP0_CAUSE - LONG_S k0, VCPU_HOST_CP0_CAUSE(k1) - - mfc0 k0, CP0_ENTRYHI - LONG_S k0, VCPU_HOST_ENTRYHI(k1) - - /* Now restore the host state just enough to run the handlers */ - - /* Swtich EBASE to the one used by Linux */ - /* load up the host EBASE */ - mfc0 v0, CP0_STATUS - - .set at - or k0, v0, ST0_BEV - .set noat - - mtc0 k0, CP0_STATUS - ehb - - LONG_L k0, VCPU_HOST_EBASE(k1) - mtc0 k0,CP0_EBASE - - - /* Now that the new EBASE has been loaded, unset BEV and KSU_USER */ - .set at - and v0, v0, ~(ST0_EXL | KSU_USER | ST0_IE) - or v0, v0, ST0_CU0 - .set noat - mtc0 v0, CP0_STATUS - ehb - - /* Load up host GP */ - LONG_L gp, VCPU_HOST_GP(k1) - - /* Need a stack before we can jump to "C" */ - LONG_L sp, VCPU_HOST_STACK(k1) - - /* Saved host state */ - addiu sp,sp, -PT_SIZE + /* Get the VCPU pointer from DDTATA_LO */ + mfc0 k1, CP0_DDATA_LO + INT_ADDIU k1, k1, VCPU_HOST_ARCH + + /* Start saving Guest context to VCPU */ + LONG_S $0, VCPU_R0(k1) + LONG_S $1, VCPU_R1(k1) + LONG_S $2, VCPU_R2(k1) + LONG_S $3, VCPU_R3(k1) + LONG_S $4, VCPU_R4(k1) + LONG_S $5, VCPU_R5(k1) + LONG_S $6, VCPU_R6(k1) + LONG_S $7, VCPU_R7(k1) + LONG_S $8, VCPU_R8(k1) + LONG_S $9, VCPU_R9(k1) + LONG_S $10, VCPU_R10(k1) + LONG_S $11, VCPU_R11(k1) + LONG_S $12, VCPU_R12(k1) + LONG_S $13, VCPU_R13(k1) + LONG_S $14, VCPU_R14(k1) + LONG_S $15, VCPU_R15(k1) + LONG_S $16, VCPU_R16(k1) + LONG_S $17, VCPU_R17(k1) + LONG_S $18, VCPU_R18(k1) + LONG_S $19, VCPU_R19(k1) + LONG_S $20, VCPU_R20(k1) + LONG_S $21, VCPU_R21(k1) + LONG_S $22, VCPU_R22(k1) + LONG_S $23, VCPU_R23(k1) + LONG_S $24, VCPU_R24(k1) + LONG_S $25, VCPU_R25(k1) + + /* Guest k0/k1 saved later */ + + LONG_S $28, VCPU_R28(k1) + LONG_S $29, VCPU_R29(k1) + LONG_S $30, VCPU_R30(k1) + LONG_S $31, VCPU_R31(k1) + + /* We need to save hi/lo and restore them on + * the way out + */ + mfhi t0 + LONG_S t0, VCPU_HI(k1) + + mflo t0 + LONG_S t0, VCPU_LO(k1) + + /* Finally save guest k0/k1 to VCPU */ + mfc0 t0, CP0_ERROREPC + LONG_S t0, VCPU_R26(k1) + + /* Get GUEST k1 and save it in VCPU */ + PTR_LI t1, ~0x2ff + mfc0 t0, CP0_EBASE + and t0, t0, t1 + LONG_L t0, 0x3000(t0) + LONG_S t0, VCPU_R27(k1) + + /* Now that context has been saved, we can use other registers */ + + /* Restore vcpu */ + mfc0 a1, CP0_DDATA_LO + move s1, a1 + + /* Restore run (vcpu->run) */ + LONG_L a0, VCPU_RUN(a1) + /* Save pointer to run in s0, will be saved by the compiler */ + move s0, a0 + + /* Save Host level EPC, BadVaddr and Cause to VCPU, useful to + * process the exception */ + mfc0 k0,CP0_EPC + LONG_S k0, VCPU_PC(k1) + + mfc0 k0, CP0_BADVADDR + LONG_S k0, VCPU_HOST_CP0_BADVADDR(k1) + + mfc0 k0, CP0_CAUSE + LONG_S k0, VCPU_HOST_CP0_CAUSE(k1) + + mfc0 k0, CP0_ENTRYHI + LONG_S k0, VCPU_HOST_ENTRYHI(k1) + + /* Now restore the host state just enough to run the handlers */ + + /* Swtich EBASE to the one used by Linux */ + /* load up the host EBASE */ + mfc0 v0, CP0_STATUS + + .set at + or k0, v0, ST0_BEV + .set noat + + mtc0 k0, CP0_STATUS + ehb + + LONG_L k0, VCPU_HOST_EBASE(k1) + mtc0 k0,CP0_EBASE + - /* XXXKYMA do we need to load the host ASID, maybe not because the - * kernel entries are marked GLOBAL, need to verify - */ + /* Now that the new EBASE has been loaded, unset BEV and KSU_USER */ + .set at + and v0, v0, ~(ST0_EXL | KSU_USER | ST0_IE) + or v0, v0, ST0_CU0 + .set noat + mtc0 v0, CP0_STATUS + ehb + + /* Load up host GP */ + LONG_L gp, VCPU_HOST_GP(k1) + + /* Need a stack before we can jump to "C" */ + LONG_L sp, VCPU_HOST_STACK(k1) + + /* Saved host state */ + INT_ADDIU sp, sp, -PT_SIZE - /* Restore host DDATA_LO */ - LONG_L k0, PT_HOST_USERLOCAL(sp) - mtc0 k0, CP0_DDATA_LO + /* XXXKYMA do we need to load the host ASID, maybe not because the + * kernel entries are marked GLOBAL, need to verify + */ - /* Restore RDHWR access */ - la k0, 0x2000000F - mtc0 k0, CP0_HWRENA + /* Restore host DDATA_LO */ + LONG_L k0, PT_HOST_USERLOCAL(sp) + mtc0 k0, CP0_DDATA_LO - /* Jump to handler */ + /* Restore RDHWR access */ + PTR_LI k0, 0x2000000F + mtc0 k0, CP0_HWRENA + + /* Jump to handler */ FEXPORT(__kvm_mips_jump_to_handler) - /* XXXKYMA: not sure if this is safe, how large is the stack?? */ - /* Now jump to the kvm_mips_handle_exit() to see if we can deal with this in the kernel */ - la t9,kvm_mips_handle_exit - jalr.hb t9 - addiu sp,sp, -CALLFRAME_SIZ /* BD Slot */ - - /* Return from handler Make sure interrupts are disabled */ - di - ehb - - /* XXXKYMA: k0/k1 could have been blown away if we processed an exception - * while we were handling the exception from the guest, reload k1 - */ - move k1, s1 - addiu k1, k1, VCPU_HOST_ARCH - - /* Check return value, should tell us if we are returning to the host (handle I/O etc) - * or resuming the guest - */ - andi t0, v0, RESUME_HOST - bnez t0, __kvm_mips_return_to_host - nop + /* XXXKYMA: not sure if this is safe, how large is the stack?? + * Now jump to the kvm_mips_handle_exit() to see if we can deal + * with this in the kernel */ + PTR_LA t9, kvm_mips_handle_exit + jalr.hb t9 + INT_ADDIU sp, sp, -CALLFRAME_SIZ /* BD Slot */ + + /* Return from handler Make sure interrupts are disabled */ + di + ehb + + /* XXXKYMA: k0/k1 could have been blown away if we processed + * an exception while we were handling the exception from the + * guest, reload k1 + */ + + move k1, s1 + INT_ADDIU k1, k1, VCPU_HOST_ARCH + + /* Check return value, should tell us if we are returning to the + * host (handle I/O etc)or resuming the guest + */ + andi t0, v0, RESUME_HOST + bnez t0, __kvm_mips_return_to_host + nop __kvm_mips_return_to_guest: - /* Put the saved pointer to vcpu (s1) back into the DDATA_LO Register */ - mtc0 s1, CP0_DDATA_LO - - /* Load up the Guest EBASE to minimize the window where BEV is set */ - LONG_L t0, VCPU_GUEST_EBASE(k1) - - /* Switch EBASE back to the one used by KVM */ - mfc0 v1, CP0_STATUS - .set at - or k0, v1, ST0_BEV - .set noat - mtc0 k0, CP0_STATUS - ehb - mtc0 t0,CP0_EBASE - - /* Setup status register for running guest in UM */ - .set at - or v1, v1, (ST0_EXL | KSU_USER | ST0_IE) - and v1, v1, ~ST0_CU0 - .set noat - mtc0 v1, CP0_STATUS - ehb + /* Put the saved pointer to vcpu (s1) back into the DDATA_LO Register */ + mtc0 s1, CP0_DDATA_LO + /* Load up the Guest EBASE to minimize the window where BEV is set */ + LONG_L t0, VCPU_GUEST_EBASE(k1) + + /* Switch EBASE back to the one used by KVM */ + mfc0 v1, CP0_STATUS + .set at + or k0, v1, ST0_BEV + .set noat + mtc0 k0, CP0_STATUS + ehb + mtc0 t0, CP0_EBASE + + /* Setup status register for running guest in UM */ + .set at + or v1, v1, (ST0_EXL | KSU_USER | ST0_IE) + and v1, v1, ~ST0_CU0 + .set noat + mtc0 v1, CP0_STATUS + ehb /* Set Guest EPC */ - LONG_L t0, VCPU_PC(k1) - mtc0 t0, CP0_EPC - - /* Set the ASID for the Guest Kernel */ - sll t0, t0, 1 /* with kseg0 @ 0x40000000, kernel */ - /* addresses shift to 0x80000000 */ - bltz t0, 1f /* If kernel */ - addiu t1, k1, VCPU_GUEST_KERNEL_ASID /* (BD) */ - addiu t1, k1, VCPU_GUEST_USER_ASID /* else user */ + LONG_L t0, VCPU_PC(k1) + mtc0 t0, CP0_EPC + + /* Set the ASID for the Guest Kernel */ + INT_SLL t0, t0, 1 /* with kseg0 @ 0x40000000, kernel */ + /* addresses shift to 0x80000000 */ + bltz t0, 1f /* If kernel */ + INT_ADDIU t1, k1, VCPU_GUEST_KERNEL_ASID /* (BD) */ + INT_ADDIU t1, k1, VCPU_GUEST_USER_ASID /* else user */ 1: - /* t1: contains the base of the ASID array, need to get the cpu id */ - LONG_L t2, TI_CPU($28) /* smp_processor_id */ - sll t2, t2, 2 /* x4 */ - addu t3, t1, t2 - LONG_L k0, (t3) - andi k0, k0, 0xff - mtc0 k0,CP0_ENTRYHI - ehb - - /* Disable RDHWR access */ - mtc0 zero, CP0_HWRENA - - /* load the guest context from VCPU and return */ - LONG_L $0, VCPU_R0(k1) - LONG_L $1, VCPU_R1(k1) - LONG_L $2, VCPU_R2(k1) - LONG_L $3, VCPU_R3(k1) - LONG_L $4, VCPU_R4(k1) - LONG_L $5, VCPU_R5(k1) - LONG_L $6, VCPU_R6(k1) - LONG_L $7, VCPU_R7(k1) - LONG_L $8, VCPU_R8(k1) - LONG_L $9, VCPU_R9(k1) - LONG_L $10, VCPU_R10(k1) - LONG_L $11, VCPU_R11(k1) - LONG_L $12, VCPU_R12(k1) - LONG_L $13, VCPU_R13(k1) - LONG_L $14, VCPU_R14(k1) - LONG_L $15, VCPU_R15(k1) - LONG_L $16, VCPU_R16(k1) - LONG_L $17, VCPU_R17(k1) - LONG_L $18, VCPU_R18(k1) - LONG_L $19, VCPU_R19(k1) - LONG_L $20, VCPU_R20(k1) - LONG_L $21, VCPU_R21(k1) - LONG_L $22, VCPU_R22(k1) - LONG_L $23, VCPU_R23(k1) - LONG_L $24, VCPU_R24(k1) - LONG_L $25, VCPU_R25(k1) - - /* $/k1 loaded later */ - LONG_L $28, VCPU_R28(k1) - LONG_L $29, VCPU_R29(k1) - LONG_L $30, VCPU_R30(k1) - LONG_L $31, VCPU_R31(k1) + /* t1: contains the base of the ASID array, need to get the cpu id */ + LONG_L t2, TI_CPU($28) /* smp_processor_id */ + INT_SLL t2, t2, 2 /* x4 */ + REG_ADDU t3, t1, t2 + LONG_L k0, (t3) + andi k0, k0, 0xff + mtc0 k0,CP0_ENTRYHI + ehb + + /* Disable RDHWR access */ + mtc0 zero, CP0_HWRENA + + /* load the guest context from VCPU and return */ + LONG_L $0, VCPU_R0(k1) + LONG_L $1, VCPU_R1(k1) + LONG_L $2, VCPU_R2(k1) + LONG_L $3, VCPU_R3(k1) + LONG_L $4, VCPU_R4(k1) + LONG_L $5, VCPU_R5(k1) + LONG_L $6, VCPU_R6(k1) + LONG_L $7, VCPU_R7(k1) + LONG_L $8, VCPU_R8(k1) + LONG_L $9, VCPU_R9(k1) + LONG_L $10, VCPU_R10(k1) + LONG_L $11, VCPU_R11(k1) + LONG_L $12, VCPU_R12(k1) + LONG_L $13, VCPU_R13(k1) + LONG_L $14, VCPU_R14(k1) + LONG_L $15, VCPU_R15(k1) + LONG_L $16, VCPU_R16(k1) + LONG_L $17, VCPU_R17(k1) + LONG_L $18, VCPU_R18(k1) + LONG_L $19, VCPU_R19(k1) + LONG_L $20, VCPU_R20(k1) + LONG_L $21, VCPU_R21(k1) + LONG_L $22, VCPU_R22(k1) + LONG_L $23, VCPU_R23(k1) + LONG_L $24, VCPU_R24(k1) + LONG_L $25, VCPU_R25(k1) + + /* $/k1 loaded later */ + LONG_L $28, VCPU_R28(k1) + LONG_L $29, VCPU_R29(k1) + LONG_L $30, VCPU_R30(k1) + LONG_L $31, VCPU_R31(k1) FEXPORT(__kvm_mips_skip_guest_restore) - LONG_L k0, VCPU_HI(k1) - mthi k0 + LONG_L k0, VCPU_HI(k1) + mthi k0 - LONG_L k0, VCPU_LO(k1) - mtlo k0 + LONG_L k0, VCPU_LO(k1) + mtlo k0 - LONG_L k0, VCPU_R26(k1) - LONG_L k1, VCPU_R27(k1) + LONG_L k0, VCPU_R26(k1) + LONG_L k1, VCPU_R27(k1) - eret + eret __kvm_mips_return_to_host: - /* EBASE is already pointing to Linux */ - LONG_L k1, VCPU_HOST_STACK(k1) - addiu k1,k1, -PT_SIZE - - /* Restore host DDATA_LO */ - LONG_L k0, PT_HOST_USERLOCAL(k1) - mtc0 k0, CP0_DDATA_LO - - /* Restore host ASID */ - LONG_L k0, PT_HOST_ASID(sp) - andi k0, 0xff - mtc0 k0,CP0_ENTRYHI - ehb - - /* Load context saved on the host stack */ - LONG_L $0, PT_R0(k1) - LONG_L $1, PT_R1(k1) - - /* r2/v0 is the return code, shift it down by 2 (arithmetic) to recover the err code */ - sra k0, v0, 2 - move $2, k0 - - LONG_L $3, PT_R3(k1) - LONG_L $4, PT_R4(k1) - LONG_L $5, PT_R5(k1) - LONG_L $6, PT_R6(k1) - LONG_L $7, PT_R7(k1) - LONG_L $8, PT_R8(k1) - LONG_L $9, PT_R9(k1) - LONG_L $10, PT_R10(k1) - LONG_L $11, PT_R11(k1) - LONG_L $12, PT_R12(k1) - LONG_L $13, PT_R13(k1) - LONG_L $14, PT_R14(k1) - LONG_L $15, PT_R15(k1) - LONG_L $16, PT_R16(k1) - LONG_L $17, PT_R17(k1) - LONG_L $18, PT_R18(k1) - LONG_L $19, PT_R19(k1) - LONG_L $20, PT_R20(k1) - LONG_L $21, PT_R21(k1) - LONG_L $22, PT_R22(k1) - LONG_L $23, PT_R23(k1) - LONG_L $24, PT_R24(k1) - LONG_L $25, PT_R25(k1) - - /* Host k0/k1 were not saved */ - - LONG_L $28, PT_R28(k1) - LONG_L $29, PT_R29(k1) - LONG_L $30, PT_R30(k1) - - LONG_L k0, PT_HI(k1) - mthi k0 - - LONG_L k0, PT_LO(k1) - mtlo k0 - - /* Restore RDHWR access */ - la k0, 0x2000000F - mtc0 k0, CP0_HWRENA - - - /* Restore RA, which is the address we will return to */ - LONG_L ra, PT_R31(k1) - j ra - nop - - .set pop + /* EBASE is already pointing to Linux */ + LONG_L k1, VCPU_HOST_STACK(k1) + INT_ADDIU k1,k1, -PT_SIZE + + /* Restore host DDATA_LO */ + LONG_L k0, PT_HOST_USERLOCAL(k1) + mtc0 k0, CP0_DDATA_LO + + /* Restore host ASID */ + LONG_L k0, PT_HOST_ASID(sp) + andi k0, 0xff + mtc0 k0,CP0_ENTRYHI + ehb + + /* Load context saved on the host stack */ + LONG_L $0, PT_R0(k1) + LONG_L $1, PT_R1(k1) + + /* r2/v0 is the return code, shift it down by 2 (arithmetic) + * to recover the err code */ + INT_SRA k0, v0, 2 + move $2, k0 + + LONG_L $3, PT_R3(k1) + LONG_L $4, PT_R4(k1) + LONG_L $5, PT_R5(k1) + LONG_L $6, PT_R6(k1) + LONG_L $7, PT_R7(k1) + LONG_L $8, PT_R8(k1) + LONG_L $9, PT_R9(k1) + LONG_L $10, PT_R10(k1) + LONG_L $11, PT_R11(k1) + LONG_L $12, PT_R12(k1) + LONG_L $13, PT_R13(k1) + LONG_L $14, PT_R14(k1) + LONG_L $15, PT_R15(k1) + LONG_L $16, PT_R16(k1) + LONG_L $17, PT_R17(k1) + LONG_L $18, PT_R18(k1) + LONG_L $19, PT_R19(k1) + LONG_L $20, PT_R20(k1) + LONG_L $21, PT_R21(k1) + LONG_L $22, PT_R22(k1) + LONG_L $23, PT_R23(k1) + LONG_L $24, PT_R24(k1) + LONG_L $25, PT_R25(k1) + + /* Host k0/k1 were not saved */ + + LONG_L $28, PT_R28(k1) + LONG_L $29, PT_R29(k1) + LONG_L $30, PT_R30(k1) + + LONG_L k0, PT_HI(k1) + mthi k0 + + LONG_L k0, PT_LO(k1) + mtlo k0 + + /* Restore RDHWR access */ + PTR_LI k0, 0x2000000F + mtc0 k0, CP0_HWRENA + + + /* Restore RA, which is the address we will return to */ + LONG_L ra, PT_R31(k1) + j ra + nop + VECTOR_END(MIPSX(GuestExceptionEnd)) .end MIPSX(GuestException) @@ -627,24 +623,23 @@ MIPSX(exceptions): #define HW_SYNCI_Step $1 LEAF(MIPSX(SyncICache)) - .set push + .set push .set mips32r2 - beq a1, zero, 20f - nop - addu a1, a0, a1 - rdhwr v0, HW_SYNCI_Step - beq v0, zero, 20f - nop - + beq a1, zero, 20f + nop + REG_ADDU a1, a0, a1 + rdhwr v0, HW_SYNCI_Step + beq v0, zero, 20f + nop 10: - synci 0(a0) - addu a0, a0, v0 - sltu v1, a0, a1 - bne v1, zero, 10b - nop - sync + synci 0(a0) + REG_ADDU a0, a0, v0 + sltu v1, a0, a1 + bne v1, zero, 10b + nop + sync 20: - jr.hb ra - nop - .set pop + jr.hb ra + nop + .set pop END(MIPSX(SyncICache)) diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index b7a1911d108..043eec8461e 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -1579,7 +1579,7 @@ int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *ghf) ctx->first_pass = 1; rwflag = (ghf->flags & KVM_GET_HTAB_WRITE) ? O_WRONLY : O_RDONLY; - ret = anon_inode_getfd("kvm-htab", &kvm_htab_fops, ctx, rwflag); + ret = anon_inode_getfd("kvm-htab", &kvm_htab_fops, ctx, rwflag | O_CLOEXEC); if (ret < 0) { kvm_put_kvm(kvm); return ret; diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index b2d3f3b2de7..54cf9bc94da 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -136,7 +136,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, mutex_unlock(&kvm->lock); return anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops, - stt, O_RDWR); + stt, O_RDWR | O_CLOEXEC); fail: if (stt) { diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 8aadd237d45..b0ee3bc9ca7 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1564,7 +1564,7 @@ long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret) if (!ri) return -ENOMEM; - fd = anon_inode_getfd("kvm-rma", &kvm_rma_fops, ri, O_RDWR); + fd = anon_inode_getfd("kvm-rma", &kvm_rma_fops, ri, O_RDWR | O_CLOEXEC); if (fd < 0) kvm_release_rma(ri); diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index c0efd16bdfa..c76ff74a98f 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -516,6 +516,11 @@ struct kvm_vcpu_arch { /* set at EPT violation at this point */ unsigned long exit_qualification; + + /* pv related host specific info */ + struct { + bool pv_unhalted; + } pv; }; struct kvm_lpage_info { diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h index 06fdbd987e9..94dc8ca434e 100644 --- a/arch/x86/include/uapi/asm/kvm_para.h +++ b/arch/x86/include/uapi/asm/kvm_para.h @@ -23,6 +23,7 @@ #define KVM_FEATURE_ASYNC_PF 4 #define KVM_FEATURE_STEAL_TIME 5 #define KVM_FEATURE_PV_EOI 6 +#define KVM_FEATURE_PV_UNHALT 7 /* The last 8 bits are used to indicate how to interpret the flags field * in pvclock structure. If no bits are set, all flags are ignored. diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index a20ecb5b6cb..b110fe6c03d 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -413,7 +413,8 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, (1 << KVM_FEATURE_CLOCKSOURCE2) | (1 << KVM_FEATURE_ASYNC_PF) | (1 << KVM_FEATURE_PV_EOI) | - (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT); + (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) | + (1 << KVM_FEATURE_PV_UNHALT); if (sched_info_on()) entry->eax |= (1 << KVM_FEATURE_STEAL_TIME); diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index c98f0544232..5439117d5c4 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -689,7 +689,10 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, break; case APIC_DM_REMRD: - apic_debug("Ignoring delivery mode 3\n"); + result = 1; + vcpu->arch.pv.pv_unhalted = 1; + kvm_make_request(KVM_REQ_EVENT, vcpu); + kvm_vcpu_kick(vcpu); break; case APIC_DM_SMI: diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 9651c993758..6e2d2c8f230 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2781,7 +2781,7 @@ exit: return ret; } -static bool page_fault_can_be_fast(struct kvm_vcpu *vcpu, u32 error_code) +static bool page_fault_can_be_fast(u32 error_code) { /* * Do not fix the mmio spte with invalid generation number which @@ -2834,7 +2834,7 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level, bool ret = false; u64 spte = 0ull; - if (!page_fault_can_be_fast(vcpu, error_code)) + if (!page_fault_can_be_fast(error_code)) return false; walk_shadow_page_lockless_begin(vcpu); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 57b4e129891..1f1da43ff2a 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -5485,6 +5485,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) err = emulate_instruction(vcpu, EMULTYPE_NO_REEXECUTE); if (err == EMULATE_USER_EXIT) { + ++vcpu->stat.mmio_exits; ret = 0; goto out; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 668f19aee6c..e5ca72a5cdb 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1447,6 +1447,29 @@ static void pvclock_update_vm_gtod_copy(struct kvm *kvm) #endif } +static void kvm_gen_update_masterclock(struct kvm *kvm) +{ +#ifdef CONFIG_X86_64 + int i; + struct kvm_vcpu *vcpu; + struct kvm_arch *ka = &kvm->arch; + + spin_lock(&ka->pvclock_gtod_sync_lock); + kvm_make_mclock_inprogress_request(kvm); + /* no guest entries from this point */ + pvclock_update_vm_gtod_copy(kvm); + + kvm_for_each_vcpu(i, vcpu, kvm) + set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests); + + /* guest entries allowed */ + kvm_for_each_vcpu(i, vcpu, kvm) + clear_bit(KVM_REQ_MCLOCK_INPROGRESS, &vcpu->requests); + + spin_unlock(&ka->pvclock_gtod_sync_lock); +#endif +} + static int kvm_guest_time_update(struct kvm_vcpu *v) { unsigned long flags, this_tsc_khz; @@ -3796,6 +3819,7 @@ long kvm_arch_vm_ioctl(struct file *filp, delta = user_ns.clock - now_ns; local_irq_enable(); kvm->arch.kvmclock_offset = delta; + kvm_gen_update_masterclock(kvm); break; } case KVM_GET_CLOCK: { @@ -5122,9 +5146,10 @@ restart: inject_emulated_exception(vcpu); r = EMULATE_DONE; } else if (vcpu->arch.pio.count) { - if (!vcpu->arch.pio.in) + if (!vcpu->arch.pio.in) { + /* FIXME: return into emulator if single-stepping. */ vcpu->arch.pio.count = 0; - else { + } else { writeback = false; vcpu->arch.complete_userspace_io = complete_emulated_pio; } @@ -5588,6 +5613,23 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) return 1; } +/* + * kvm_pv_kick_cpu_op: Kick a vcpu. + * + * @apicid - apicid of vcpu to be kicked. + */ +static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid) +{ + struct kvm_lapic_irq lapic_irq; + + lapic_irq.shorthand = 0; + lapic_irq.dest_mode = 0; + lapic_irq.dest_id = apicid; + + lapic_irq.delivery_mode = APIC_DM_REMRD; + kvm_irq_delivery_to_apic(kvm, 0, &lapic_irq, NULL); +} + int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) { unsigned long nr, a0, a1, a2, a3, ret; @@ -5621,6 +5663,10 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) case KVM_HC_VAPIC_POLL_IRQ: ret = 0; break; + case KVM_HC_KICK_CPU: + kvm_pv_kick_cpu_op(vcpu->kvm, a0, a1); + ret = 0; + break; default: ret = -KVM_ENOSYS; break; @@ -5782,29 +5828,6 @@ static void process_nmi(struct kvm_vcpu *vcpu) kvm_make_request(KVM_REQ_EVENT, vcpu); } -static void kvm_gen_update_masterclock(struct kvm *kvm) -{ -#ifdef CONFIG_X86_64 - int i; - struct kvm_vcpu *vcpu; - struct kvm_arch *ka = &kvm->arch; - - spin_lock(&ka->pvclock_gtod_sync_lock); - kvm_make_mclock_inprogress_request(kvm); - /* no guest entries from this point */ - pvclock_update_vm_gtod_copy(kvm); - - kvm_for_each_vcpu(i, vcpu, kvm) - set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests); - - /* guest entries allowed */ - kvm_for_each_vcpu(i, vcpu, kvm) - clear_bit(KVM_REQ_MCLOCK_INPROGRESS, &vcpu->requests); - - spin_unlock(&ka->pvclock_gtod_sync_lock); -#endif -} - static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) { u64 eoi_exit_bitmap[4]; @@ -6043,6 +6066,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) kvm_apic_accept_events(vcpu); switch(vcpu->arch.mp_state) { case KVM_MP_STATE_HALTED: + vcpu->arch.pv.pv_unhalted = false; vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; case KVM_MP_STATE_RUNNABLE: @@ -6154,6 +6178,8 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu) if (vcpu->mmio_cur_fragment == vcpu->mmio_nr_fragments) { vcpu->mmio_needed = 0; + + /* FIXME: return into emulator if single-stepping. */ if (vcpu->mmio_is_write) return 1; vcpu->mmio_read_completed = 1; @@ -6342,7 +6368,12 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { kvm_apic_accept_events(vcpu); - mp_state->mp_state = vcpu->arch.mp_state; + if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED && + vcpu->arch.pv.pv_unhalted) + mp_state->mp_state = KVM_MP_STATE_RUNNABLE; + else + mp_state->mp_state = vcpu->arch.mp_state; + return 0; } @@ -6863,6 +6894,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) BUG_ON(vcpu->kvm == NULL); kvm = vcpu->kvm; + vcpu->arch.pv.pv_unhalted = false; vcpu->arch.emulate_ctxt.ops = &emulate_ops; if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_bsp(vcpu)) vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; @@ -7200,6 +7232,7 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) !vcpu->arch.apf.halted) || !list_empty_careful(&vcpu->async_pf.done) || kvm_apic_has_events(vcpu) + || vcpu->arch.pv.pv_unhalted || atomic_read(&vcpu->arch.nmi_queued) || (kvm_arch_interrupt_allowed(vcpu) && kvm_cpu_has_interrupt(vcpu)); diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index acccd08be6c..99c25338ede 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -667,6 +667,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_PPC_RTAS 91 #define KVM_CAP_IRQ_XICS 92 #define KVM_CAP_ARM_EL1_32BIT 93 +#define KVM_CAP_SPAPR_MULTITCE 94 #ifdef KVM_CAP_IRQ_ROUTING diff --git a/include/uapi/linux/kvm_para.h b/include/uapi/linux/kvm_para.h index cea2c5c72d2..2841f86eae0 100644 --- a/include/uapi/linux/kvm_para.h +++ b/include/uapi/linux/kvm_para.h @@ -19,6 +19,7 @@ #define KVM_HC_MMU_OP 2 #define KVM_HC_FEATURES 3 #define KVM_HC_PPC_MAP_MAGIC_PAGE 4 +#define KVM_HC_KICK_CPU 5 /* * hypercalls use architecture specific diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index c6c8bbea174..bf040c4e02b 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -102,28 +102,8 @@ static bool largepages_enabled = true; bool kvm_is_mmio_pfn(pfn_t pfn) { - if (pfn_valid(pfn)) { - int reserved; - struct page *tail = pfn_to_page(pfn); - struct page *head = compound_trans_head(tail); - reserved = PageReserved(head); - if (head != tail) { - /* - * "head" is not a dangling pointer - * (compound_trans_head takes care of that) - * but the hugepage may have been splitted - * from under us (and we may not hold a - * reference count on the head page so it can - * be reused before we run PageReferenced), so - * we've to check PageTail before returning - * what we just read. - */ - smp_rmb(); - if (PageTail(tail)) - return reserved; - } - return PageReserved(tail); - } + if (pfn_valid(pfn)) + return PageReserved(pfn_to_page(pfn)); return true; } @@ -1896,7 +1876,7 @@ static struct file_operations kvm_vcpu_fops = { */ static int create_vcpu_fd(struct kvm_vcpu *vcpu) { - return anon_inode_getfd("kvm-vcpu", &kvm_vcpu_fops, vcpu, O_RDWR); + return anon_inode_getfd("kvm-vcpu", &kvm_vcpu_fops, vcpu, O_RDWR | O_CLOEXEC); } /* @@ -2305,7 +2285,7 @@ static int kvm_ioctl_create_device(struct kvm *kvm, return ret; } - ret = anon_inode_getfd(ops->name, &kvm_device_fops, dev, O_RDWR); + ret = anon_inode_getfd(ops->name, &kvm_device_fops, dev, O_RDWR | O_CLOEXEC); if (ret < 0) { ops->destroy(dev); return ret; @@ -2589,7 +2569,7 @@ static int kvm_dev_ioctl_create_vm(unsigned long type) return r; } #endif - r = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, O_RDWR); + r = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, O_RDWR | O_CLOEXEC); if (r < 0) kvm_put_kvm(kvm); @@ -2815,8 +2795,8 @@ static void kvm_io_bus_destroy(struct kvm_io_bus *bus) kfree(bus); } -static inline int __kvm_io_bus_sort_cmp(const struct kvm_io_range *r1, - const struct kvm_io_range *r2) +static inline int kvm_io_bus_cmp(const struct kvm_io_range *r1, + const struct kvm_io_range *r2) { if (r1->addr < r2->addr) return -1; @@ -2827,7 +2807,7 @@ static inline int __kvm_io_bus_sort_cmp(const struct kvm_io_range *r1, static int kvm_io_bus_sort_cmp(const void *p1, const void *p2) { - return __kvm_io_bus_sort_cmp(p1, p2); + return kvm_io_bus_cmp(p1, p2); } static int kvm_io_bus_insert_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev, @@ -2863,7 +2843,7 @@ static int kvm_io_bus_get_first_dev(struct kvm_io_bus *bus, off = range - bus->range; - while (off > 0 && __kvm_io_bus_sort_cmp(&key, &bus->range[off-1]) == 0) + while (off > 0 && kvm_io_bus_cmp(&key, &bus->range[off-1]) == 0) off--; return off; @@ -2879,7 +2859,7 @@ static int __kvm_io_bus_write(struct kvm_io_bus *bus, return -EOPNOTSUPP; while (idx < bus->dev_count && - __kvm_io_bus_sort_cmp(range, &bus->range[idx]) == 0) { + kvm_io_bus_cmp(range, &bus->range[idx]) == 0) { if (!kvm_iodevice_write(bus->range[idx].dev, range->addr, range->len, val)) return idx; @@ -2923,7 +2903,7 @@ int kvm_io_bus_write_cookie(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, /* First try the device referenced by cookie. */ if ((cookie >= 0) && (cookie < bus->dev_count) && - (__kvm_io_bus_sort_cmp(&range, &bus->range[cookie]) == 0)) + (kvm_io_bus_cmp(&range, &bus->range[cookie]) == 0)) if (!kvm_iodevice_write(bus->range[cookie].dev, addr, len, val)) return cookie; @@ -2945,7 +2925,7 @@ static int __kvm_io_bus_read(struct kvm_io_bus *bus, struct kvm_io_range *range, return -EOPNOTSUPP; while (idx < bus->dev_count && - __kvm_io_bus_sort_cmp(range, &bus->range[idx]) == 0) { + kvm_io_bus_cmp(range, &bus->range[idx]) == 0) { if (!kvm_iodevice_read(bus->range[idx].dev, range->addr, range->len, val)) return idx; @@ -2989,7 +2969,7 @@ int kvm_io_bus_read_cookie(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, /* First try the device referenced by cookie. */ if ((cookie >= 0) && (cookie < bus->dev_count) && - (__kvm_io_bus_sort_cmp(&range, &bus->range[cookie]) == 0)) + (kvm_io_bus_cmp(&range, &bus->range[cookie]) == 0)) if (!kvm_iodevice_read(bus->range[cookie].dev, addr, len, val)) return cookie; |