diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2010-10-24 12:47:25 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2010-10-24 12:47:25 -0700 |
commit | 1765a1fe5d6f82c0eceb1ad10594cfc83759b6d0 (patch) | |
tree | a701020f0fa3a1932a36d174c5fffd20496303a9 /arch/powerpc | |
parent | bdaf12b41235b0c59949914de022341e77907461 (diff) | |
parent | 2a31339aa014c0d0b97c57d3ebc997732f8f47fc (diff) |
Merge branch 'kvm-updates/2.6.37' of git://git.kernel.org/pub/scm/virt/kvm/kvm
* 'kvm-updates/2.6.37' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (321 commits)
KVM: Drop CONFIG_DMAR dependency around kvm_iommu_map_pages
KVM: Fix signature of kvm_iommu_map_pages stub
KVM: MCE: Send SRAR SIGBUS directly
KVM: MCE: Add MCG_SER_P into KVM_MCE_CAP_SUPPORTED
KVM: fix typo in copyright notice
KVM: Disable interrupts around get_kernel_ns()
KVM: MMU: Avoid sign extension in mmu_alloc_direct_roots() pae root address
KVM: MMU: move access code parsing to FNAME(walk_addr) function
KVM: MMU: audit: check whether have unsync sps after root sync
KVM: MMU: audit: introduce audit_printk to cleanup audit code
KVM: MMU: audit: unregister audit tracepoints before module unloaded
KVM: MMU: audit: fix vcpu's spte walking
KVM: MMU: set access bit for direct mapping
KVM: MMU: cleanup for error mask set while walk guest page table
KVM: MMU: update 'root_hpa' out of loop in PAE shadow path
KVM: x86 emulator: Eliminate compilation warning in x86_decode_insn()
KVM: x86: Fix constant type in kvm_get_time_scale
KVM: VMX: Add AX to list of registers clobbered by guest switch
KVM guest: Move a printk that's using the clock before it's ready
KVM: x86: TSC catchup mode
...
Diffstat (limited to 'arch/powerpc')
34 files changed, 2093 insertions, 425 deletions
diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h index 6c5547d82bb..18ea6963ad7 100644 --- a/arch/powerpc/include/asm/kvm.h +++ b/arch/powerpc/include/asm/kvm.h @@ -86,5 +86,6 @@ struct kvm_guest_debug_arch { #define KVM_INTERRUPT_SET -1U #define KVM_INTERRUPT_UNSET -2U +#define KVM_INTERRUPT_SET_LEVEL -3U #endif /* __LINUX_KVM_POWERPC_H */ diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h index c5ea4cda34b..5b750467439 100644 --- a/arch/powerpc/include/asm/kvm_asm.h +++ b/arch/powerpc/include/asm/kvm_asm.h @@ -58,6 +58,7 @@ #define BOOK3S_INTERRUPT_INST_STORAGE 0x400 #define BOOK3S_INTERRUPT_INST_SEGMENT 0x480 #define BOOK3S_INTERRUPT_EXTERNAL 0x500 +#define BOOK3S_INTERRUPT_EXTERNAL_LEVEL 0x501 #define BOOK3S_INTERRUPT_ALIGNMENT 0x600 #define BOOK3S_INTERRUPT_PROGRAM 0x700 #define BOOK3S_INTERRUPT_FP_UNAVAIL 0x800 @@ -84,7 +85,8 @@ #define BOOK3S_IRQPRIO_EXTERNAL 13 #define BOOK3S_IRQPRIO_DECREMENTER 14 #define BOOK3S_IRQPRIO_PERFORMANCE_MONITOR 15 -#define BOOK3S_IRQPRIO_MAX 16 +#define BOOK3S_IRQPRIO_EXTERNAL_LEVEL 16 +#define BOOK3S_IRQPRIO_MAX 17 #define BOOK3S_HFLAG_DCBZ32 0x1 #define BOOK3S_HFLAG_SLB 0x2 diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 8274a2d4392..d62e703f121 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -38,15 +38,6 @@ struct kvmppc_slb { bool class : 1; }; -struct kvmppc_sr { - u32 raw; - u32 vsid; - bool Ks : 1; - bool Kp : 1; - bool nx : 1; - bool valid : 1; -}; - struct kvmppc_bat { u64 raw; u32 bepi; @@ -69,6 +60,13 @@ struct kvmppc_sid_map { #define SID_MAP_NUM (1 << SID_MAP_BITS) #define SID_MAP_MASK (SID_MAP_NUM - 1) +#ifdef CONFIG_PPC_BOOK3S_64 +#define SID_CONTEXTS 1 +#else +#define SID_CONTEXTS 128 +#define VSID_POOL_SIZE (SID_CONTEXTS * 16) +#endif + struct kvmppc_vcpu_book3s { struct kvm_vcpu vcpu; struct kvmppc_book3s_shadow_vcpu *shadow_vcpu; @@ -79,20 +77,22 @@ struct kvmppc_vcpu_book3s { u64 vsid; } slb_shadow[64]; u8 slb_shadow_max; - struct kvmppc_sr sr[16]; struct kvmppc_bat ibat[8]; struct kvmppc_bat dbat[8]; u64 hid[6]; u64 gqr[8]; int slb_nr; - u32 dsisr; u64 sdr1; u64 hior; u64 msr_mask; - u64 vsid_first; u64 vsid_next; +#ifdef CONFIG_PPC_BOOK3S_32 + u32 vsid_pool[VSID_POOL_SIZE]; +#else + u64 vsid_first; u64 vsid_max; - int context_id; +#endif + int context_id[SID_CONTEXTS]; ulong prog_flags; /* flags to inject when giving a 700 trap */ }; @@ -131,9 +131,10 @@ extern void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat, bool upper, u32 val); extern void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr); extern int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu); +extern pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn); -extern u32 kvmppc_trampoline_lowmem; -extern u32 kvmppc_trampoline_enter; +extern ulong kvmppc_trampoline_lowmem; +extern ulong kvmppc_trampoline_enter; extern void kvmppc_rmcall(ulong srr0, ulong srr1); extern void kvmppc_load_up_fpu(void); extern void kvmppc_load_up_altivec(void); diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index b0b23c007d6..bba3b9b72a3 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -25,6 +25,7 @@ #include <linux/interrupt.h> #include <linux/types.h> #include <linux/kvm_types.h> +#include <linux/kvm_para.h> #include <asm/kvm_asm.h> #define KVM_MAX_VCPUS 1 @@ -41,12 +42,17 @@ #define HPTEG_CACHE_NUM (1 << 15) #define HPTEG_HASH_BITS_PTE 13 +#define HPTEG_HASH_BITS_PTE_LONG 12 #define HPTEG_HASH_BITS_VPTE 13 #define HPTEG_HASH_BITS_VPTE_LONG 5 #define HPTEG_HASH_NUM_PTE (1 << HPTEG_HASH_BITS_PTE) +#define HPTEG_HASH_NUM_PTE_LONG (1 << HPTEG_HASH_BITS_PTE_LONG) #define HPTEG_HASH_NUM_VPTE (1 << HPTEG_HASH_BITS_VPTE) #define HPTEG_HASH_NUM_VPTE_LONG (1 << HPTEG_HASH_BITS_VPTE_LONG) +/* Physical Address Mask - allowed range of real mode RAM access */ +#define KVM_PAM 0x0fffffffffffffffULL + struct kvm; struct kvm_run; struct kvm_vcpu; @@ -159,8 +165,10 @@ struct kvmppc_mmu { struct hpte_cache { struct hlist_node list_pte; + struct hlist_node list_pte_long; struct hlist_node list_vpte; struct hlist_node list_vpte_long; + struct rcu_head rcu_head; u64 host_va; u64 pfn; ulong slot; @@ -210,28 +218,20 @@ struct kvm_vcpu_arch { u32 cr; #endif - ulong msr; #ifdef CONFIG_PPC_BOOK3S ulong shadow_msr; ulong hflags; ulong guest_owned_ext; #endif u32 mmucr; - ulong sprg0; - ulong sprg1; - ulong sprg2; - ulong sprg3; ulong sprg4; ulong sprg5; ulong sprg6; ulong sprg7; - ulong srr0; - ulong srr1; ulong csrr0; ulong csrr1; ulong dsrr0; ulong dsrr1; - ulong dear; ulong esr; u32 dec; u32 decar; @@ -290,12 +290,17 @@ struct kvm_vcpu_arch { struct tasklet_struct tasklet; u64 dec_jiffies; unsigned long pending_exceptions; + struct kvm_vcpu_arch_shared *shared; + unsigned long magic_page_pa; /* phys addr to map the magic page to */ + unsigned long magic_page_ea; /* effect. addr to map the magic page to */ #ifdef CONFIG_PPC_BOOK3S struct hlist_head hpte_hash_pte[HPTEG_HASH_NUM_PTE]; + struct hlist_head hpte_hash_pte_long[HPTEG_HASH_NUM_PTE_LONG]; struct hlist_head hpte_hash_vpte[HPTEG_HASH_NUM_VPTE]; struct hlist_head hpte_hash_vpte_long[HPTEG_HASH_NUM_VPTE_LONG]; int hpte_cache_count; + spinlock_t mmu_lock; #endif }; diff --git a/arch/powerpc/include/asm/kvm_para.h b/arch/powerpc/include/asm/kvm_para.h index 2d48f6a63d0..50533f9adf4 100644 --- a/arch/powerpc/include/asm/kvm_para.h +++ b/arch/powerpc/include/asm/kvm_para.h @@ -20,16 +20,153 @@ #ifndef __POWERPC_KVM_PARA_H__ #define __POWERPC_KVM_PARA_H__ +#include <linux/types.h> + +struct kvm_vcpu_arch_shared { + __u64 scratch1; + __u64 scratch2; + __u64 scratch3; + __u64 critical; /* Guest may not get interrupts if == r1 */ + __u64 sprg0; + __u64 sprg1; + __u64 sprg2; + __u64 sprg3; + __u64 srr0; + __u64 srr1; + __u64 dar; + __u64 msr; + __u32 dsisr; + __u32 int_pending; /* Tells the guest if we have an interrupt */ + __u32 sr[16]; +}; + +#define KVM_SC_MAGIC_R0 0x4b564d21 /* "KVM!" */ +#define HC_VENDOR_KVM (42 << 16) +#define HC_EV_SUCCESS 0 +#define HC_EV_UNIMPLEMENTED 12 + +#define KVM_FEATURE_MAGIC_PAGE 1 + +#define KVM_MAGIC_FEAT_SR (1 << 0) + #ifdef __KERNEL__ +#ifdef CONFIG_KVM_GUEST + +#include <linux/of.h> + +static inline int kvm_para_available(void) +{ + struct device_node *hyper_node; + + hyper_node = of_find_node_by_path("/hypervisor"); + if (!hyper_node) + return 0; + + if (!of_device_is_compatible(hyper_node, "linux,kvm")) + return 0; + + return 1; +} + +extern unsigned long kvm_hypercall(unsigned long *in, + unsigned long *out, + unsigned long nr); + +#else + static inline int kvm_para_available(void) { return 0; } +static unsigned long kvm_hypercall(unsigned long *in, + unsigned long *out, + unsigned long nr) +{ + return HC_EV_UNIMPLEMENTED; +} + +#endif + +static inline long kvm_hypercall0_1(unsigned int nr, unsigned long *r2) +{ + unsigned long in[8]; + unsigned long out[8]; + unsigned long r; + + r = kvm_hypercall(in, out, nr | HC_VENDOR_KVM); + *r2 = out[0]; + + return r; +} + +static inline long kvm_hypercall0(unsigned int nr) +{ + unsigned long in[8]; + unsigned long out[8]; + + return kvm_hypercall(in, out, nr | HC_VENDOR_KVM); +} + +static inline long kvm_hypercall1(unsigned int nr, unsigned long p1) +{ + unsigned long in[8]; + unsigned long out[8]; + + in[0] = p1; + return kvm_hypercall(in, out, nr | HC_VENDOR_KVM); +} + +static inline long kvm_hypercall2(unsigned int nr, unsigned long p1, + unsigned long p2) +{ + unsigned long in[8]; + unsigned long out[8]; + + in[0] = p1; + in[1] = p2; + return kvm_hypercall(in, out, nr | HC_VENDOR_KVM); +} + +static inline long kvm_hypercall3(unsigned int nr, unsigned long p1, + unsigned long p2, unsigned long p3) +{ + unsigned long in[8]; + unsigned long out[8]; + + in[0] = p1; + in[1] = p2; + in[2] = p3; + return kvm_hypercall(in, out, nr | HC_VENDOR_KVM); +} + +static inline long kvm_hypercall4(unsigned int nr, unsigned long p1, + unsigned long p2, unsigned long p3, + unsigned long p4) +{ + unsigned long in[8]; + unsigned long out[8]; + + in[0] = p1; + in[1] = p2; + in[2] = p3; + in[3] = p4; + return kvm_hypercall(in, out, nr | HC_VENDOR_KVM); +} + + static inline unsigned int kvm_arch_para_features(void) { - return 0; + unsigned long r; + + if (!kvm_para_available()) + return 0; + + if(kvm_hypercall0_1(KVM_HC_FEATURES, &r)) + return 0; + + return r; } #endif /* __KERNEL__ */ diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 18d139ec2d2..ecb3bc74c34 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -107,6 +107,7 @@ extern int kvmppc_booke_init(void); extern void kvmppc_booke_exit(void); extern void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu); +extern int kvmppc_kvm_pv(struct kvm_vcpu *vcpu); /* * Cuts out inst bits with ordering according to spec. diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 4ed076a4db2..36c30f31ec9 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -129,6 +129,8 @@ ifneq ($(CONFIG_XMON)$(CONFIG_KEXEC),) obj-y += ppc_save_regs.o endif +obj-$(CONFIG_KVM_GUEST) += kvm.o kvm_emul.o + # Disable GCOV in odd or sensitive code GCOV_PROFILE_prom_init.o := n GCOV_PROFILE_ftrace.o := n diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index c3e01945ad4..bd0df2e6aa8 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -48,11 +48,11 @@ #ifdef CONFIG_PPC_ISERIES #include <asm/iseries/alpaca.h> #endif -#ifdef CONFIG_KVM +#if defined(CONFIG_KVM) || defined(CONFIG_KVM_GUEST) #include <linux/kvm_host.h> -#ifndef CONFIG_BOOKE -#include <asm/kvm_book3s.h> #endif +#if defined(CONFIG_KVM) && defined(CONFIG_PPC_BOOK3S) +#include <asm/kvm_book3s.h> #endif #ifdef CONFIG_PPC32 @@ -396,12 +396,13 @@ int main(void) DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack)); DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid)); DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr)); - DEFINE(VCPU_MSR, offsetof(struct kvm_vcpu, arch.msr)); DEFINE(VCPU_SPRG4, offsetof(struct kvm_vcpu, arch.sprg4)); DEFINE(VCPU_SPRG5, offsetof(struct kvm_vcpu, arch.sprg5)); DEFINE(VCPU_SPRG6, offsetof(struct kvm_vcpu, arch.sprg6)); DEFINE(VCPU_SPRG7, offsetof(struct kvm_vcpu, arch.sprg7)); DEFINE(VCPU_SHADOW_PID, offsetof(struct kvm_vcpu, arch.shadow_pid)); + DEFINE(VCPU_SHARED, offsetof(struct kvm_vcpu, arch.shared)); + DEFINE(VCPU_SHARED_MSR, offsetof(struct kvm_vcpu_arch_shared, msr)); /* book3s */ #ifdef CONFIG_PPC_BOOK3S @@ -466,6 +467,22 @@ int main(void) DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr)); #endif /* CONFIG_PPC_BOOK3S */ #endif + +#ifdef CONFIG_KVM_GUEST + DEFINE(KVM_MAGIC_SCRATCH1, offsetof(struct kvm_vcpu_arch_shared, + scratch1)); + DEFINE(KVM_MAGIC_SCRATCH2, offsetof(struct kvm_vcpu_arch_shared, + scratch2)); + DEFINE(KVM_MAGIC_SCRATCH3, offsetof(struct kvm_vcpu_arch_shared, + scratch3)); + DEFINE(KVM_MAGIC_INT, offsetof(struct kvm_vcpu_arch_shared, + int_pending)); + DEFINE(KVM_MAGIC_MSR, offsetof(struct kvm_vcpu_arch_shared, msr)); + DEFINE(KVM_MAGIC_CRITICAL, offsetof(struct kvm_vcpu_arch_shared, + critical)); + DEFINE(KVM_MAGIC_SR, offsetof(struct kvm_vcpu_arch_shared, sr)); +#endif + #ifdef CONFIG_44x DEFINE(PGD_T_LOG2, PGD_T_LOG2); DEFINE(PTE_T_LOG2, PTE_T_LOG2); diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 39b0c48872d..9f8b01d6466 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -299,6 +299,12 @@ slb_miss_user_pseries: b . /* prevent spec. execution */ #endif /* __DISABLED__ */ +/* KVM's trampoline code needs to be close to the interrupt handlers */ + +#ifdef CONFIG_KVM_BOOK3S_64_HANDLER +#include "../kvm/book3s_rmhandlers.S" +#endif + .align 7 .globl __end_interrupts __end_interrupts: diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index c571cd3c145..f0dd577e4a5 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -166,12 +166,6 @@ exception_marker: #include "exceptions-64s.S" #endif -/* KVM trampoline code needs to be close to the interrupt handlers */ - -#ifdef CONFIG_KVM_BOOK3S_64_HANDLER -#include "../kvm/book3s_rmhandlers.S" -#endif - _GLOBAL(generic_secondary_thread_init) mr r24,r3 diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c new file mode 100644 index 00000000000..428d0e538ae --- /dev/null +++ b/arch/powerpc/kernel/kvm.c @@ -0,0 +1,596 @@ +/* + * Copyright (C) 2010 SUSE Linux Products GmbH. All rights reserved. + * + * Authors: + * Alexander Graf <agraf@suse.de> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include <linux/kvm_host.h> +#include <linux/init.h> +#include <linux/kvm_para.h> +#include <linux/slab.h> +#include <linux/of.h> + +#include <asm/reg.h> +#include <asm/sections.h> +#include <asm/cacheflush.h> +#include <asm/disassemble.h> + +#define KVM_MAGIC_PAGE (-4096L) +#define magic_var(x) KVM_MAGIC_PAGE + offsetof(struct kvm_vcpu_arch_shared, x) + +#define KVM_INST_LWZ 0x80000000 +#define KVM_INST_STW 0x90000000 +#define KVM_INST_LD 0xe8000000 +#define KVM_INST_STD 0xf8000000 +#define KVM_INST_NOP 0x60000000 +#define KVM_INST_B 0x48000000 +#define KVM_INST_B_MASK 0x03ffffff +#define KVM_INST_B_MAX 0x01ffffff + +#define KVM_MASK_RT 0x03e00000 +#define KVM_RT_30 0x03c00000 +#define KVM_MASK_RB 0x0000f800 +#define KVM_INST_MFMSR 0x7c0000a6 +#define KVM_INST_MFSPR_SPRG0 0x7c1042a6 +#define KVM_INST_MFSPR_SPRG1 0x7c1142a6 +#define KVM_INST_MFSPR_SPRG2 0x7c1242a6 +#define KVM_INST_MFSPR_SPRG3 0x7c1342a6 +#define KVM_INST_MFSPR_SRR0 0x7c1a02a6 +#define KVM_INST_MFSPR_SRR1 0x7c1b02a6 +#define KVM_INST_MFSPR_DAR 0x7c1302a6 +#define KVM_INST_MFSPR_DSISR 0x7c1202a6 + +#define KVM_INST_MTSPR_SPRG0 0x7c1043a6 +#define KVM_INST_MTSPR_SPRG1 0x7c1143a6 +#define KVM_INST_MTSPR_SPRG2 0x7c1243a6 +#define KVM_INST_MTSPR_SPRG3 0x7c1343a6 +#define KVM_INST_MTSPR_SRR0 0x7c1a03a6 +#define KVM_INST_MTSPR_SRR1 0x7c1b03a6 +#define KVM_INST_MTSPR_DAR 0x7c1303a6 +#define KVM_INST_MTSPR_DSISR 0x7c1203a6 + +#define KVM_INST_TLBSYNC 0x7c00046c +#define KVM_INST_MTMSRD_L0 0x7c000164 +#define KVM_INST_MTMSRD_L1 0x7c010164 +#define KVM_INST_MTMSR 0x7c000124 + +#define KVM_INST_WRTEEI_0 0x7c000146 +#define KVM_INST_WRTEEI_1 0x7c008146 + +#define KVM_INST_MTSRIN 0x7c0001e4 + +static bool kvm_patching_worked = true; +static char kvm_tmp[1024 * 1024]; +static int kvm_tmp_index; + +static inline void kvm_patch_ins(u32 *inst, u32 new_inst) +{ + *inst = new_inst; + flush_icache_range((ulong)inst, (ulong)inst + 4); +} + +static void kvm_patch_ins_ll(u32 *inst, long addr, u32 rt) +{ +#ifdef CONFIG_64BIT + kvm_patch_ins(inst, KVM_INST_LD | rt | (addr & 0x0000fffc)); +#else + kvm_patch_ins(inst, KVM_INST_LWZ | rt | (addr & 0x0000fffc)); +#endif +} + +static void kvm_patch_ins_ld(u32 *inst, long addr, u32 rt) +{ +#ifdef CONFIG_64BIT + kvm_patch_ins(inst, KVM_INST_LD | rt | (addr & 0x0000fffc)); +#else + kvm_patch_ins(inst, KVM_INST_LWZ | rt | ((addr + 4) & 0x0000fffc)); +#endif +} + +static void kvm_patch_ins_lwz(u32 *inst, long addr, u32 rt) +{ + kvm_patch_ins(inst, KVM_INST_LWZ | rt | (addr & 0x0000ffff)); +} + +static void kvm_patch_ins_std(u32 *inst, long addr, u32 rt) +{ +#ifdef CONFIG_64BIT + kvm_patch_ins(inst, KVM_INST_STD | rt | (addr & 0x0000fffc)); +#else + kvm_patch_ins(inst, KVM_INST_STW | rt | ((addr + 4) & 0x0000fffc)); +#endif +} + +static void kvm_patch_ins_stw(u32 *inst, long addr, u32 rt) +{ + kvm_patch_ins(inst, KVM_INST_STW | rt | (addr & 0x0000fffc)); +} + +static void kvm_patch_ins_nop(u32 *inst) +{ + kvm_patch_ins(inst, KVM_INST_NOP); +} + +static void kvm_patch_ins_b(u32 *inst, int addr) +{ +#ifdef CONFIG_RELOCATABLE + /* On relocatable kernels interrupts handlers and our code + can be in different regions, so we don't patch them */ + + extern u32 __end_interrupts; + if ((ulong)inst < (ulong)&__end_interrupts) + return; +#endif + + kvm_patch_ins(inst, KVM_INST_B | (addr & KVM_INST_B_MASK)); +} + +static u32 *kvm_alloc(int len) +{ + u32 *p; + + if ((kvm_tmp_index + len) > ARRAY_SIZE(kvm_tmp)) { + printk(KERN_ERR "KVM: No more space (%d + %d)\n", + kvm_tmp_index, len); + kvm_patching_worked = false; + return NULL; + } + + p = (void*)&kvm_tmp[kvm_tmp_index]; + kvm_tmp_index += len; + + return p; +} + +extern u32 kvm_emulate_mtmsrd_branch_offs; +extern u32 kvm_emulate_mtmsrd_reg_offs; +extern u32 kvm_emulate_mtmsrd_orig_ins_offs; +extern u32 kvm_emulate_mtmsrd_len; +extern u32 kvm_emulate_mtmsrd[]; + +static void kvm_patch_ins_mtmsrd(u32 *inst, u32 rt) +{ + u32 *p; + int distance_start; + int distance_end; + ulong next_inst; + + p = kvm_alloc(kvm_emulate_mtmsrd_len * 4); + if (!p) + return; + + /* Find out where we are and put everything there */ + distance_start = (ulong)p - (ulong)inst; + next_inst = ((ulong)inst + 4); + distance_end = next_inst - (ulong)&p[kvm_emulate_mtmsrd_branch_offs]; + + /* Make sure we only write valid b instructions */ + if (distance_start > KVM_INST_B_MAX) { + kvm_patching_worked = false; + return; + } + + /* Modify the chunk to fit the invocation */ + memcpy(p, kvm_emulate_mtmsrd, kvm_emulate_mtmsrd_len * 4); + p[kvm_emulate_mtmsrd_branch_offs] |= distance_end & KVM_INST_B_MASK; + switch (get_rt(rt)) { + case 30: + kvm_patch_ins_ll(&p[kvm_emulate_mtmsrd_reg_offs], + magic_var(scratch2), KVM_RT_30); + break; + case 31: + kvm_patch_ins_ll(&p[kvm_emulate_mtmsrd_reg_offs], + magic_var(scratch1), KVM_RT_30); + break; + default: + p[kvm_emulate_mtmsrd_reg_offs] |= rt; + break; + } + + p[kvm_emulate_mtmsrd_orig_ins_offs] = *inst; + flush_icache_range((ulong)p, (ulong)p + kvm_emulate_mtmsrd_len * 4); + + /* Patch the invocation */ + kvm_patch_ins_b(inst, distance_start); +} + +extern u32 kvm_emulate_mtmsr_branch_offs; +extern u32 kvm_emulate_mtmsr_reg1_offs; +extern u32 kvm_emulate_mtmsr_reg2_offs; +extern u32 kvm_emulate_mtmsr_orig_ins_offs; +extern u32 kvm_emulate_mtmsr_len; +extern u32 kvm_emulate_mtmsr[]; + +static void kvm_patch_ins_mtmsr(u32 *inst, u32 rt) +{ + u32 *p; + int distance_start; + int distance_end; + ulong next_inst; + + p = kvm_alloc(kvm_emulate_mtmsr_len * 4); + if (!p) + return; + + /* Find out where we are and put everything there */ + distance_start = (ulong)p - (ulong)inst; + next_inst = ((ulong)inst + 4); + distance_end = next_inst - (ulong)&p[kvm_emulate_mtmsr_branch_offs]; + + /* Make sure we only write valid b instructions */ + if (distance_start > KVM_INST_B_MAX) { + kvm_patching_worked = false; + return; + } + + /* Modify the chunk to fit the invocation */ + memcpy(p, kvm_emulate_mtmsr, kvm_emulate_mtmsr_len * 4); + p[kvm_emulate_mtmsr_branch_offs] |= distance_end & KVM_INST_B_MASK; + + /* Make clobbered registers work too */ + switch (get_rt(rt)) { + case 30: + kvm_patch_ins_ll(&p[kvm_emulate_mtmsr_reg1_offs], + magic_var(scratch2), KVM_RT_30); + kvm_patch_ins_ll(&p[kvm_emulate_mtmsr_reg2_offs], + magic_var(scratch2), KVM_RT_30); + break; + case 31: + kvm_patch_ins_ll(&p[kvm_emulate_mtmsr_reg1_offs], + magic_var(scratch1), KVM_RT_30); + kvm_patch_ins_ll(&p[kvm_emulate_mtmsr_reg2_offs], + magic_var(scratch1), KVM_RT_30); + break; + default: + p[kvm_emulate_mtmsr_reg1_offs] |= rt; + p[kvm_emulate_mtmsr_reg2_offs] |= rt; + break; + } + + p[kvm_emulate_mtmsr_orig_ins_offs] = *inst; + flush_icache_range((ulong)p, (ulong)p + kvm_emulate_mtmsr_len * 4); + + /* Patch the invocation */ + kvm_patch_ins_b(inst, distance_start); +} + +#ifdef CONFIG_BOOKE + +extern u32 kvm_emulate_wrteei_branch_offs; +extern u32 kvm_emulate_wrteei_ee_offs; +extern u32 kvm_emulate_wrteei_len; +extern u32 kvm_emulate_wrteei[]; + +static void kvm_patch_ins_wrteei(u32 *inst) +{ + u32 *p; + int distance_start; + int distance_end; + ulong next_inst; + + p = kvm_alloc(kvm_emulate_wrteei_len * 4); + if (!p) + return; + + /* Find out where we are and put everything there */ + distance_start = (ulong)p - (ulong)inst; + next_inst = ((ulong)inst + 4); + distance_end = next_inst - (ulong)&p[kvm_emulate_wrteei_branch_offs]; + + /* Make sure we only write valid b instructions */ + if (distance_start > KVM_INST_B_MAX) { + kvm_patching_worked = false; + return; + } + + /* Modify the chunk to fit the invocation */ + memcpy(p, kvm_emulate_wrteei, kvm_emulate_wrteei_len * 4); + p[kvm_emulate_wrteei_branch_offs] |= distance_end & KVM_INST_B_MASK; + p[kvm_emulate_wrteei_ee_offs] |= (*inst & MSR_EE); + flush_icache_range((ulong)p, (ulong)p + kvm_emulate_wrteei_len * 4); + + /* Patch the invocation */ + kvm_patch_ins_b(inst, distance_start); +} + +#endif + +#ifdef CONFIG_PPC_BOOK3S_32 + +extern u32 kvm_emulate_mtsrin_branch_offs; +extern u32 kvm_emulate_mtsrin_reg1_offs; +extern u32 kvm_emulate_mtsrin_reg2_offs; +extern u32 kvm_emulate_mtsrin_orig_ins_offs; +extern u32 kvm_emulate_mtsrin_len; +extern u32 kvm_emulate_mtsrin[]; + +static void kvm_patch_ins_mtsrin(u32 *inst, u32 rt, u32 rb) +{ + u32 *p; + int distance_start; + int distance_end; + ulong next_inst; + + p = kvm_alloc(kvm_emulate_mtsrin_len * 4); + if (!p) + return; + + /* Find out where we are and put everything there */ + distance_start = (ulong)p - (ulong)inst; + next_inst = ((ulong)inst + 4); + distance_end = next_inst - (ulong)&p[kvm_emulate_mtsrin_branch_offs]; + + /* Make sure we only write valid b instructions */ + if (distance_start > KVM_INST_B_MAX) { + kvm_patching_worked = false; + return; + } + + /* Modify the chunk to fit the invocation */ + memcpy(p, kvm_emulate_mtsrin, kvm_emulate_mtsrin_len * 4); + p[kvm_emulate_mtsrin_branch_offs] |= distance_end & KVM_INST_B_MASK; + p[kvm_emulate_mtsrin_reg1_offs] |= (rb << 10); + p[kvm_emulate_mtsrin_reg2_offs] |= rt; + p[kvm_emulate_mtsrin_orig_ins_offs] = *inst; + flush_icache_range((ulong)p, (ulong)p + kvm_emulate_mtsrin_len * 4); + + /* Patch the invocation */ + kvm_patch_ins_b(inst, distance_start); +} + +#endif + +static void kvm_map_magic_page(void *data) +{ + u32 *features = data; + + ulong in[8]; + ulong out[8]; + + in[0] = KVM_MAGIC_PAGE; + in[1] = KVM_MAGIC_PAGE; + + kvm_hypercall(in, out, HC_VENDOR_KVM | KVM_HC_PPC_MAP_MAGIC_PAGE); + + *features = out[0]; +} + +static void kvm_check_ins(u32 *inst, u32 features) +{ + u32 _inst = *inst; + u32 inst_no_rt = _inst & ~KVM_MASK_RT; + u32 inst_rt = _inst & KVM_MASK_RT; + + switch (inst_no_rt) { + /* Loads */ + case KVM_INST_MFMSR: + kvm_patch_ins_ld(inst, magic_var(msr), inst_rt); + break; + case KVM_INST_MFSPR_SPRG0: + kvm_patch_ins_ld(inst, magic_var(sprg0), inst_rt); + break; + case KVM_INST_MFSPR_SPRG1: + kvm_patch_ins_ld(inst, magic_var(sprg1), inst_rt); + break; + case KVM_INST_MFSPR_SPRG2: + kvm_patch_ins_ld(inst, magic_var(sprg2), inst_rt); + break; + case KVM_INST_MFSPR_SPRG3: + kvm_patch_ins_ld(inst, magic_var(sprg3), inst_rt); + break; + case KVM_INST_MFSPR_SRR0: + kvm_patch_ins_ld(inst, magic_var(srr0), inst_rt); + break; + case KVM_INST_MFSPR_SRR1: + kvm_patch_ins_ld(inst, magic_var(srr1), inst_rt); + break; + case KVM_INST_MFSPR_DAR: + kvm_patch_ins_ld(inst, magic_var(dar), inst_rt); + break; + case KVM_INST_MFSPR_DSISR: + kvm_patch_ins_lwz(inst, magic_var(dsisr), inst_rt); + break; + + /* Stores */ + case KVM_INST_MTSPR_SPRG0: + kvm_patch_ins_std(inst, magic_var(sprg0), inst_rt); + break; + case KVM_INST_MTSPR_SPRG1: + kvm_patch_ins_std(inst, magic_var(sprg1), inst_rt); + break; + case KVM_INST_MTSPR_SPRG2: + kvm_patch_ins_std(inst, magic_var(sprg2), inst_rt); + break; + case KVM_INST_MTSPR_SPRG3: + kvm_patch_ins_std(inst, magic_var(sprg3), inst_rt); + break; + case KVM_INST_MTSPR_SRR0: + kvm_patch_ins_std(inst, magic_var(srr0), inst_rt); + break; + case KVM_INST_MTSPR_SRR1: + kvm_patch_ins_std(inst, magic_var(srr1), inst_rt); + break; + case KVM_INST_MTSPR_DAR: + kvm_patch_ins_std(inst, magic_var(dar), inst_rt); + break; + case KVM_INST_MTSPR_DSISR: + kvm_patch_ins_stw(inst, magic_var(dsisr), inst_rt); + break; + + /* Nops */ + case KVM_INST_TLBSYNC: + kvm_patch_ins_nop(inst); + break; + + /* Rewrites */ + case KVM_INST_MTMSRD_L1: + kvm_patch_ins_mtmsrd(inst, inst_rt); + break; + case KVM_INST_MTMSR: + case KVM_INST_MTMSRD_L0: + kvm_patch_ins_mtmsr(inst, inst_rt); + break; + } + + switch (inst_no_rt & ~KVM_MASK_RB) { +#ifdef CONFIG_PPC_BOOK3S_32 + case KVM_INST_MTSRIN: + if (features & KVM_MAGIC_FEAT_SR) { + u32 inst_rb = _inst & KVM_MASK_RB; + kvm_patch_ins_mtsrin(inst, inst_rt, inst_rb); + } + break; + break; +#endif + } + + switch (_inst) { +#ifdef CONFIG_BOOKE + case KVM_INST_WRTEEI_0: + case KVM_INST_WRTEEI_1: + kvm_patch_ins_wrteei(inst); + break; +#endif + } +} + +static void kvm_use_magic_page(void) +{ + u32 *p; + u32 *start, *end; + u32 tmp; + u32 features; + + /* Tell the host to map the magic page to -4096 on all CPUs */ + on_each_cpu(kvm_map_magic_page, &features, 1); + + /* Quick self-test to see if the mapping works */ + if (__get_user(tmp, (u32*)KVM_MAGIC_PAGE)) { + kvm_patching_worked = false; + return; + } + + /* Now loop through all code and find instructions */ + start = (void*)_stext; + end = (void*)_etext; + + for (p = start; p < end; p++) + kvm_check_ins(p, features); + + printk(KERN_INFO "KVM: Live patching for a fast VM %s\n", + kvm_patching_worked ? "worked" : "failed"); +} + +unsigned long kvm_hypercall(unsigned long *in, + unsigned long *out, + unsigned long nr) +{ + unsigned long register r0 asm("r0"); + unsigned long register r3 asm("r3") = in[0]; + unsigned long register r4 asm("r4") = in[1]; + unsigned long register r5 asm("r5") = in[2]; + unsigned long register r6 asm("r6") = in[3]; + unsigned long register r7 asm("r7") = in[4]; + unsigned long register r8 asm("r8") = in[5]; + unsigned long register r9 asm("r9") = in[6]; + unsigned long register r10 asm("r10") = in[7]; + unsigned long register r11 asm("r11") = nr; + unsigned long register r12 asm("r12"); + + asm volatile("bl kvm_hypercall_start" + : "=r"(r0), "=r"(r3), "=r"(r4), "=r"(r5), "=r"(r6), + "=r"(r7), "=r"(r8), "=r"(r9), "=r"(r10), "=r"(r11), + "=r"(r12) + : "r"(r3), "r"(r4), "r"(r5), "r"(r6), "r"(r7), "r"(r8), + "r"(r9), "r"(r10), "r"(r11) + : "memory", "cc", "xer", "ctr", "lr"); + + out[0] = r4; + out[1] = r5; + out[2] = r6; + out[3] = r7; + out[4] = r8; + out[5] = r9; + out[6] = r10; + out[7] = r11; + + return r3; +} +EXPORT_SYMBOL_GPL(kvm_hypercall); + +static int kvm_para_setup(void) +{ + extern u32 kvm_hypercall_start; + struct device_node *hyper_node; + u32 *insts; + int len, i; + + hyper_node = of_find_node_by_path("/hypervisor"); + if (!hyper_node) + return -1; + + insts = (u32*)of_get_property(hyper_node, "hcall-instructions", &len); + if (len % 4) + return -1; + if (len > (4 * 4)) + return -1; + + for (i = 0; i < (len / 4); i++) + kvm_patch_ins(&(&kvm_hypercall_start)[i], insts[i]); + + return 0; +} + +static __init void kvm_free_tmp(void) +{ + unsigned long start, end; + + start = (ulong)&kvm_tmp[kvm_tmp_index + (PAGE_SIZE - 1)] & PAGE_MASK; + end = (ulong)&kvm_tmp[ARRAY_SIZE(kvm_tmp)] & PAGE_MASK; + + /* Free the tmp space we don't need */ + for (; start < end; start += PAGE_SIZE) { + ClearPageReserved(virt_to_page(start)); + init_page_count(virt_to_page(start)); + free_page(start); + totalram_pages++; + } +} + +static int __init kvm_guest_init(void) +{ + if (!kvm_para_available()) + goto free_tmp; + + if (kvm_para_setup()) + goto free_tmp; + + if (kvm_para_has_feature(KVM_FEATURE_MAGIC_PAGE)) + kvm_use_magic_page(); + +#ifdef CONFIG_PPC_BOOK3S_64 + /* Enable napping */ + powersave_nap = 1; +#endif + +free_tmp: + kvm_free_tmp(); + + return 0; +} + +postcore_initcall(kvm_guest_init); diff --git a/arch/powerpc/kernel/kvm_emul.S b/arch/powerpc/kernel/kvm_emul.S new file mode 100644 index 00000000000..f2b1b2523e6 --- /dev/null +++ b/arch/powerpc/kernel/kvm_emul.S @@ -0,0 +1,302 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright SUSE Linux Products GmbH 2010 + * + * Authors: Alexander Graf <agraf@suse.de> + */ + +#include <asm/ppc_asm.h> +#include <asm/kvm_asm.h> +#include <asm/reg.h> +#include <asm/page.h> +#include <asm/asm-offsets.h> + +/* Hypercall entry point. Will be patched with device tree instructions. */ + +.global kvm_hypercall_start +kvm_hypercall_start: + li r3, -1 + nop + nop + nop + blr + +#define KVM_MAGIC_PAGE (-4096) + +#ifdef CONFIG_64BIT +#define LL64(reg, offs, reg2) ld reg, (offs)(reg2) +#define STL64(reg, offs, reg2) std reg, (offs)(reg2) +#else +#define LL64(reg, offs, reg2) lwz reg, (offs + 4)(reg2) +#define STL64(reg, offs, reg2) stw reg, (offs + 4)(reg2) +#endif + +#define SCRATCH_SAVE \ + /* Enable critical section. We are critical if \ + shared->critical == r1 */ \ + STL64(r1, KVM_MAGIC_PAGE + KVM_MAGIC_CRITICAL, 0); \ + \ + /* Save state */ \ + PPC_STL r31, (KVM_MAGIC_PAGE + KVM_MAGIC_SCRATCH1)(0); \ + PPC_STL r30, (KVM_MAGIC_PAGE + KVM_MAGIC_SCRATCH2)(0); \ + mfcr r31; \ + stw r31, (KVM_MAGIC_PAGE + KVM_MAGIC_SCRATCH3)(0); + +#define SCRATCH_RESTORE \ + /* Restore state */ \ + PPC_LL r31, (KVM_MAGIC_PAGE + KVM_MAGIC_SCRATCH1)(0); \ + lwz r30, (KVM_MAGIC_PAGE + KVM_MAGIC_SCRATCH3)(0); \ + mtcr r30; \ + PPC_LL r30, (KVM_MAGIC_PAGE + KVM_MAGIC_SCRATCH2)(0); \ + \ + /* Disable critical section. We are critical if \ + shared->critical == r1 and r2 is always != r1 */ \ + STL64(r2, KVM_MAGIC_PAGE + KVM_MAGIC_CRITICAL, 0); + +.global kvm_emulate_mtmsrd +kvm_emulate_mtmsrd: + + SCRATCH_SAVE + + /* Put MSR & ~(MSR_EE|MSR_RI) in r31 */ + LL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0) + lis r30, (~(MSR_EE | MSR_RI))@h + ori r30, r30, (~(MSR_EE | MSR_RI))@l + and r31, r31, r30 + + /* OR the register's (MSR_EE|MSR_RI) on MSR */ +kvm_emulate_mtmsrd_reg: + ori r30, r0, 0 + andi. r30, r30, (MSR_EE|MSR_RI) + or r31, r31, r30 + + /* Put MSR back into magic page */ + STL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0) + + /* Check if we have to fetch an interrupt */ + lwz r31, (KVM_MAGIC_PAGE + KVM_MAGIC_INT)(0) + cmpwi r31, 0 + beq+ no_check + + /* Check if we may trigger an interrupt */ + andi. r30, r30, MSR_EE + beq no_check + + SCRATCH_RESTORE + + /* Nag hypervisor */ +kvm_emulate_mtmsrd_orig_ins: + tlbsync + + b kvm_emulate_mtmsrd_branch + +no_check: + + SCRATCH_RESTORE + + /* Go back to caller */ +kvm_emulate_mtmsrd_branch: + b . +kvm_emulate_mtmsrd_end: + +.global kvm_emulate_mtmsrd_branch_offs +kvm_emulate_mtmsrd_branch_offs: + .long (kvm_emulate_mtmsrd_branch - kvm_emulate_mtmsrd) / 4 + +.global kvm_emulate_mtmsrd_reg_offs +kvm_emulate_mtmsrd_reg_offs: + .long (kvm_emulate_mtmsrd_reg - kvm_emulate_mtmsrd) / 4 + +.global kvm_emulate_mtmsrd_orig_ins_offs +kvm_emulate_mtmsrd_orig_ins_offs: + .long (kvm_emulate_mtmsrd_orig_ins - kvm_emulate_mtmsrd) / 4 + +.global kvm_emulate_mtmsrd_len +kvm_emulate_mtmsrd_len: + .long (kvm_emulate_mtmsrd_end - kvm_emulate_mtmsrd) / 4 + + +#define MSR_SAFE_BITS (MSR_EE | MSR_CE | MSR_ME | MSR_RI) +#define MSR_CRITICAL_BITS ~MSR_SAFE_BITS + +.global kvm_emulate_mtmsr +kvm_emulate_mtmsr: + + SCRATCH_SAVE + + /* Fetch old MSR in r31 */ + LL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0) + + /* Find the changed bits between old and new MSR */ +kvm_emulate_mtmsr_reg1: + ori r30, r0, 0 + xor r31, r30, r31 + + /* Check if we need to really do mtmsr */ + LOAD_REG_IMMEDIATE(r30, MSR_CRITICAL_BITS) + and. r31, r31, r30 + + /* No critical bits changed? Maybe we can stay in the guest. */ + beq maybe_stay_in_guest + +do_mtmsr: + + SCRATCH_RESTORE + + /* Just fire off the mtmsr if it's critical */ +kvm_emulate_mtmsr_orig_ins: + mtmsr r0 + + b kvm_emulate_mtmsr_branch + +maybe_stay_in_guest: + + /* Get the target register in r30 */ +kvm_emulate_mtmsr_reg2: + ori r30, r0, 0 + + /* Check if we have to fetch an interrupt */ + lwz r31, (KVM_MAGIC_PAGE + KVM_MAGIC_INT)(0) + cmpwi r31, 0 + beq+ no_mtmsr + + /* Check if we may trigger an interrupt */ + andi. r31, r30, MSR_EE + beq no_mtmsr + + b do_mtmsr + +no_mtmsr: + + /* Put MSR into magic page because we don't call mtmsr */ + STL64(r30, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0) + + SCRATCH_RESTORE + + /* Go back to caller */ +kvm_emulate_mtmsr_branch: + b . +kvm_emulate_mtmsr_end: + +.global kvm_emulate_mtmsr_branch_offs +kvm_emulate_mtmsr_branch_offs: + .long (kvm_emulate_mtmsr_branch - kvm_emulate_mtmsr) / 4 + +.global kvm_emulate_mtmsr_reg1_offs +kvm_emulate_mtmsr_reg1_offs: + .long (kvm_emulate_mtmsr_reg1 - kvm_emulate_mtmsr) / 4 + +.global kvm_emulate_mtmsr_reg2_offs +kvm_emulate_mtmsr_reg2_offs: + .long (kvm_emulate_mtmsr_reg2 - kvm_emulate_mtmsr) / 4 + +.global kvm_emulate_mtmsr_orig_ins_offs +kvm_emulate_mtmsr_orig_ins_offs: + .long (kvm_emulate_mtmsr_orig_ins - kvm_emulate_mtmsr) / 4 + +.global kvm_emulate_mtmsr_len +kvm_emulate_mtmsr_len: + .long (kvm_emulate_mtmsr_end - kvm_emulate_mtmsr) / 4 + + + +.global kvm_emulate_wrteei +kvm_emulate_wrteei: + + SCRATCH_SAVE + + /* Fetch old MSR in r31 */ + LL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0) + + /* Remove MSR_EE from old MSR */ + li r30, 0 + ori r30, r30, MSR_EE + andc r31, r31, r30 + + /* OR new MSR_EE onto the old MSR */ +kvm_emulate_wrteei_ee: + ori r31, r31, 0 + + /* Write new MSR value back */ + STL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0) + + SCRATCH_RESTORE + + /* Go back to caller */ +kvm_emulate_wrteei_branch: + b . +kvm_emulate_wrteei_end: + +.global kvm_emulate_wrteei_branch_offs +kvm_emulate_wrteei_branch_offs: + .long (kvm_emulate_wrteei_branch - kvm_emulate_wrteei) / 4 + +.global kvm_emulate_wrteei_ee_offs +kvm_emulate_wrteei_ee_offs: + .long (kvm_emulate_wrteei_ee - kvm_emulate_wrteei) / 4 + +.global kvm_emulate_wrteei_len +kvm_emulate_wrteei_len: + .long (kvm_emulate_wrteei_end - kvm_emulate_wrteei) / 4 + + +.global kvm_emulate_mtsrin +kvm_emulate_mtsrin: + + SCRATCH_SAVE + + LL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0) + andi. r31, r31, MSR_DR | MSR_IR + beq kvm_emulate_mtsrin_reg1 + + SCRATCH_RESTORE + +kvm_emulate_mtsrin_orig_ins: + nop + b kvm_emulate_mtsrin_branch + +kvm_emulate_mtsrin_reg1: + /* rX >> 26 */ + rlwinm r30,r0,6,26,29 + +kvm_emulate_mtsrin_reg2: + stw r0, (KVM_MAGIC_PAGE + KVM_MAGIC_SR)(r30) + + SCRATCH_RESTORE + + /* Go back to caller */ +kvm_emulate_mtsrin_branch: + b . +kvm_emulate_mtsrin_end: + +.global kvm_emulate_mtsrin_branch_offs +kvm_emulate_mtsrin_branch_offs: + .long (kvm_emulate_mtsrin_branch - kvm_emulate_mtsrin) / 4 + +.global kvm_emulate_mtsrin_reg1_offs +kvm_emulate_mtsrin_reg1_offs: + .long (kvm_emulate_mtsrin_reg1 - kvm_emulate_mtsrin) / 4 + +.global kvm_emulate_mtsrin_reg2_offs +kvm_emulate_mtsrin_reg2_offs: + .long (kvm_emulate_mtsrin_reg2 - kvm_emulate_mtsrin) / 4 + +.global kvm_emulate_mtsrin_orig_ins_offs +kvm_emulate_mtsrin_orig_ins_offs: + .long (kvm_emulate_mtsrin_orig_ins - kvm_emulate_mtsrin) / 4 + +.global kvm_emulate_mtsrin_len +kvm_emulate_mtsrin_len: + .long (kvm_emulate_mtsrin_end - kvm_emulate_mtsrin) / 4 diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c index 73c0a3f64ed..74d0e742114 100644 --- a/arch/powerpc/kvm/44x.c +++ b/arch/powerpc/kvm/44x.c @@ -43,7 +43,7 @@ int kvmppc_core_check_processor_compat(void) { int r; - if (strcmp(cur_cpu_spec->platform, "ppc440") == 0) + if (strncmp(cur_cpu_spec->platform, "ppc440", 6) == 0) r = 0; else r = -ENOTSUPP; @@ -72,6 +72,7 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu) /* Since the guest can directly access the timebase, it must know the * real timebase frequency. Accordingly, it must see the state of * CCR1[TCS]. */ + /* XXX CCR1 doesn't exist on all 440 SoCs. */ vcpu->arch.ccr1 = mfspr(SPRN_CCR1); for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++) @@ -123,8 +124,14 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) if (err) goto free_vcpu; + vcpu->arch.shared = (void*)__get_free_page(GFP_KERNEL|__GFP_ZERO); + if (!vcpu->arch.shared) + goto uninit_vcpu; + return vcpu; +uninit_vcpu: + kvm_vcpu_uninit(vcpu); free_vcpu: kmem_cache_free(kvm_vcpu_cache, vcpu_44x); out: @@ -135,6 +142,7 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) { struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); + free_page((unsigned long)vcpu->arch.shared); kvm_vcpu_uninit(vcpu); kmem_cache_free(kvm_vcpu_cache, vcpu_44x); } diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c index 9b9b5cdea84..5f3cff83e08 100644 --- a/arch/powerpc/kvm/44x_tlb.c +++ b/arch/powerpc/kvm/44x_tlb.c @@ -47,6 +47,7 @@ #ifdef DEBUG void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu) { + struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); struct kvmppc_44x_tlbe *tlbe; int i; @@ -221,14 +222,14 @@ gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int gtlb_index, int kvmppc_mmu_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr) { - unsigned int as = !!(vcpu->arch.msr & MSR_IS); + unsigned int as = !!(vcpu->arch.shared->msr & MSR_IS); return kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as); } int kvmppc_mmu_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr) { - unsigned int as = !!(vcpu->arch.msr & MSR_DS); + unsigned int as = !!(vcpu->arch.shared->msr & MSR_DS); return kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as); } @@ -354,7 +355,7 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr, stlbe.word1 = (hpaddr & 0xfffffc00) | ((hpaddr >> 32) & 0xf); stlbe.word2 = kvmppc_44x_tlb_shadow_attrib(flags, - vcpu->arch.msr & MSR_PR); + vcpu->arch.shared->msr & MSR_PR); stlbe.tid = !(asid & 0xff); /* Keep track of the reference so we can properly release it later. */ @@ -423,7 +424,7 @@ static int tlbe_is_host_safe(const struct kvm_vcpu *vcpu, /* Does it match current guest AS? */ /* XXX what about IS != DS? */ - if (get_tlb_ts(tlbe) != !!(vcpu->arch.msr & MSR_IS)) + if (get_tlb_ts(tlbe) != !!(vcpu->arch.shared->msr & MSR_IS)) return 0; gpa = get_tlb_raddr(tlbe); diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index a3cef30d1d4..e316847c08c 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -17,6 +17,7 @@ #include <linux/kvm_host.h> #include <linux/err.h> #include <linux/slab.h> +#include "trace.h" #include <asm/reg.h> #include <asm/cputable.h> @@ -35,7 +36,6 @@ #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU /* #define EXIT_DEBUG */ -/* #define EXIT_DEBUG_SIMPLE */ /* #define DEBUG_EXT */ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr, @@ -105,65 +105,71 @@ void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) kvmppc_giveup_ext(vcpu, MSR_VSX); } -#if defined(EXIT_DEBUG) -static u32 kvmppc_get_dec(struct kvm_vcpu *vcpu) -{ - u64 jd = mftb() - vcpu->arch.dec_jiffies; - return vcpu->arch.dec - jd; -} -#endif - static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu) { - vcpu->arch.shadow_msr = vcpu->arch.msr; + ulong smsr = vcpu->arch.shared->msr; + /* Guest MSR values */ - vcpu->arch.shadow_msr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE | - MSR_BE | MSR_DE; + smsr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE | MSR_BE | MSR_DE; /* Process MSR values */ - vcpu->arch.shadow_msr |= MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_PR | - MSR_EE; + smsr |= MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_PR | MSR_EE; /* External providers the guest reserved */ - vcpu->arch.shadow_msr |= (vcpu->arch.msr & vcpu->arch.guest_owned_ext); + smsr |= (vcpu->arch.shared->msr & vcpu->arch.guest_owned_ext); /* 64-bit Process MSR values */ #ifdef CONFIG_PPC_BOOK3S_64 - vcpu->arch.shadow_msr |= MSR_ISF | MSR_HV; + smsr |= MSR_ISF | MSR_HV; #endif + vcpu->arch.shadow_msr = smsr; } void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr) { - ulong old_msr = vcpu->arch.msr; + ulong old_msr = vcpu->arch.shared->msr; #ifdef EXIT_DEBUG printk(KERN_INFO "KVM: Set MSR to 0x%llx\n", msr); #endif msr &= to_book3s(vcpu)->msr_mask; - vcpu->arch.msr = msr; + vcpu->arch.shared->msr = msr; kvmppc_recalc_shadow_msr(vcpu); - if (msr & (MSR_WE|MSR_POW)) { + if (msr & MSR_POW) { if (!vcpu->arch.pending_exceptions) { kvm_vcpu_block(vcpu); vcpu->stat.halt_wakeup++; + + /* Unset POW bit after we woke up */ + msr &= ~MSR_POW; + vcpu->arch.shared->msr = msr; } } - if ((vcpu->arch.msr & (MSR_PR|MSR_IR|MSR_DR)) != + if ((vcpu->arch.shared->msr & (MSR_PR|MSR_IR|MSR_DR)) != (old_msr & (MSR_PR|MSR_IR|MSR_DR))) { kvmppc_mmu_flush_segments(vcpu); kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu)); + + /* Preload magic page segment when in kernel mode */ + if (!(msr & MSR_PR) && vcpu->arch.magic_page_pa) { + struct kvm_vcpu_arch *a = &vcpu->arch; + + if (msr & MSR_DR) + kvmppc_mmu_map_segment(vcpu, a->magic_page_ea); + else + kvmppc_mmu_map_segment(vcpu, a->magic_page_pa); + } } /* Preload FPU if it's enabled */ - if (vcpu->arch.msr & MSR_FP) + if (vcpu->arch.shared->msr & MSR_FP) kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP); } void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags) { - vcpu->arch.srr0 = kvmppc_get_pc(vcpu); - vcpu->arch.srr1 = vcpu->arch.msr | flags; + vcpu->arch.shared->srr0 = kvmppc_get_pc(vcpu); + vcpu->arch.shared->srr1 = vcpu->arch.shared->msr | flags; kvmppc_set_pc(vcpu, to_book3s(vcpu)->hior + vec); vcpu->arch.mmu.reset_msr(vcpu); } @@ -180,6 +186,7 @@ static int kvmppc_book3s_vec2irqprio(unsigned int vec) case 0x400: prio = BOOK3S_IRQPRIO_INST_STORAGE; break; case 0x480: prio = BOOK3S_IRQPRIO_INST_SEGMENT; break; case 0x500: prio = BOOK3S_IRQPRIO_EXTERNAL; break; + case 0x501: prio = BOOK3S_IRQPRIO_EXTERNAL_LEVEL; break; case 0x600: prio = BOOK3S_IRQPRIO_ALIGNMENT; break; case 0x700: prio = BOOK3S_IRQPRIO_PROGRAM; break; case 0x800: prio = BOOK3S_IRQPRIO_FP_UNAVAIL; break; @@ -199,6 +206,9 @@ static void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu, { clear_bit(kvmppc_book3s_vec2irqprio(vec), &vcpu->arch.pending_exceptions); + + if (!vcpu->arch.pending_exceptions) + vcpu->arch.shared->int_pending = 0; } void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec) @@ -237,13 +247,19 @@ void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu) void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) { - kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL); + unsigned int vec = BOOK3S_INTERRUPT_EXTERNAL; + + if (irq->irq == KVM_INTERRUPT_SET_LEVEL) + vec = BOOK3S_INTERRUPT_EXTERNAL_LEVEL; + + kvmppc_book3s_queue_irqprio(vcpu, vec); } void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) { kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL); + kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL_LEVEL); } int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority) @@ -251,14 +267,29 @@ int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority) int deliver = 1; int vec = 0; ulong flags = 0ULL; + ulong crit_raw = vcpu->arch.shared->critical; + ulong crit_r1 = kvmppc_get_gpr(vcpu, 1); + bool crit; + + /* Truncate crit indicators in 32 bit mode */ + if (!(vcpu->arch.shared->msr & MSR_SF)) { + crit_raw &= 0xffffffff; + crit_r1 &= 0xffffffff; + } + + /* Critical section when crit == r1 */ + crit = (crit_raw == crit_r1); + /* ... and we're in supervisor mode */ + crit = crit && !(vcpu->arch.shared->msr & MSR_PR); switch (priority) { case BOOK3S_IRQPRIO_DECREMENTER: - deliver = vcpu->arch.msr & MSR_EE; + deliver = (vcpu->arch.shared->msr & MSR_EE) && !crit; vec = BOOK3S_INTERRUPT_DECREMENTER; break; case BOOK3S_IRQPRIO_EXTERNAL: - deliver = vcpu->arch.msr & MSR_EE; + case BOOK3S_IRQPRIO_EXTERNAL_LEVEL: + deliver = (vcpu->arch.shared->msr & MSR_EE) && !crit; vec = BOOK3S_INTERRUPT_EXTERNAL; break; case BOOK3S_IRQPRIO_SYSTEM_RESET: @@ -320,9 +351,27 @@ int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority) return deliver; } +/* + * This function determines if an irqprio should be cleared once issued. + */ +static bool clear_irqprio(struct kvm_vcpu *vcpu, unsigned int priority) +{ + switch (priority) { + case BOOK3S_IRQPRIO_DECREMENTER: + /* DEC interrupts get cleared by mtdec */ + return false; + case BOOK3S_IRQPRIO_EXTERNAL_LEVEL: + /* External interrupts get cleared by userspace */ + return false; + } + + return true; +} + void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu) { unsigned long *pending = &vcpu->arch.pending_exceptions; + unsigned long old_pending = vcpu->arch.pending_exceptions; unsigned int priority; #ifdef EXIT_DEBUG @@ -332,8 +381,7 @@ void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu) priority = __ffs(*pending); while (priority < BOOK3S_IRQPRIO_MAX) { if (kvmppc_book3s_irqprio_deliver(vcpu, priority) && - (priority != BOOK3S_IRQPRIO_DECREMENTER)) { - /* DEC interrupts get cleared by mtdec */ + clear_irqprio(vcpu, priority)) { clear_bit(priority, &vcpu->arch.pending_exceptions); break; } @@ -342,6 +390,12 @@ void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu) BITS_PER_BYTE * sizeof(*pending), priority + 1); } + + /* Tell the guest about our interrupt status */ + if (*pending) + vcpu->arch.shared->int_pending = 1; + else if (old_pending) + vcpu->arch.shared->int_pending = 0; } void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr) @@ -398,6 +452,25 @@ void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr) } } +pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn) +{ + ulong mp_pa = vcpu->arch.magic_page_pa; + + /* Magic page override */ + if (unlikely(mp_pa) && + unlikely(((gfn << PAGE_SHIFT) & KVM_PAM) == + ((mp_pa & PAGE_MASK) & KVM_PAM))) { + ulong shared_page = ((ulong)vcpu->arch.shared) & PAGE_MASK; + pfn_t pfn; + + pfn = (pfn_t)virt_to_phys((void*)shared_page) >> PAGE_SHIFT; + get_page(pfn_to_page(pfn)); + return pfn; + } + + return gfn_to_pfn(vcpu->kvm, gfn); +} + /* Book3s_32 CPUs always have 32 bytes cache line size, which Linux assumes. To * make Book3s_32 Linux work on Book3s_64, we have to make sure we trap dcbz to * emulate 32 bytes dcbz length. @@ -415,8 +488,10 @@ static void kvmppc_patch_dcbz(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte) int i; hpage = gfn_to_page(vcpu->kvm, pte->raddr >> PAGE_SHIFT); - if (is_error_page(hpage)) + if (is_error_page(hpage)) { + kvm_release_page_clean(hpage); return; + } hpage_offset = pte->raddr & ~PAGE_MASK; hpage_offset &= ~0xFFFULL; @@ -437,14 +512,14 @@ static void kvmppc_patch_dcbz(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte) static int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, bool data, struct kvmppc_pte *pte) { - int relocated = (vcpu->arch.msr & (data ? MSR_DR : MSR_IR)); + int relocated = (vcpu->arch.shared->msr & (data ? MSR_DR : MSR_IR)); int r; if (relocated) { r = vcpu->arch.mmu.xlate(vcpu, eaddr, pte, data); } else { pte->eaddr = eaddr; - pte->raddr = eaddr & 0xffffffff; + pte->raddr = eaddr & KVM_PAM; pte->vpage = VSID_REAL | eaddr >> 12; pte->may_read = true; pte->may_write = true; @@ -533,6 +608,13 @@ mmio: static int kvmppc_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn) { + ulong mp_pa = vcpu->arch.magic_page_pa; + + if (unlikely(mp_pa) && + unlikely((mp_pa & KVM_PAM) >> PAGE_SHIFT == gfn)) { + return 1; + } + return kvm_is_visible_gfn(vcpu->kvm, gfn); } @@ -545,8 +627,8 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, int page_found = 0; struct kvmppc_pte pte; bool is_mmio = false; - bool dr = (vcpu->arch.msr & MSR_DR) ? true : false; - bool ir = (vcpu->arch.msr & MSR_IR) ? true : false; + bool dr = (vcpu->arch.shared->msr & MSR_DR) ? true : false; + bool ir = (vcpu->arch.shared->msr & MSR_IR) ? true : false; u64 vsid; relocated = data ? dr : ir; @@ -558,12 +640,12 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, pte.may_execute = true; pte.may_read = true; pte.may_write = true; - pte.raddr = eaddr & 0xffffffff; + pte.raddr = eaddr & KVM_PAM; pte.eaddr = eaddr; pte.vpage = eaddr >> 12; } - switch (vcpu->arch.msr & (MSR_DR|MSR_IR)) { + switch (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) { case 0: pte.vpage |= ((u64)VSID_REAL << (SID_SHIFT - 12)); break; @@ -571,7 +653,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, case MSR_IR: vcpu->arch.mmu.esid_to_vsid(vcpu, eaddr >> SID_SHIFT, &vsid); - if ((vcpu->arch.msr & (MSR_DR|MSR_IR)) == MSR_DR) + if ((vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) == MSR_DR) pte.vpage |= ((u64)VSID_REAL_DR << (SID_SHIFT - 12)); else pte.vpage |= ((u64)VSID_REAL_IR << (SID_SHIFT - 12)); @@ -594,20 +676,23 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, if (page_found == -ENOENT) { /* Page not found in guest PTE entries */ - vcpu->arch.dear = kvmppc_get_fault_dar(vcpu); - to_book3s(vcpu)->dsisr = to_svcpu(vcpu)->fault_dsisr; - vcpu->arch.msr |= (to_svcpu(vcpu)->shadow_srr1 & 0x00000000f8000000ULL); + vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu); + vcpu->arch.shared->dsisr = to_svcpu(vcpu)->fault_dsisr; + vcpu->arch.shared->msr |= + (to_svcpu(vcpu)->shadow_srr1 & 0x00000000f8000000ULL); kvmppc_book3s_queue_irqprio(vcpu, vec); } else if (page_found == -EPERM) { /* Storage protection */ - vcpu->arch.dear = kvmppc_get_fault_dar(vcpu); - to_book3s(vcpu)->dsisr = to_svcpu(vcpu)->fault_dsisr & ~DSISR_NOHPTE; - to_book3s(vcpu)->dsisr |= DSISR_PROTFAULT; - vcpu->arch.msr |= (to_svcpu(vcpu)->shadow_srr1 & 0x00000000f8000000ULL); + vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu); + vcpu->arch.shared->dsisr = + to_svcpu(vcpu)->fault_dsisr & ~DSISR_NOHPTE; + vcpu->arch.shared->dsisr |= DSISR_PROTFAULT; + vcpu->arch.shared->msr |= + (to_svcpu(vcpu)->shadow_srr1 & 0x00000000f8000000ULL); kvmppc_book3s_queue_irqprio(vcpu, vec); } else if (page_found == -EINVAL) { /* Page not found in guest SLB */ - vcpu->arch.dear = kvmppc_get_fault_dar(vcpu); + vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu); kvmppc_book3s_queue_irqprio(vcpu, vec + 0x80); } else if (!is_mmio && kvmppc_visible_gfn(vcpu, pte.raddr >> PAGE_SHIFT)) { @@ -695,9 +780,11 @@ static int kvmppc_read_inst(struct kvm_vcpu *vcpu) ret = kvmppc_ld(vcpu, &srr0, sizeof(u32), &last_inst, false); if (ret == -ENOENT) { - vcpu->arch.msr = kvmppc_set_field(vcpu->arch.msr, 33, 33, 1); - vcpu->arch.msr = kvmppc_set_field(vcpu->arch.msr, 34, 36, 0); - vcpu->arch.msr = kvmppc_set_field(vcpu->arch.msr, 42, 47, 0); + ulong msr = vcpu->arch.shared->msr; + + msr = kvmppc_set_field(msr, 33, 33, 1); + msr = kvmppc_set_field(msr, 34, 36, 0); + vcpu->arch.shared->msr = kvmppc_set_field(msr, 42, 47, 0); kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_INST_STORAGE); return EMULATE_AGAIN; } @@ -736,7 +823,7 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr, if (vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE) return RESUME_GUEST; - if (!(vcpu->arch.msr & msr)) { + if (!(vcpu->arch.shared->msr & msr)) { kvmppc_book3s_queue_irqprio(vcpu, exit_nr); return RESUME_GUEST; } @@ -796,16 +883,8 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, run->exit_reason = KVM_EXIT_UNKNOWN; run->ready_for_interrupt_injection = 1; -#ifdef EXIT_DEBUG - printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | dar=0x%lx | dec=0x%x | msr=0x%lx\n", - exit_nr, kvmppc_get_pc(vcpu), kvmppc_get_fault_dar(vcpu), - kvmppc_get_dec(vcpu), to_svcpu(vcpu)->shadow_srr1); -#elif defined (EXIT_DEBUG_SIMPLE) - if ((exit_nr != 0x900) && (exit_nr != 0x500)) - printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | dar=0x%lx | msr=0x%lx\n", - exit_nr, kvmppc_get_pc(vcpu), kvmppc_get_fault_dar(vcpu), - vcpu->arch.msr); -#endif + + trace_kvm_book3s_exit(exit_nr, vcpu); kvm_resched(vcpu); switch (exit_nr) { case BOOK3S_INTERRUPT_INST_STORAGE: @@ -836,9 +915,9 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, kvmppc_mmu_pte_flush(vcpu, kvmppc_get_pc(vcpu), ~0xFFFUL); r = RESUME_GUEST; } else { - vcpu->arch.msr |= to_svcpu(vcpu)->shadow_srr1 & 0x58000000; + vcpu->arch.shared->msr |= + to_svcpu(vcpu)->shadow_srr1 & 0x58000000; kvmppc_book3s_queue_irqprio(vcpu, exit_nr); - kvmppc_mmu_pte_flush(vcpu, kvmppc_get_pc(vcpu), ~0xFFFUL); r = RESUME_GUEST; } break; @@ -861,17 +940,16 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, if (to_svcpu(vcpu)->fault_dsisr & DSISR_NOHPTE) { r = kvmppc_handle_pagefault(run, vcpu, dar, exit_nr); } else { - vcpu->arch.dear = dar; - to_book3s(vcpu)->dsisr = to_svcpu(vcpu)->fault_dsisr; + vcpu->arch.shared->dar = dar; + vcpu->arch.shared->dsisr = to_svcpu(vcpu)->fault_dsisr; kvmppc_book3s_queue_irqprio(vcpu, exit_nr); - kvmppc_mmu_pte_flush(vcpu, vcpu->arch.dear, ~0xFFFUL); r = RESUME_GUEST; } break; } case BOOK3S_INTERRUPT_DATA_SEGMENT: if (kvmppc_mmu_map_segment(vcpu, kvmppc_get_fault_dar(vcpu)) < 0) { - vcpu->arch.dear = kvmppc_get_fault_dar(vcpu); + vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu); kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DATA_SEGMENT); } @@ -904,7 +982,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, program_interrupt: flags = to_svcpu(vcpu)->shadow_srr1 & 0x1f0000ull; - if (vcpu->arch.msr & MSR_PR) { + if (vcpu->arch.shared->msr & MSR_PR) { #ifdef EXIT_DEBUG printk(KERN_INFO "Userspace triggered 0x700 exception at 0x%lx (0x%x)\n", kvmppc_get_pc(vcpu), kvmppc_get_last_inst(vcpu)); #endif @@ -941,10 +1019,10 @@ program_interrupt: break; } case BOOK3S_INTERRUPT_SYSCALL: - // XXX make user settable if (vcpu->arch.osi_enabled && (((u32)kvmppc_get_gpr(vcpu, 3)) == OSI_SC_MAGIC_R3) && (((u32)kvmppc_get_gpr(vcpu, 4)) == OSI_SC_MAGIC_R4)) { + /* MOL hypercalls */ u64 *gprs = run->osi.gprs; int i; @@ -953,8 +1031,13 @@ program_interrupt: gprs[i] = kvmppc_get_gpr(vcpu, i); vcpu->arch.osi_needed = 1; r = RESUME_HOST_NV; - + } else if (!(vcpu->arch.shared->msr & MSR_PR) && + (((u32)kvmppc_get_gpr(vcpu, 0)) == KVM_SC_MAGIC_R0)) { + /* KVM PV hypercalls */ + kvmppc_set_gpr(vcpu, 3, kvmppc_kvm_pv(vcpu)); + r = RESUME_GUEST; } else { + /* Guest syscalls */ vcpu->stat.syscall_exits++; kvmppc_book3s_queue_irqprio(vcpu, exit_nr); r = RESUME_GUEST; @@ -989,9 +1072,9 @@ program_interrupt: } case BOOK3S_INTERRUPT_ALIGNMENT: if (kvmppc_read_inst(vcpu) == EMULATE_DONE) { - to_book3s(vcpu)->dsisr = kvmppc_alignment_dsisr(vcpu, + vcpu->arch.shared->dsisr = kvmppc_alignment_dsisr(vcpu, kvmppc_get_last_inst(vcpu)); - vcpu->arch.dear = kvmppc_alignment_dar(vcpu, + vcpu->arch.shared->dar = kvmppc_alignment_dar(vcpu, kvmppc_get_last_inst(vcpu)); kvmppc_book3s_queue_irqprio(vcpu, exit_nr); } @@ -1031,9 +1114,7 @@ program_interrupt: } } -#ifdef EXIT_DEBUG - printk(KERN_EMERG "KVM exit: vcpu=0x%p pc=0x%lx r=0x%x\n", vcpu, kvmppc_get_pc(vcpu), r); -#endif + trace_kvm_book3s_reenter(r, vcpu); return r; } @@ -1052,14 +1133,14 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) regs->ctr = kvmppc_get_ctr(vcpu); regs->lr = kvmppc_get_lr(vcpu); regs->xer = kvmppc_get_xer(vcpu); - regs->msr = vcpu->arch.msr; - regs->srr0 = vcpu->arch.srr0; - regs->srr1 = vcpu->arch.srr1; + regs->msr = vcpu->arch.shared->msr; + regs->srr0 = vcpu->arch.shared->srr0; + regs->srr1 = vcpu->arch.shared->srr1; regs->pid = vcpu->arch.pid; - regs->sprg0 = vcpu->arch.sprg0; - regs->sprg1 = vcpu->arch.sprg1; - regs->sprg2 = vcpu->arch.sprg2; - regs->sprg3 = vcpu->arch.sprg3; + regs->sprg0 = vcpu->arch.shared->sprg0; + regs->sprg1 = vcpu->arch.shared->sprg1; + regs->sprg2 = vcpu->arch.shared->sprg2; + regs->sprg3 = vcpu->arch.shared->sprg3; regs->sprg5 = vcpu->arch.sprg4; regs->sprg6 = vcpu->arch.sprg5; regs->sprg7 = vcpu->arch.sprg6; @@ -1080,12 +1161,12 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) kvmppc_set_lr(vcpu, regs->lr); kvmppc_set_xer(vcpu, regs->xer); kvmppc_set_msr(vcpu, regs->msr); - vcpu->arch.srr0 = regs->srr0; - vcpu->arch.srr1 = regs->srr1; - vcpu->arch.sprg0 = regs->sprg0; - vcpu->arch.sprg1 = regs->sprg1; - vcpu->arch.sprg2 = regs->sprg2; - vcpu->arch.sprg3 = regs->sprg3; + vcpu->arch.shared->srr0 = regs->srr0; + vcpu->arch.shared->srr1 = regs->srr1; + vcpu->arch.shared->sprg0 = regs->sprg0; + vcpu->arch.shared->sprg1 = regs->sprg1; + vcpu->arch.shared->sprg2 = regs->sprg2; + vcpu->arch.shared->sprg3 = regs->sprg3; vcpu->arch.sprg5 = regs->sprg4; vcpu->arch.sprg6 = regs->sprg5; vcpu->arch.sprg7 = regs->sprg6; @@ -1111,10 +1192,9 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, sregs->u.s.ppc64.slb[i].slbv = vcpu3s->slb[i].origv; } } else { - for (i = 0; i < 16; i++) { - sregs->u.s.ppc32.sr[i] = vcpu3s->sr[i].raw; - sregs->u.s.ppc32.sr[i] = vcpu3s->sr[i].raw; - } + for (i = 0; i < 16; i++) + sregs->u.s.ppc32.sr[i] = vcpu->arch.shared->sr[i]; + for (i = 0; i < 8; i++) { sregs->u.s.ppc32.ibat[i] = vcpu3s->ibat[i].raw; sregs->u.s.ppc32.dbat[i] = vcpu3s->dbat[i].raw; @@ -1225,6 +1305,7 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) struct kvmppc_vcpu_book3s *vcpu_book3s; struct kvm_vcpu *vcpu; int err = -ENOMEM; + unsigned long p; vcpu_book3s = vmalloc(sizeof(struct kvmppc_vcpu_book3s)); if (!vcpu_book3s) @@ -1242,6 +1323,12 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) if (err) goto free_shadow_vcpu; + p = __get_free_page(GFP_KERNEL|__GFP_ZERO); + /* the real shared page fills the last 4k of our page */ + vcpu->arch.shared = (void*)(p + PAGE_SIZE - 4096); + if (!p) + goto uninit_vcpu; + vcpu->arch.host_retip = kvm_return_point; vcpu->arch.host_msr = mfmsr(); #ifdef CONFIG_PPC_BOOK3S_64 @@ -1268,10 +1355,12 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) err = kvmppc_mmu_init(vcpu); if (err < 0) - goto free_shadow_vcpu; + goto uninit_vcpu; return vcpu; +uninit_vcpu: + kvm_vcpu_uninit(vcpu); free_shadow_vcpu: kfree(vcpu_book3s->shadow_vcpu); free_vcpu: @@ -1284,6 +1373,7 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) { struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); + free_page((unsigned long)vcpu->arch.shared & PAGE_MASK); kvm_vcpu_uninit(vcpu); kfree(vcpu_book3s->shadow_vcpu); vfree(vcpu_book3s); @@ -1346,7 +1436,7 @@ int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) local_irq_enable(); /* Preload FPU if it's enabled */ - if (vcpu->arch.msr & MSR_FP) + if (vcpu->arch.shared->msr & MSR_FP) kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP); ret = __kvmppc_vcpu_entry(kvm_run, vcpu); diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c index 3292d76101d..c8cefdd15fd 100644 --- a/arch/powerpc/kvm/book3s_32_mmu.c +++ b/arch/powerpc/kvm/book3s_32_mmu.c @@ -58,14 +58,39 @@ static inline bool check_debug_ip(struct kvm_vcpu *vcpu) #endif } +static inline u32 sr_vsid(u32 sr_raw) +{ + return sr_raw & 0x0fffffff; +} + +static inline bool sr_valid(u32 sr_raw) +{ + return (sr_raw & 0x80000000) ? false : true; +} + +static inline bool sr_ks(u32 sr_raw) +{ + return (sr_raw & 0x40000000) ? true: false; +} + +static inline bool sr_kp(u32 sr_raw) +{ + return (sr_raw & 0x20000000) ? true: false; +} + +static inline bool sr_nx(u32 sr_raw) +{ + return (sr_raw & 0x10000000) ? true: false; +} + static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr, struct kvmppc_pte *pte, bool data); static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid, u64 *vsid); -static struct kvmppc_sr *find_sr(struct kvmppc_vcpu_book3s *vcpu_book3s, gva_t eaddr) +static u32 find_sr(struct kvm_vcpu *vcpu, gva_t eaddr) { - return &vcpu_book3s->sr[(eaddr >> 28) & 0xf]; + return vcpu->arch.shared->sr[(eaddr >> 28) & 0xf]; } static u64 kvmppc_mmu_book3s_32_ea_to_vp(struct kvm_vcpu *vcpu, gva_t eaddr, @@ -87,7 +112,7 @@ static void kvmppc_mmu_book3s_32_reset_msr(struct kvm_vcpu *vcpu) } static hva_t kvmppc_mmu_book3s_32_get_pteg(struct kvmppc_vcpu_book3s *vcpu_book3s, - struct kvmppc_sr *sre, gva_t eaddr, + u32 sre, gva_t eaddr, bool primary) { u32 page, hash, pteg, htabmask; @@ -96,7 +121,7 @@ static hva_t kvmppc_mmu_book3s_32_get_pteg(struct kvmppc_vcpu_book3s *vcpu_book3 page = (eaddr & 0x0FFFFFFF) >> 12; htabmask = ((vcpu_book3s->sdr1 & 0x1FF) << 16) | 0xFFC0; - hash = ((sre->vsid ^ page) << 6); + hash = ((sr_vsid(sre) ^ page) << 6); if (!primary) hash = ~hash; hash &= htabmask; @@ -104,8 +129,8 @@ static hva_t kvmppc_mmu_book3s_32_get_pteg(struct kvmppc_vcpu_book3s *vcpu_book3 pteg = (vcpu_book3s->sdr1 & 0xffff0000) | hash; dprintk("MMU: pc=0x%lx eaddr=0x%lx sdr1=0x%llx pteg=0x%x vsid=0x%x\n", - vcpu_book3s->vcpu.arch.pc, eaddr, vcpu_book3s->sdr1, pteg, - sre->vsid); + kvmppc_get_pc(&vcpu_book3s->vcpu), eaddr, vcpu_book3s->sdr1, pteg, + sr_vsid(sre)); r = gfn_to_hva(vcpu_book3s->vcpu.kvm, pteg >> PAGE_SHIFT); if (kvm_is_error_hva(r)) @@ -113,10 +138,9 @@ static hva_t kvmppc_mmu_book3s_32_get_pteg(struct kvmppc_vcpu_book3s *vcpu_book3 return r | (pteg & ~PAGE_MASK); } -static u32 kvmppc_mmu_book3s_32_get_ptem(struct kvmppc_sr *sre, gva_t eaddr, - bool primary) +static u32 kvmppc_mmu_book3s_32_get_ptem(u32 sre, gva_t eaddr, bool primary) { - return ((eaddr & 0x0fffffff) >> 22) | (sre->vsid << 7) | + return ((eaddr & 0x0fffffff) >> 22) | (sr_vsid(sre) << 7) | (primary ? 0 : 0x40) | 0x80000000; } @@ -133,7 +157,7 @@ static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr, else bat = &vcpu_book3s->ibat[i]; - if (vcpu->arch.msr & MSR_PR) { + if (vcpu->arch.shared->msr & MSR_PR) { if (!bat->vp) continue; } else { @@ -180,17 +204,17 @@ static int kvmppc_mmu_book3s_32_xlate_pte(struct kvm_vcpu *vcpu, gva_t eaddr, bool primary) { struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); - struct kvmppc_sr *sre; + u32 sre; hva_t ptegp; u32 pteg[16]; u32 ptem = 0; int i; int found = 0; - sre = find_sr(vcpu_book3s, eaddr); + sre = find_sr(vcpu, eaddr); dprintk_pte("SR 0x%lx: vsid=0x%x, raw=0x%x\n", eaddr >> 28, - sre->vsid, sre->raw); + sr_vsid(sre), sre); pte->vpage = kvmppc_mmu_book3s_32_ea_to_vp(vcpu, eaddr, data); @@ -214,8 +238,8 @@ static int kvmppc_mmu_book3s_32_xlate_pte(struct kvm_vcpu *vcpu, gva_t eaddr, pte->raddr = (pteg[i+1] & ~(0xFFFULL)) | (eaddr & 0xFFF); pp = pteg[i+1] & 3; - if ((sre->Kp && (vcpu->arch.msr & MSR_PR)) || - (sre->Ks && !(vcpu->arch.msr & MSR_PR))) + if ((sr_kp(sre) && (vcpu->arch.shared->msr & MSR_PR)) || + (sr_ks(sre) && !(vcpu->arch.shared->msr & MSR_PR))) pp |= 4; pte->may_write = false; @@ -269,7 +293,7 @@ no_page_found: dprintk_pte("KVM MMU: No PTE found (sdr1=0x%llx ptegp=0x%lx)\n", to_book3s(vcpu)->sdr1, ptegp); for (i=0; i<16; i+=2) { - dprintk_pte(" %02d: 0x%x - 0x%x (0x%llx)\n", + dprintk_pte(" %02d: 0x%x - 0x%x (0x%x)\n", i, pteg[i], pteg[i+1], ptem); } } @@ -281,8 +305,24 @@ static int kvmppc_mmu_book3s_32_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, struct kvmppc_pte *pte, bool data) { int r; + ulong mp_ea = vcpu->arch.magic_page_ea; pte->eaddr = eaddr; + + /* Magic page override */ + if (unlikely(mp_ea) && + unlikely((eaddr & ~0xfffULL) == (mp_ea & ~0xfffULL)) && + !(vcpu->arch.shared->msr & MSR_PR)) { + pte->vpage = kvmppc_mmu_book3s_32_ea_to_vp(vcpu, eaddr, data); + pte->raddr = vcpu->arch.magic_page_pa | (pte->raddr & 0xfff); + pte->raddr &= KVM_PAM; + pte->may_execute = true; + pte->may_read = true; + pte->may_write = true; + + return 0; + } + r = kvmppc_mmu_book3s_32_xlate_bat(vcpu, eaddr, pte, data); if (r < 0) r = kvmppc_mmu_book3s_32_xlate_pte(vcpu, eaddr, pte, data, true); @@ -295,30 +335,13 @@ static int kvmppc_mmu_book3s_32_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, static u32 kvmppc_mmu_book3s_32_mfsrin(struct kvm_vcpu *vcpu, u32 srnum) { - return to_book3s(vcpu)->sr[srnum].raw; + return vcpu->arch.shared->sr[srnum]; } static void kvmppc_mmu_book3s_32_mtsrin(struct kvm_vcpu *vcpu, u32 srnum, ulong value) { - struct kvmppc_sr *sre; - - sre = &to_book3s(vcpu)->sr[srnum]; - - /* Flush any left-over shadows from the previous SR */ - - /* XXX Not necessary? */ - /* kvmppc_mmu_pte_flush(vcpu, ((u64)sre->vsid) << 28, 0xf0000000ULL); */ - - /* And then put in the new SR */ - sre->raw = value; - sre->vsid = (value & 0x0fffffff); - sre->valid = (value & 0x80000000) ? false : true; - sre->Ks = (value & 0x40000000) ? true : false; - sre->Kp = (value & 0x20000000) ? true : false; - sre->nx = (value & 0x10000000) ? true : false; - - /* Map the new segment */ + vcpu->arch.shared->sr[srnum] = value; kvmppc_mmu_map_segment(vcpu, srnum << SID_SHIFT); } @@ -331,19 +354,19 @@ static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid, u64 *vsid) { ulong ea = esid << SID_SHIFT; - struct kvmppc_sr *sr; + u32 sr; u64 gvsid = esid; - if (vcpu->arch.msr & (MSR_DR|MSR_IR)) { - sr = find_sr(to_book3s(vcpu), ea); - if (sr->valid) - gvsid = sr->vsid; + if (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) { + sr = find_sr(vcpu, ea); + if (sr_valid(sr)) + gvsid = sr_vsid(sr); } /* In case we only have one of MSR_IR or MSR_DR set, let's put that in the real-mode context (and hope RM doesn't access high memory) */ - switch (vcpu->arch.msr & (MSR_DR|MSR_IR)) { + switch (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) { case 0: *vsid = VSID_REAL | esid; break; @@ -354,8 +377,8 @@ static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid, *vsid = VSID_REAL_DR | gvsid; break; case MSR_DR|MSR_IR: - if (sr->valid) - *vsid = sr->vsid; + if (sr_valid(sr)) + *vsid = sr_vsid(sr); else *vsid = VSID_BAT | gvsid; break; @@ -363,7 +386,7 @@ static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid, BUG(); } - if (vcpu->arch.msr & MSR_PR) + if (vcpu->arch.shared->msr & MSR_PR) *vsid |= VSID_PR; return 0; diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c b/arch/powerpc/kvm/book3s_32_mmu_host.c index 0b51ef872c1..9fecbfbce77 100644 --- a/arch/powerpc/kvm/book3s_32_mmu_host.c +++ b/arch/powerpc/kvm/book3s_32_mmu_host.c @@ -19,7 +19,6 @@ */ #include <linux/kvm_host.h> -#include <linux/hash.h> #include <asm/kvm_ppc.h> #include <asm/kvm_book3s.h> @@ -77,7 +76,14 @@ void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte) * a hash, so we don't waste cycles on looping */ static u16 kvmppc_sid_hash(struct kvm_vcpu *vcpu, u64 gvsid) { - return hash_64(gvsid, SID_MAP_BITS); + return (u16)(((gvsid >> (SID_MAP_BITS * 7)) & SID_MAP_MASK) ^ + ((gvsid >> (SID_MAP_BITS * 6)) & SID_MAP_MASK) ^ + ((gvsid >> (SID_MAP_BITS * 5)) & SID_MAP_MASK) ^ + ((gvsid >> (SID_MAP_BITS * 4)) & SID_MAP_MASK) ^ + ((gvsid >> (SID_MAP_BITS * 3)) & SID_MAP_MASK) ^ + ((gvsid >> (SID_MAP_BITS * 2)) & SID_MAP_MASK) ^ + ((gvsid >> (SID_MAP_BITS * 1)) & SID_MAP_MASK) ^ + ((gvsid >> (SID_MAP_BITS * 0)) & SID_MAP_MASK)); } @@ -86,7 +92,7 @@ static struct kvmppc_sid_map *find_sid_vsid(struct kvm_vcpu *vcpu, u64 gvsid) struct kvmppc_sid_map *map; u16 sid_map_mask; - if (vcpu->arch.msr & MSR_PR) + if (vcpu->arch.shared->msr & MSR_PR) gvsid |= VSID_PR; sid_map_mask = kvmppc_sid_hash(vcpu, gvsid); @@ -147,8 +153,8 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte) struct hpte_cache *pte; /* Get host physical address for gpa */ - hpaddr = gfn_to_pfn(vcpu->kvm, orig_pte->raddr >> PAGE_SHIFT); - if (kvm_is_error_hva(hpaddr)) { + hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT); + if (is_error_pfn(hpaddr)) { printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n", orig_pte->eaddr); return -EINVAL; @@ -253,7 +259,7 @@ static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid) u16 sid_map_mask; static int backwards_map = 0; - if (vcpu->arch.msr & MSR_PR) + if (vcpu->arch.shared->msr & MSR_PR) gvsid |= VSID_PR; /* We might get collisions that trap in preceding order, so let's @@ -269,18 +275,15 @@ static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid) backwards_map = !backwards_map; /* Uh-oh ... out of mappings. Let's flush! */ - if (vcpu_book3s->vsid_next >= vcpu_book3s->vsid_max) { - vcpu_book3s->vsid_next = vcpu_book3s->vsid_first; + if (vcpu_book3s->vsid_next >= VSID_POOL_SIZE) { + vcpu_book3s->vsid_next = 0; memset(vcpu_book3s->sid_map, 0, sizeof(struct kvmppc_sid_map) * SID_MAP_NUM); kvmppc_mmu_pte_flush(vcpu, 0, 0); kvmppc_mmu_flush_segments(vcpu); } - map->host_vsid = vcpu_book3s->vsid_next; - - /* Would have to be 111 to be completely aligned with the rest of - Linux, but that is just way too little space! */ - vcpu_book3s->vsid_next+=1; + map->host_vsid = vcpu_book3s->vsid_pool[vcpu_book3s->vsid_next]; + vcpu_book3s->vsid_next++; map->guest_vsid = gvsid; map->valid = true; @@ -327,40 +330,38 @@ void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu) void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) { + int i; + kvmppc_mmu_hpte_destroy(vcpu); preempt_disable(); - __destroy_context(to_book3s(vcpu)->context_id); + for (i = 0; i < SID_CONTEXTS; i++) + __destroy_context(to_book3s(vcpu)->context_id[i]); preempt_enable(); } /* From mm/mmu_context_hash32.c */ -#define CTX_TO_VSID(ctx) (((ctx) * (897 * 16)) & 0xffffff) +#define CTX_TO_VSID(c, id) ((((c) * (897 * 16)) + (id * 0x111)) & 0xffffff) int kvmppc_mmu_init(struct kvm_vcpu *vcpu) { struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); int err; ulong sdr1; + int i; + int j; - err = __init_new_context(); - if (err < 0) - return -1; - vcpu3s->context_id = err; - - vcpu3s->vsid_max = CTX_TO_VSID(vcpu3s->context_id + 1) - 1; - vcpu3s->vsid_first = CTX_TO_VSID(vcpu3s->context_id); - -#if 0 /* XXX still doesn't guarantee uniqueness */ - /* We could collide with the Linux vsid space because the vsid - * wraps around at 24 bits. We're safe if we do our own space - * though, so let's always set the highest bit. */ + for (i = 0; i < SID_CONTEXTS; i++) { + err = __init_new_context(); + if (err < 0) + goto init_fail; + vcpu3s->context_id[i] = err; - vcpu3s->vsid_max |= 0x00800000; - vcpu3s->vsid_first |= 0x00800000; -#endif - BUG_ON(vcpu3s->vsid_max < vcpu3s->vsid_first); + /* Remember context id for this combination */ + for (j = 0; j < 16; j++) + vcpu3s->vsid_pool[(i * 16) + j] = CTX_TO_VSID(err, j); + } - vcpu3s->vsid_next = vcpu3s->vsid_first; + vcpu3s->vsid_next = 0; /* Remember where the HTAB is */ asm ( "mfsdr1 %0" : "=r"(sdr1) ); @@ -370,4 +371,14 @@ int kvmppc_mmu_init(struct kvm_vcpu *vcpu) kvmppc_mmu_hpte_init(vcpu); return 0; + +init_fail: + for (j = 0; j < i; j++) { + if (!vcpu3s->context_id[j]) + continue; + + __destroy_context(to_book3s(vcpu)->context_id[j]); + } + + return -1; } diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c index 4025ea26b3c..d7889ef3211 100644 --- a/arch/powerpc/kvm/book3s_64_mmu.c +++ b/arch/powerpc/kvm/book3s_64_mmu.c @@ -163,6 +163,22 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, bool found = false; bool perm_err = false; int second = 0; + ulong mp_ea = vcpu->arch.magic_page_ea; + + /* Magic page override */ + if (unlikely(mp_ea) && + unlikely((eaddr & ~0xfffULL) == (mp_ea & ~0xfffULL)) && + !(vcpu->arch.shared->msr & MSR_PR)) { + gpte->eaddr = eaddr; + gpte->vpage = kvmppc_mmu_book3s_64_ea_to_vp(vcpu, eaddr, data); + gpte->raddr = vcpu->arch.magic_page_pa | (gpte->raddr & 0xfff); + gpte->raddr &= KVM_PAM; + gpte->may_execute = true; + gpte->may_read = true; + gpte->may_write = true; + + return 0; + } slbe = kvmppc_mmu_book3s_64_find_slbe(vcpu_book3s, eaddr); if (!slbe) @@ -180,9 +196,9 @@ do_second: goto no_page_found; } - if ((vcpu->arch.msr & MSR_PR) && slbe->Kp) + if ((vcpu->arch.shared->msr & MSR_PR) && slbe->Kp) key = 4; - else if (!(vcpu->arch.msr & MSR_PR) && slbe->Ks) + else if (!(vcpu->arch.shared->msr & MSR_PR) && slbe->Ks) key = 4; for (i=0; i<16; i+=2) { @@ -381,7 +397,7 @@ static void kvmppc_mmu_book3s_64_slbia(struct kvm_vcpu *vcpu) for (i = 1; i < vcpu_book3s->slb_nr; i++) vcpu_book3s->slb[i].valid = false; - if (vcpu->arch.msr & MSR_IR) { + if (vcpu->arch.shared->msr & MSR_IR) { kvmppc_mmu_flush_segments(vcpu); kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu)); } @@ -445,14 +461,15 @@ static int kvmppc_mmu_book3s_64_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid, ulong ea = esid << SID_SHIFT; struct kvmppc_slb *slb; u64 gvsid = esid; + ulong mp_ea = vcpu->arch.magic_page_ea; - if (vcpu->arch.msr & (MSR_DR|MSR_IR)) { + if (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) { slb = kvmppc_mmu_book3s_64_find_slbe(to_book3s(vcpu), ea); if (slb) gvsid = slb->vsid; } - switch (vcpu->arch.msr & (MSR_DR|MSR_IR)) { + switch (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) { case 0: *vsid = VSID_REAL | esid; break; @@ -464,7 +481,7 @@ static int kvmppc_mmu_book3s_64_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid, break; case MSR_DR|MSR_IR: if (!slb) - return -ENOENT; + goto no_slb; *vsid = gvsid; break; @@ -473,10 +490,21 @@ static int kvmppc_mmu_book3s_64_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid, break; } - if (vcpu->arch.msr & MSR_PR) + if (vcpu->arch.shared->msr & MSR_PR) *vsid |= VSID_PR; return 0; + +no_slb: + /* Catch magic page case */ + if (unlikely(mp_ea) && + unlikely(esid == (mp_ea >> SID_SHIFT)) && + !(vcpu->arch.shared->msr & MSR_PR)) { + *vsid = VSID_REAL | esid; + return 0; + } + + return -EINVAL; } static bool kvmppc_mmu_book3s_64_is_dcbz32(struct kvm_vcpu *vcpu) diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c index 384179a5002..fa2f08434ba 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_host.c +++ b/arch/powerpc/kvm/book3s_64_mmu_host.c @@ -20,7 +20,6 @@ */ #include <linux/kvm_host.h> -#include <linux/hash.h> #include <asm/kvm_ppc.h> #include <asm/kvm_book3s.h> @@ -28,24 +27,9 @@ #include <asm/machdep.h> #include <asm/mmu_context.h> #include <asm/hw_irq.h> +#include "trace.h" #define PTE_SIZE 12 -#define VSID_ALL 0 - -/* #define DEBUG_MMU */ -/* #define DEBUG_SLB */ - -#ifdef DEBUG_MMU -#define dprintk_mmu(a, ...) printk(KERN_INFO a, __VA_ARGS__) -#else -#define dprintk_mmu(a, ...) do { } while(0) -#endif - -#ifdef DEBUG_SLB -#define dprintk_slb(a, ...) printk(KERN_INFO a, __VA_ARGS__) -#else -#define dprintk_slb(a, ...) do { } while(0) -#endif void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte) { @@ -58,34 +42,39 @@ void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte) * a hash, so we don't waste cycles on looping */ static u16 kvmppc_sid_hash(struct kvm_vcpu *vcpu, u64 gvsid) { - return hash_64(gvsid, SID_MAP_BITS); + return (u16)(((gvsid >> (SID_MAP_BITS * 7)) & SID_MAP_MASK) ^ + ((gvsid >> (SID_MAP_BITS * 6)) & SID_MAP_MASK) ^ + ((gvsid >> (SID_MAP_BITS * 5)) & SID_MAP_MASK) ^ + ((gvsid >> (SID_MAP_BITS * 4)) & SID_MAP_MASK) ^ + ((gvsid >> (SID_MAP_BITS * 3)) & SID_MAP_MASK) ^ + ((gvsid >> (SID_MAP_BITS * 2)) & SID_MAP_MASK) ^ + ((gvsid >> (SID_MAP_BITS * 1)) & SID_MAP_MASK) ^ + ((gvsid >> (SID_MAP_BITS * 0)) & SID_MAP_MASK)); } + static struct kvmppc_sid_map *find_sid_vsid(struct kvm_vcpu *vcpu, u64 gvsid) { struct kvmppc_sid_map *map; u16 sid_map_mask; - if (vcpu->arch.msr & MSR_PR) + if (vcpu->arch.shared->msr & MSR_PR) gvsid |= VSID_PR; sid_map_mask = kvmppc_sid_hash(vcpu, gvsid); map = &to_book3s(vcpu)->sid_map[sid_map_mask]; - if (map->guest_vsid == gvsid) { - dprintk_slb("SLB: Searching: 0x%llx -> 0x%llx\n", - gvsid, map->host_vsid); + if (map->valid && (map->guest_vsid == gvsid)) { + trace_kvm_book3s_slb_found(gvsid, map->host_vsid); return map; } map = &to_book3s(vcpu)->sid_map[SID_MAP_MASK - sid_map_mask]; - if (map->guest_vsid == gvsid) { - dprintk_slb("SLB: Searching 0x%llx -> 0x%llx\n", - gvsid, map->host_vsid); + if (map->valid && (map->guest_vsid == gvsid)) { + trace_kvm_book3s_slb_found(gvsid, map->host_vsid); return map; } - dprintk_slb("SLB: Searching %d/%d: 0x%llx -> not found\n", - sid_map_mask, SID_MAP_MASK - sid_map_mask, gvsid); + trace_kvm_book3s_slb_fail(sid_map_mask, gvsid); return NULL; } @@ -101,18 +90,13 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte) struct kvmppc_sid_map *map; /* Get host physical address for gpa */ - hpaddr = gfn_to_pfn(vcpu->kvm, orig_pte->raddr >> PAGE_SHIFT); - if (kvm_is_error_hva(hpaddr)) { + hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT); + if (is_error_pfn(hpaddr)) { printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n", orig_pte->eaddr); return -EINVAL; } hpaddr <<= PAGE_SHIFT; -#if PAGE_SHIFT == 12 -#elif PAGE_SHIFT == 16 - hpaddr |= orig_pte->raddr & 0xf000; -#else -#error Unknown page size -#endif + hpaddr |= orig_pte->raddr & (~0xfffULL & ~PAGE_MASK); /* and write the mapping ea -> hpa into the pt */ vcpu->arch.mmu.esid_to_vsid(vcpu, orig_pte->eaddr >> SID_SHIFT, &vsid); @@ -161,10 +145,7 @@ map_again: } else { struct hpte_cache *pte = kvmppc_mmu_hpte_cache_next(vcpu); - dprintk_mmu("KVM: %c%c Map 0x%lx: [%lx] 0x%lx (0x%llx) -> %lx\n", - ((rflags & HPTE_R_PP) == 3) ? '-' : 'w', - (rflags & HPTE_R_N) ? '-' : 'x', - orig_pte->eaddr, hpteg, va, orig_pte->vpage, hpaddr); + trace_kvm_book3s_64_mmu_map(rflags, hpteg, va, hpaddr, orig_pte); /* The ppc_md code may give us a secondary entry even though we asked for a primary. Fix up. */ @@ -191,7 +172,7 @@ static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid) u16 sid_map_mask; static int backwards_map = 0; - if (vcpu->arch.msr & MSR_PR) + if (vcpu->arch.shared->msr & MSR_PR) gvsid |= VSID_PR; /* We might get collisions that trap in preceding order, so let's @@ -219,8 +200,7 @@ static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid) map->guest_vsid = gvsid; map->valid = true; - dprintk_slb("SLB: New mapping at %d: 0x%llx -> 0x%llx\n", - sid_map_mask, gvsid, map->host_vsid); + trace_kvm_book3s_slb_map(sid_map_mask, gvsid, map->host_vsid); return map; } @@ -292,7 +272,7 @@ int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr) to_svcpu(vcpu)->slb[slb_index].esid = slb_esid; to_svcpu(vcpu)->slb[slb_index].vsid = slb_vsid; - dprintk_slb("slbmte %#llx, %#llx\n", slb_vsid, slb_esid); + trace_kvm_book3s_slbmte(slb_vsid, slb_esid); return 0; } @@ -306,7 +286,7 @@ void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu) void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) { kvmppc_mmu_hpte_destroy(vcpu); - __destroy_context(to_book3s(vcpu)->context_id); + __destroy_context(to_book3s(vcpu)->context_id[0]); } int kvmppc_mmu_init(struct kvm_vcpu *vcpu) @@ -317,10 +297,10 @@ int kvmppc_mmu_init(struct kvm_vcpu *vcpu) err = __init_new_context(); if (err < 0) return -1; - vcpu3s->context_id = err; + vcpu3s->context_id[0] = err; - vcpu3s->vsid_max = ((vcpu3s->context_id + 1) << USER_ESID_BITS) - 1; - vcpu3s->vsid_first = vcpu3s->context_id << USER_ESID_BITS; + vcpu3s->vsid_max = ((vcpu3s->context_id[0] + 1) << USER_ESID_BITS) - 1; + vcpu3s->vsid_first = vcpu3s->context_id[0] << USER_ESID_BITS; vcpu3s->vsid_next = vcpu3s->vsid_first; kvmppc_mmu_hpte_init(vcpu); diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c index c85f906038c..46684655708 100644 --- a/arch/powerpc/kvm/book3s_emulate.c +++ b/arch/powerpc/kvm/book3s_emulate.c @@ -73,8 +73,8 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, switch (get_xop(inst)) { case OP_19_XOP_RFID: case OP_19_XOP_RFI: - kvmppc_set_pc(vcpu, vcpu->arch.srr0); - kvmppc_set_msr(vcpu, vcpu->arch.srr1); + kvmppc_set_pc(vcpu, vcpu->arch.shared->srr0); + kvmppc_set_msr(vcpu, vcpu->arch.shared->srr1); *advance = 0; break; @@ -86,14 +86,15 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, case 31: switch (get_xop(inst)) { case OP_31_XOP_MFMSR: - kvmppc_set_gpr(vcpu, get_rt(inst), vcpu->arch.msr); + kvmppc_set_gpr(vcpu, get_rt(inst), + vcpu->arch.shared->msr); break; case OP_31_XOP_MTMSRD: { ulong rs = kvmppc_get_gpr(vcpu, get_rs(inst)); if (inst & 0x10000) { - vcpu->arch.msr &= ~(MSR_RI | MSR_EE); - vcpu->arch.msr |= rs & (MSR_RI | MSR_EE); + vcpu->arch.shared->msr &= ~(MSR_RI | MSR_EE); + vcpu->arch.shared->msr |= rs & (MSR_RI | MSR_EE); } else kvmppc_set_msr(vcpu, rs); break; @@ -204,14 +205,14 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, ra = kvmppc_get_gpr(vcpu, get_ra(inst)); addr = (ra + rb) & ~31ULL; - if (!(vcpu->arch.msr & MSR_SF)) + if (!(vcpu->arch.shared->msr & MSR_SF)) addr &= 0xffffffff; vaddr = addr; r = kvmppc_st(vcpu, &addr, 32, zeros, true); if ((r == -ENOENT) || (r == -EPERM)) { *advance = 0; - vcpu->arch.dear = vaddr; + vcpu->arch.shared->dar = vaddr; to_svcpu(vcpu)->fault_dar = vaddr; dsisr = DSISR_ISSTORE; @@ -220,7 +221,7 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, else if (r == -EPERM) dsisr |= DSISR_PROTFAULT; - to_book3s(vcpu)->dsisr = dsisr; + vcpu->arch.shared->dsisr = dsisr; to_svcpu(vcpu)->fault_dsisr = dsisr; kvmppc_book3s_queue_irqprio(vcpu, @@ -263,7 +264,7 @@ void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat, bool upper, } } -static u32 kvmppc_read_bat(struct kvm_vcpu *vcpu, int sprn) +static struct kvmppc_bat *kvmppc_find_bat(struct kvm_vcpu *vcpu, int sprn) { struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); struct kvmppc_bat *bat; @@ -285,35 +286,7 @@ static u32 kvmppc_read_bat(struct kvm_vcpu *vcpu, int sprn) BUG(); } - if (sprn % 2) - return bat->raw >> 32; - else - return bat->raw; -} - -static void kvmppc_write_bat(struct kvm_vcpu *vcpu, int sprn, u32 val) -{ - struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); - struct kvmppc_bat *bat; - - switch (sprn) { - case SPRN_IBAT0U ... SPRN_IBAT3L: - bat = &vcpu_book3s->ibat[(sprn - SPRN_IBAT0U) / 2]; - break; - case SPRN_IBAT4U ... SPRN_IBAT7L: - bat = &vcpu_book3s->ibat[4 + ((sprn - SPRN_IBAT4U) / 2)]; - break; - case SPRN_DBAT0U ... SPRN_DBAT3L: - bat = &vcpu_book3s->dbat[(sprn - SPRN_DBAT0U) / 2]; - break; - case SPRN_DBAT4U ... SPRN_DBAT7L: - bat = &vcpu_book3s->dbat[4 + ((sprn - SPRN_DBAT4U) / 2)]; - break; - default: - BUG(); - } - - kvmppc_set_bat(vcpu, bat, !(sprn % 2), val); + return bat; } int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) @@ -326,10 +299,10 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) to_book3s(vcpu)->sdr1 = spr_val; break; case SPRN_DSISR: - to_book3s(vcpu)->dsisr = spr_val; + vcpu->arch.shared->dsisr = spr_val; break; case SPRN_DAR: - vcpu->arch.dear = spr_val; + vcpu->arch.shared->dar = spr_val; break; case SPRN_HIOR: to_book3s(vcpu)->hior = spr_val; @@ -338,12 +311,16 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) case SPRN_IBAT4U ... SPRN_IBAT7L: case SPRN_DBAT0U ... SPRN_DBAT3L: case SPRN_DBAT4U ... SPRN_DBAT7L: - kvmppc_write_bat(vcpu, sprn, (u32)spr_val); + { + struct kvmppc_bat *bat = kvmppc_find_bat(vcpu, sprn); + + kvmppc_set_bat(vcpu, bat, !(sprn % 2), (u32)spr_val); /* BAT writes happen so rarely that we're ok to flush * everything here */ kvmppc_mmu_pte_flush(vcpu, 0, 0); kvmppc_mmu_flush_segments(vcpu); break; + } case SPRN_HID0: to_book3s(vcpu)->hid[0] = spr_val; break; @@ -433,16 +410,24 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) case SPRN_IBAT4U ... SPRN_IBAT7L: case SPRN_DBAT0U ... SPRN_DBAT3L: case SPRN_DBAT4U ... SPRN_DBAT7L: - kvmppc_set_gpr(vcpu, rt, kvmppc_read_bat(vcpu, sprn)); + { + struct kvmppc_bat *bat = kvmppc_find_bat(vcpu, sprn); + + if (sprn % 2) + kvmppc_set_gpr(vcpu, rt, bat->raw >> 32); + else + kvmppc_set_gpr(vcpu, rt, bat->raw); + break; + } case SPRN_SDR1: kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->sdr1); break; case SPRN_DSISR: - kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->dsisr); + kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->dsisr); break; case SPRN_DAR: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.dear); + kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->dar); break; case SPRN_HIOR: kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hior); diff --git a/arch/powerpc/kvm/book3s_mmu_hpte.c b/arch/powerpc/kvm/book3s_mmu_hpte.c index 4868d4a7ebc..79751d8dd13 100644 --- a/arch/powerpc/kvm/book3s_mmu_hpte.c +++ b/arch/powerpc/kvm/book3s_mmu_hpte.c @@ -21,6 +21,7 @@ #include <linux/kvm_host.h> #include <linux/hash.h> #include <linux/slab.h> +#include "trace.h" #include <asm/kvm_ppc.h> #include <asm/kvm_book3s.h> @@ -30,14 +31,6 @@ #define PTE_SIZE 12 -/* #define DEBUG_MMU */ - -#ifdef DEBUG_MMU -#define dprintk_mmu(a, ...) printk(KERN_INFO a, __VA_ARGS__) -#else -#define dprintk_mmu(a, ...) do { } while(0) -#endif - static struct kmem_cache *hpte_cache; static inline u64 kvmppc_mmu_hash_pte(u64 eaddr) @@ -45,6 +38,12 @@ static inline u64 kvmppc_mmu_hash_pte(u64 eaddr) return hash_64(eaddr >> PTE_SIZE, HPTEG_HASH_BITS_PTE); } +static inline u64 kvmppc_mmu_hash_pte_long(u64 eaddr) +{ + return hash_64((eaddr & 0x0ffff000) >> PTE_SIZE, + HPTEG_HASH_BITS_PTE_LONG); +} + static inline u64 kvmppc_mmu_hash_vpte(u64 vpage) { return hash_64(vpage & 0xfffffffffULL, HPTEG_HASH_BITS_VPTE); @@ -60,77 +59,128 @@ void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte) { u64 index; + trace_kvm_book3s_mmu_map(pte); + + spin_lock(&vcpu->arch.mmu_lock); + /* Add to ePTE list */ index = kvmppc_mmu_hash_pte(pte->pte.eaddr); - hlist_add_head(&pte->list_pte, &vcpu->arch.hpte_hash_pte[index]); + hlist_add_head_rcu(&pte->list_pte, &vcpu->arch.hpte_hash_pte[index]); + + /* Add to ePTE_long list */ + index = kvmppc_mmu_hash_pte_long(pte->pte.eaddr); + hlist_add_head_rcu(&pte->list_pte_long, + &vcpu->arch.hpte_hash_pte_long[index]); /* Add to vPTE list */ index = kvmppc_mmu_hash_vpte(pte->pte.vpage); - hlist_add_head(&pte->list_vpte, &vcpu->arch.hpte_hash_vpte[index]); + hlist_add_head_rcu(&pte->list_vpte, &vcpu->arch.hpte_hash_vpte[index]); /* Add to vPTE_long list */ index = kvmppc_mmu_hash_vpte_long(pte->pte.vpage); - hlist_add_head(&pte->list_vpte_long, - &vcpu->arch.hpte_hash_vpte_long[index]); + hlist_add_head_rcu(&pte->list_vpte_long, + &vcpu->arch.hpte_hash_vpte_long[index]); + + spin_unlock(&vcpu->arch.mmu_lock); +} + +static void free_pte_rcu(struct rcu_head *head) +{ + struct hpte_cache *pte = container_of(head, struct hpte_cache, rcu_head); + kmem_cache_free(hpte_cache, pte); } static void invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte) { - dprintk_mmu("KVM: Flushing SPT: 0x%lx (0x%llx) -> 0x%llx\n", - pte->pte.eaddr, pte->pte.vpage, pte->host_va); + trace_kvm_book3s_mmu_invalidate(pte); /* Different for 32 and 64 bit */ kvmppc_mmu_invalidate_pte(vcpu, pte); + spin_lock(&vcpu->arch.mmu_lock); + + /* pte already invalidated in between? */ + if (hlist_unhashed(&pte->list_pte)) { + spin_unlock(&vcpu->arch.mmu_lock); + return; + } + + hlist_del_init_rcu(&pte->list_pte); + hlist_del_init_rcu(&pte->list_pte_long); + hlist_del_init_rcu(&pte->list_vpte); + hlist_del_init_rcu(&pte->list_vpte_long); + if (pte->pte.may_write) kvm_release_pfn_dirty(pte->pfn); else kvm_release_pfn_clean(pte->pfn); - hlist_del(&pte->list_pte); - hlist_del(&pte->list_vpte); - hlist_del(&pte->list_vpte_long); + spin_unlock(&vcpu->arch.mmu_lock); vcpu->arch.hpte_cache_count--; - kmem_cache_free(hpte_cache, pte); + call_rcu(&pte->rcu_head, free_pte_rcu); } static void kvmppc_mmu_pte_flush_all(struct kvm_vcpu *vcpu) { struct hpte_cache *pte; - struct hlist_node *node, *tmp; + struct hlist_node *node; int i; + rcu_read_lock(); + for (i = 0; i < HPTEG_HASH_NUM_VPTE_LONG; i++) { struct hlist_head *list = &vcpu->arch.hpte_hash_vpte_long[i]; - hlist_for_each_entry_safe(pte, node, tmp, list, list_vpte_long) + hlist_for_each_entry_rcu(pte, node, list, list_vpte_long) invalidate_pte(vcpu, pte); } + + rcu_read_unlock(); } static void kvmppc_mmu_pte_flush_page(struct kvm_vcpu *vcpu, ulong guest_ea) { struct hlist_head *list; - struct hlist_node *node, *tmp; + struct hlist_node *node; struct hpte_cache *pte; /* Find the list of entries in the map */ list = &vcpu->arch.hpte_hash_pte[kvmppc_mmu_hash_pte(guest_ea)]; + rcu_read_lock(); + /* Check the list for matching entries and invalidate */ - hlist_for_each_entry_safe(pte, node, tmp, list, list_pte) + hlist_for_each_entry_rcu(pte, node, list, list_pte) if ((pte->pte.eaddr & ~0xfffUL) == guest_ea) invalidate_pte(vcpu, pte); + + rcu_read_unlock(); } -void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, ulong guest_ea, ulong ea_mask) +static void kvmppc_mmu_pte_flush_long(struct kvm_vcpu *vcpu, ulong guest_ea) { - u64 i; + struct hlist_head *list; + struct hlist_node *node; + struct hpte_cache *pte; - dprintk_mmu("KVM: Flushing %d Shadow PTEs: 0x%lx & 0x%lx\n", - vcpu->arch.hpte_cache_count, guest_ea, ea_mask); + /* Find the list of entries in the map */ + list = &vcpu->arch.hpte_hash_pte_long[ + kvmppc_mmu_hash_pte_long(guest_ea)]; + rcu_read_lock(); + + /* Check the list for matching entries and invalidate */ + hlist_for_each_entry_rcu(pte, node, list, list_pte_long) + if ((pte->pte.eaddr & 0x0ffff000UL) == guest_ea) + invalidate_pte(vcpu, pte); + + rcu_read_unlock(); +} + +void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, ulong guest_ea, ulong ea_mask) +{ + trace_kvm_book3s_mmu_flush("", vcpu, guest_ea, ea_mask); guest_ea &= ea_mask; switch (ea_mask) { @@ -138,9 +188,7 @@ void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, ulong guest_ea, ulong ea_mask) kvmppc_mmu_pte_flush_page(vcpu, guest_ea); break; case 0x0ffff000: - /* 32-bit flush w/o segment, go through all possible segments */ - for (i = 0; i < 0x100000000ULL; i += 0x10000000ULL) - kvmppc_mmu_pte_flush(vcpu, guest_ea | i, ~0xfffUL); + kvmppc_mmu_pte_flush_long(vcpu, guest_ea); break; case 0: /* Doing a complete flush -> start from scratch */ @@ -156,39 +204,46 @@ void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, ulong guest_ea, ulong ea_mask) static void kvmppc_mmu_pte_vflush_short(struct kvm_vcpu *vcpu, u64 guest_vp) { struct hlist_head *list; - struct hlist_node *node, *tmp; + struct hlist_node *node; struct hpte_cache *pte; u64 vp_mask = 0xfffffffffULL; list = &vcpu->arch.hpte_hash_vpte[kvmppc_mmu_hash_vpte(guest_vp)]; + rcu_read_lock(); + /* Check the list for matching entries and invalidate */ - hlist_for_each_entry_safe(pte, node, tmp, list, list_vpte) + hlist_for_each_entry_rcu(pte, node, list, list_vpte) if ((pte->pte.vpage & vp_mask) == guest_vp) invalidate_pte(vcpu, pte); + + rcu_read_unlock(); } /* Flush with mask 0xffffff000 */ static void kvmppc_mmu_pte_vflush_long(struct kvm_vcpu *vcpu, u64 guest_vp) { struct hlist_head *list; - struct hlist_node *node, *tmp; + struct hlist_node *node; struct hpte_cache *pte; u64 vp_mask = 0xffffff000ULL; list = &vcpu->arch.hpte_hash_vpte_long[ kvmppc_mmu_hash_vpte_long(guest_vp)]; + rcu_read_lock(); + /* Check the list for matching entries and invalidate */ - hlist_for_each_entry_safe(pte, node, tmp, list, list_vpte_long) + hlist_for_each_entry_rcu(pte, node, list, list_vpte_long) if ((pte->pte.vpage & vp_mask) == guest_vp) invalidate_pte(vcpu, pte); + + rcu_read_unlock(); } void kvmppc_mmu_pte_vflush(struct kvm_vcpu *vcpu, u64 guest_vp, u64 vp_mask) { - dprintk_mmu("KVM: Flushing %d Shadow vPTEs: 0x%llx & 0x%llx\n", - vcpu->arch.hpte_cache_count, guest_vp, vp_mask); + trace_kvm_book3s_mmu_flush("v", vcpu, guest_vp, vp_mask); guest_vp &= vp_mask; switch(vp_mask) { @@ -206,21 +261,24 @@ void kvmppc_mmu_pte_vflush(struct kvm_vcpu *vcpu, u64 guest_vp, u64 vp_mask) void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end) { - struct hlist_node *node, *tmp; + struct hlist_node *node; struct hpte_cache *pte; int i; - dprintk_mmu("KVM: Flushing %d Shadow pPTEs: 0x%lx - 0x%lx\n", - vcpu->arch.hpte_cache_count, pa_start, pa_end); + trace_kvm_book3s_mmu_flush("p", vcpu, pa_start, pa_end); + + rcu_read_lock(); for (i = 0; i < HPTEG_HASH_NUM_VPTE_LONG; i++) { struct hlist_head *list = &vcpu->arch.hpte_hash_vpte_long[i]; - hlist_for_each_entry_safe(pte, node, tmp, list, list_vpte_long) + hlist_for_each_entry_rcu(pte, node, list, list_vpte_long) if ((pte->pte.raddr >= pa_start) && (pte->pte.raddr < pa_end)) invalidate_pte(vcpu, pte); } + + rcu_read_unlock(); } struct hpte_cache *kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu) @@ -254,11 +312,15 @@ int kvmppc_mmu_hpte_init(struct kvm_vcpu *vcpu) /* init hpte lookup hashes */ kvmppc_mmu_hpte_init_hash(vcpu->arch.hpte_hash_pte, ARRAY_SIZE(vcpu->arch.hpte_hash_pte)); + kvmppc_mmu_hpte_init_hash(vcpu->arch.hpte_hash_pte_long, + ARRAY_SIZE(vcpu->arch.hpte_hash_pte_long)); kvmppc_mmu_hpte_init_hash(vcpu->arch.hpte_hash_vpte, ARRAY_SIZE(vcpu->arch.hpte_hash_vpte)); kvmppc_mmu_hpte_init_hash(vcpu->arch.hpte_hash_vpte_long, ARRAY_SIZE(vcpu->arch.hpte_hash_vpte_long)); + spin_lock_init(&vcpu->arch.mmu_lock); + return 0; } diff --git a/arch/powerpc/kvm/book3s_paired_singles.c b/arch/powerpc/kvm/book3s_paired_singles.c index 35a701f3ece..7b0ee96c1be 100644 --- a/arch/powerpc/kvm/book3s_paired_singles.c +++ b/arch/powerpc/kvm/book3s_paired_singles.c @@ -165,14 +165,15 @@ static inline void kvmppc_sync_qpr(struct kvm_vcpu *vcpu, int rt) static void kvmppc_inject_pf(struct kvm_vcpu *vcpu, ulong eaddr, bool is_store) { u64 dsisr; + struct kvm_vcpu_arch_shared *shared = vcpu->arch.shared; - vcpu->arch.msr = kvmppc_set_field(vcpu->arch.msr, 33, 36, 0); - vcpu->arch.msr = kvmppc_set_field(vcpu->arch.msr, 42, 47, 0); - vcpu->arch.dear = eaddr; + shared->msr = kvmppc_set_field(shared->msr, 33, 36, 0); + shared->msr = kvmppc_set_field(shared->msr, 42, 47, 0); + shared->dar = eaddr; /* Page Fault */ dsisr = kvmppc_set_field(0, 33, 33, 1); if (is_store) - to_book3s(vcpu)->dsisr = kvmppc_set_field(dsisr, 38, 38, 1); + shared->dsisr = kvmppc_set_field(dsisr, 38, 38, 1); kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DATA_STORAGE); } @@ -658,7 +659,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) if (!kvmppc_inst_is_paired_single(vcpu, inst)) return EMULATE_FAIL; - if (!(vcpu->arch.msr & MSR_FP)) { + if (!(vcpu->arch.shared->msr & MSR_FP)) { kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL); return EMULATE_AGAIN; } diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S index 506d5c316c9..2b9c9088d00 100644 --- a/arch/powerpc/kvm/book3s_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_rmhandlers.S @@ -202,8 +202,25 @@ _GLOBAL(kvmppc_rmcall) #if defined(CONFIG_PPC_BOOK3S_32) #define STACK_LR INT_FRAME_SIZE+4 + +/* load_up_xxx have to run with MSR_DR=0 on Book3S_32 */ +#define MSR_EXT_START \ + PPC_STL r20, _NIP(r1); \ + mfmsr r20; \ + LOAD_REG_IMMEDIATE(r3, MSR_DR|MSR_EE); \ + andc r3,r20,r3; /* Disable DR,EE */ \ + mtmsr r3; \ + sync + +#define MSR_EXT_END \ + mtmsr r20; /* Enable DR,EE */ \ + sync; \ + PPC_LL r20, _NIP(r1) + #elif defined(CONFIG_PPC_BOOK3S_64) #define STACK_LR _LINK +#define MSR_EXT_START +#define MSR_EXT_END #endif /* @@ -215,19 +232,12 @@ _GLOBAL(kvmppc_load_up_ ## what); \ PPC_STLU r1, -INT_FRAME_SIZE(r1); \ mflr r3; \ PPC_STL r3, STACK_LR(r1); \ - PPC_STL r20, _NIP(r1); \ - mfmsr r20; \ - LOAD_REG_IMMEDIATE(r3, MSR_DR|MSR_EE); \ - andc r3,r20,r3; /* Disable DR,EE */ \ - mtmsr r3; \ - sync; \ + MSR_EXT_START; \ \ bl FUNC(load_up_ ## what); \ \ - mtmsr r20; /* Enable DR,EE */ \ - sync; \ + MSR_EXT_END; \ PPC_LL r3, STACK_LR(r1); \ - PPC_LL r20, _NIP(r1); \ mtlr r3; \ addi r1, r1, INT_FRAME_SIZE; \ blr @@ -242,10 +252,10 @@ define_load_up(vsx) .global kvmppc_trampoline_lowmem kvmppc_trampoline_lowmem: - .long kvmppc_handler_lowmem_trampoline - CONFIG_KERNEL_START + PPC_LONG kvmppc_handler_lowmem_trampoline - CONFIG_KERNEL_START .global kvmppc_trampoline_enter kvmppc_trampoline_enter: - .long kvmppc_handler_trampoline_enter - CONFIG_KERNEL_START + PPC_LONG kvmppc_handler_trampoline_enter - CONFIG_KERNEL_START #include "book3s_segment.S" diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 8d4e35f5372..77575d08c81 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -62,9 +62,10 @@ void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu) { int i; - printk("pc: %08lx msr: %08lx\n", vcpu->arch.pc, vcpu->arch.msr); + printk("pc: %08lx msr: %08llx\n", vcpu->arch.pc, vcpu->arch.shared->msr); printk("lr: %08lx ctr: %08lx\n", vcpu->arch.lr, vcpu->arch.ctr); - printk("srr0: %08lx srr1: %08lx\n", vcpu->arch.srr0, vcpu->arch.srr1); + printk("srr0: %08llx srr1: %08llx\n", vcpu->arch.shared->srr0, + vcpu->arch.shared->srr1); printk("exceptions: %08lx\n", vcpu->arch.pending_exceptions); @@ -130,13 +131,19 @@ void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu) void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) { - kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_EXTERNAL); + unsigned int prio = BOOKE_IRQPRIO_EXTERNAL; + + if (irq->irq == KVM_INTERRUPT_SET_LEVEL) + prio = BOOKE_IRQPRIO_EXTERNAL_LEVEL; + + kvmppc_booke_queue_irqprio(vcpu, prio); } void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) { clear_bit(BOOKE_IRQPRIO_EXTERNAL, &vcpu->arch.pending_exceptions); + clear_bit(BOOKE_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions); } /* Deliver the interrupt of the corresponding priority, if possible. */ @@ -146,6 +153,26 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, int allowed = 0; ulong uninitialized_var(msr_mask); bool update_esr = false, update_dear = false; + ulong crit_raw = vcpu->arch.shared->critical; + ulong crit_r1 = kvmppc_get_gpr(vcpu, 1); + bool crit; + bool keep_irq = false; + + /* Truncate crit indicators in 32 bit mode */ + if (!(vcpu->arch.shared->msr & MSR_SF)) { + crit_raw &= 0xffffffff; + crit_r1 &= 0xffffffff; + } + + /* Critical section when crit == r1 */ + crit = (crit_raw == crit_r1); + /* ... and we're in supervisor mode */ + crit = crit && !(vcpu->arch.shared->msr & MSR_PR); + + if (priority == BOOKE_IRQPRIO_EXTERNAL_LEVEL) { + priority = BOOKE_IRQPRIO_EXTERNAL; + keep_irq = true; + } switch (priority) { case BOOKE_IRQPRIO_DTLB_MISS: @@ -169,36 +196,38 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, break; case BOOKE_IRQPRIO_CRITICAL: case BOOKE_IRQPRIO_WATCHDOG: - allowed = vcpu->arch.msr & MSR_CE; + allowed = vcpu->arch.shared->msr & MSR_CE; msr_mask = MSR_ME; break; case BOOKE_IRQPRIO_MACHINE_CHECK: - allowed = vcpu->arch.msr & MSR_ME; + allowed = vcpu->arch.shared->msr & MSR_ME; msr_mask = 0; break; case BOOKE_IRQPRIO_EXTERNAL: case BOOKE_IRQPRIO_DECREMENTER: case BOOKE_IRQPRIO_FIT: - allowed = vcpu->arch.msr & MSR_EE; + allowed = vcpu->arch.shared->msr & MSR_EE; + allowed = allowed && !crit; msr_mask = MSR_CE|MSR_ME|MSR_DE; break; case BOOKE_IRQPRIO_DEBUG: - allowed = vcpu->arch.msr & MSR_DE; + allowed = vcpu->arch.shared->msr & MSR_DE; msr_mask = MSR_ME; break; } if (allowed) { - vcpu->arch.srr0 = vcpu->arch.pc; - vcpu->arch.srr1 = vcpu->arch.msr; + vcpu->arch.shared->srr0 = vcpu->arch.pc; + vcpu->arch.shared->srr1 = vcpu->arch.shared->msr; vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[priority]; if (update_esr == true) vcpu->arch.esr = vcpu->arch.queued_esr; if (update_dear == true) - vcpu->arch.dear = vcpu->arch.queued_dear; - kvmppc_set_msr(vcpu, vcpu->arch.msr & msr_mask); + vcpu->arch.shared->dar = vcpu->arch.queued_dear; + kvmppc_set_msr(vcpu, vcpu->arch.shared->msr & msr_mask); - clear_bit(priority, &vcpu->arch.pending_exceptions); + if (!keep_irq) + clear_bit(priority, &vcpu->arch.pending_exceptions); } return allowed; @@ -208,6 +237,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu) { unsigned long *pending = &vcpu->arch.pending_exceptions; + unsigned long old_pending = vcpu->arch.pending_exceptions; unsigned int priority; priority = __ffs(*pending); @@ -219,6 +249,12 @@ void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu) BITS_PER_BYTE * sizeof(*pending), priority + 1); } + + /* Tell the guest about our interrupt status */ + if (*pending) + vcpu->arch.shared->int_pending = 1; + else if (old_pending) + vcpu->arch.shared->int_pending = 0; } /** @@ -265,7 +301,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, break; case BOOKE_INTERRUPT_PROGRAM: - if (vcpu->arch.msr & MSR_PR) { + if (vcpu->arch.shared->msr & MSR_PR) { /* Program traps generated by user-level software must be handled * by the guest kernel. */ kvmppc_core_queue_program(vcpu, vcpu->arch.fault_esr); @@ -337,7 +373,15 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, break; case BOOKE_INTERRUPT_SYSCALL: - kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SYSCALL); + if (!(vcpu->arch.shared->msr & MSR_PR) && + (((u32)kvmppc_get_gpr(vcpu, 0)) == KVM_SC_MAGIC_R0)) { + /* KVM PV hypercalls */ + kvmppc_set_gpr(vcpu, 3, kvmppc_kvm_pv(vcpu)); + r = RESUME_GUEST; + } else { + /* Guest syscalls */ + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SYSCALL); + } kvmppc_account_exit(vcpu, SYSCALL_EXITS); r = RESUME_GUEST; break; @@ -466,15 +510,19 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, /* Initial guest state: 16MB mapping 0 -> 0, PC = 0, MSR = 0, R1 = 16MB */ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) { + int i; + vcpu->arch.pc = 0; - vcpu->arch.msr = 0; + vcpu->arch.shared->msr = 0; kvmppc_set_gpr(vcpu, 1, (16<<20) - 8); /* -8 for the callee-save LR slot */ vcpu->arch.shadow_pid = 1; - /* Eye-catching number so we know if the guest takes an interrupt - * before it's programmed its own IVPR. */ + /* Eye-catching numbers so we know if the guest takes an interrupt + * before it's programmed its own IVPR/IVORs. */ vcpu->arch.ivpr = 0x55550000; + for (i = 0; i < BOOKE_IRQPRIO_MAX; i++) + vcpu->arch.ivor[i] = 0x7700 | i * 4; kvmppc_init_timing_stats(vcpu); @@ -490,14 +538,14 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) regs->ctr = vcpu->arch.ctr; regs->lr = vcpu->arch.lr; regs->xer = kvmppc_get_xer(vcpu); - regs->msr = vcpu->arch.msr; - regs->srr0 = vcpu->arch.srr0; - regs->srr1 = vcpu->arch.srr1; + regs->msr = vcpu->arch.shared->msr; + regs->srr0 = vcpu->arch.shared->srr0; + regs->srr1 = vcpu->arch.shared->srr1; regs->pid = vcpu->arch.pid; - regs->sprg0 = vcpu->arch.sprg0; - regs->sprg1 = vcpu->arch.sprg1; - regs->sprg2 = vcpu->arch.sprg2; - regs->sprg3 = vcpu->arch.sprg3; + regs->sprg0 = vcpu->arch.shared->sprg0; + regs->sprg1 = vcpu->arch.shared->sprg1; + regs->sprg2 = vcpu->arch.shared->sprg2; + regs->sprg3 = vcpu->arch.shared->sprg3; regs->sprg5 = vcpu->arch.sprg4; regs->sprg6 = vcpu->arch.sprg5; regs->sprg7 = vcpu->arch.sprg6; @@ -518,12 +566,12 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) vcpu->arch.lr = regs->lr; kvmppc_set_xer(vcpu, regs->xer); kvmppc_set_msr(vcpu, regs->msr); - vcpu->arch.srr0 = regs->srr0; - vcpu->arch.srr1 = regs->srr1; - vcpu->arch.sprg0 = regs->sprg0; - vcpu->arch.sprg1 = regs->sprg1; - vcpu->arch.sprg2 = regs->sprg2; - vcpu->arch.sprg3 = regs->sprg3; + vcpu->arch.shared->srr0 = regs->srr0; + vcpu->arch.shared->srr1 = regs->srr1; + vcpu->arch.shared->sprg0 = regs->sprg0; + vcpu->arch.shared->sprg1 = regs->sprg1; + vcpu->arch.shared->sprg2 = regs->sprg2; + vcpu->arch.shared->sprg3 = regs->sprg3; vcpu->arch.sprg5 = regs->sprg4; vcpu->arch.sprg6 = regs->sprg5; vcpu->arch.sprg7 = regs->sprg6; diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h index d59bcca1f9d..492bb703035 100644 --- a/arch/powerpc/kvm/booke.h +++ b/arch/powerpc/kvm/booke.h @@ -46,7 +46,9 @@ #define BOOKE_IRQPRIO_FIT 17 #define BOOKE_IRQPRIO_DECREMENTER 18 #define BOOKE_IRQPRIO_PERFORMANCE_MONITOR 19 -#define BOOKE_IRQPRIO_MAX 19 +/* Internal pseudo-irqprio for level triggered externals */ +#define BOOKE_IRQPRIO_EXTERNAL_LEVEL 20 +#define BOOKE_IRQPRIO_MAX 20 extern unsigned long kvmppc_booke_handlers; @@ -54,12 +56,12 @@ extern unsigned long kvmppc_booke_handlers; * changing. */ static inline void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr) { - if ((new_msr & MSR_PR) != (vcpu->arch.msr & MSR_PR)) + if ((new_msr & MSR_PR) != (vcpu->arch.shared->msr & MSR_PR)) kvmppc_mmu_priv_switch(vcpu, new_msr & MSR_PR); - vcpu->arch.msr = new_msr; + vcpu->arch.shared->msr = new_msr; - if (vcpu->arch.msr & MSR_WE) { + if (vcpu->arch.shared->msr & MSR_WE) { kvm_vcpu_block(vcpu); kvmppc_set_exit_type(vcpu, EMULATED_MTMSRWE_EXITS); }; diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c index cbc790ee192..1260f5f24c0 100644 --- a/arch/powerpc/kvm/booke_emulate.c +++ b/arch/powerpc/kvm/booke_emulate.c @@ -31,8 +31,8 @@ static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu) { - vcpu->arch.pc = vcpu->arch.srr0; - kvmppc_set_msr(vcpu, vcpu->arch.srr1); + vcpu->arch.pc = vcpu->arch.shared->srr0; + kvmppc_set_msr(vcpu, vcpu->arch.shared->srr1); } int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, @@ -62,7 +62,7 @@ int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, case OP_31_XOP_MFMSR: rt = get_rt(inst); - kvmppc_set_gpr(vcpu, rt, vcpu->arch.msr); + kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->msr); kvmppc_set_exit_type(vcpu, EMULATED_MFMSR_EXITS); break; @@ -74,13 +74,13 @@ int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, case OP_31_XOP_WRTEE: rs = get_rs(inst); - vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE) + vcpu->arch.shared->msr = (vcpu->arch.shared->msr & ~MSR_EE) | (kvmppc_get_gpr(vcpu, rs) & MSR_EE); kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS); break; case OP_31_XOP_WRTEEI: - vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE) + vcpu->arch.shared->msr = (vcpu->arch.shared->msr & ~MSR_EE) | (inst & MSR_EE); kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS); break; @@ -105,7 +105,7 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) switch (sprn) { case SPRN_DEAR: - vcpu->arch.dear = spr_val; break; + vcpu->arch.shared->dar = spr_val; break; case SPRN_ESR: vcpu->arch.esr = spr_val; break; case SPRN_DBCR0: @@ -200,7 +200,7 @@ int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) case SPRN_IVPR: kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivpr); break; case SPRN_DEAR: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.dear); break; + kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->dar); break; case SPRN_ESR: kvmppc_set_gpr(vcpu, rt, vcpu->arch.esr); break; case SPRN_DBCR0: diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S index 380a78cf484..049846911ce 100644 --- a/arch/powerpc/kvm/booke_interrupts.S +++ b/arch/powerpc/kvm/booke_interrupts.S @@ -415,7 +415,8 @@ lightweight_exit: lwz r8, VCPU_GPR(r8)(r4) lwz r3, VCPU_PC(r4) mtsrr0 r3 - lwz r3, VCPU_MSR(r4) + lwz r3, VCPU_SHARED(r4) + lwz r3, VCPU_SHARED_MSR(r3) oris r3, r3, KVMPPC_MSR_MASK@h ori r3, r3, KVMPPC_MSR_MASK@l mtsrr1 r3 diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c index e8a00b0c444..71750f2dd5d 100644 --- a/arch/powerpc/kvm/e500.c +++ b/arch/powerpc/kvm/e500.c @@ -117,8 +117,14 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) if (err) goto uninit_vcpu; + vcpu->arch.shared = (void*)__get_free_page(GFP_KERNEL|__GFP_ZERO); + if (!vcpu->arch.shared) + goto uninit_tlb; + return vcpu; +uninit_tlb: + kvmppc_e500_tlb_uninit(vcpu_e500); uninit_vcpu: kvm_vcpu_uninit(vcpu); free_vcpu: @@ -131,6 +137,7 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) { struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + free_page((unsigned long)vcpu->arch.shared); kvmppc_e500_tlb_uninit(vcpu_e500); kvm_vcpu_uninit(vcpu); kmem_cache_free(kvm_vcpu_cache, vcpu_e500); diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index 21011e12cae..d6d6d47a75a 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c @@ -226,8 +226,7 @@ static void kvmppc_e500_stlbe_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500, kvmppc_e500_shadow_release(vcpu_e500, tlbsel, esel); stlbe->mas1 = 0; - trace_kvm_stlb_inval(index_of(tlbsel, esel), stlbe->mas1, stlbe->mas2, - stlbe->mas3, stlbe->mas7); + trace_kvm_stlb_inval(index_of(tlbsel, esel)); } static void kvmppc_e500_tlb1_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500, @@ -298,7 +297,8 @@ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, /* Get reference to new page. */ new_page = gfn_to_page(vcpu_e500->vcpu.kvm, gfn); if (is_error_page(new_page)) { - printk(KERN_ERR "Couldn't get guest page for gfn %lx!\n", gfn); + printk(KERN_ERR "Couldn't get guest page for gfn %lx!\n", + (long)gfn); kvm_release_page_clean(new_page); return; } @@ -314,10 +314,10 @@ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, | MAS1_TID(get_tlb_tid(gtlbe)) | MAS1_TS | MAS1_VALID; stlbe->mas2 = (gvaddr & MAS2_EPN) | e500_shadow_mas2_attrib(gtlbe->mas2, - vcpu_e500->vcpu.arch.msr & MSR_PR); + vcpu_e500->vcpu.arch.shared->msr & MSR_PR); stlbe->mas3 = (hpaddr & MAS3_RPN) | e500_shadow_mas3_attrib(gtlbe->mas3, - vcpu_e500->vcpu.arch.msr & MSR_PR); + vcpu_e500->vcpu.arch.shared->msr & MSR_PR); stlbe->mas7 = (hpaddr >> 32) & MAS7_RPN; trace_kvm_stlb_write(index_of(tlbsel, esel), stlbe->mas1, stlbe->mas2, @@ -576,28 +576,28 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu) int kvmppc_mmu_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr) { - unsigned int as = !!(vcpu->arch.msr & MSR_IS); + unsigned int as = !!(vcpu->arch.shared->msr & MSR_IS); return kvmppc_e500_tlb_search(vcpu, eaddr, get_cur_pid(vcpu), as); } int kvmppc_mmu_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr) { - unsigned int as = !!(vcpu->arch.msr & MSR_DS); + unsigned int as = !!(vcpu->arch.shared->msr & MSR_DS); return kvmppc_e500_tlb_search(vcpu, eaddr, get_cur_pid(vcpu), as); } void kvmppc_mmu_itlb_miss(struct kvm_vcpu *vcpu) { - unsigned int as = !!(vcpu->arch.msr & MSR_IS); + unsigned int as = !!(vcpu->arch.shared->msr & MSR_IS); kvmppc_e500_deliver_tlb_miss(vcpu, vcpu->arch.pc, as); } void kvmppc_mmu_dtlb_miss(struct kvm_vcpu *vcpu) { - unsigned int as = !!(vcpu->arch.msr & MSR_DS); + unsigned int as = !!(vcpu->arch.shared->msr & MSR_DS); kvmppc_e500_deliver_tlb_miss(vcpu, vcpu->arch.fault_dear, as); } diff --git a/arch/powerpc/kvm/e500_tlb.h b/arch/powerpc/kvm/e500_tlb.h index d28e3010a5e..458946b4775 100644 --- a/arch/powerpc/kvm/e500_tlb.h +++ b/arch/powerpc/kvm/e500_tlb.h @@ -171,7 +171,7 @@ static inline int tlbe_is_host_safe(const struct kvm_vcpu *vcpu, /* Does it match current guest AS? */ /* XXX what about IS != DS? */ - if (get_tlb_ts(tlbe) != !!(vcpu->arch.msr & MSR_IS)) + if (get_tlb_ts(tlbe) != !!(vcpu->arch.shared->msr & MSR_IS)) return 0; gpa = get_tlb_raddr(tlbe); diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index b83ba581fd8..c64fd2909bb 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c @@ -242,9 +242,11 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) switch (sprn) { case SPRN_SRR0: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.srr0); break; + kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->srr0); + break; case SPRN_SRR1: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.srr1); break; + kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->srr1); + break; case SPRN_PVR: kvmppc_set_gpr(vcpu, rt, vcpu->arch.pvr); break; case SPRN_PIR: @@ -261,13 +263,17 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) kvmppc_set_gpr(vcpu, rt, get_tb()); break; case SPRN_SPRG0: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.sprg0); break; + kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->sprg0); + break; case SPRN_SPRG1: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.sprg1); break; + kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->sprg1); + break; case SPRN_SPRG2: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.sprg2); break; + kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->sprg2); + break; case SPRN_SPRG3: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.sprg3); break; + kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->sprg3); + break; /* Note: SPRG4-7 are user-readable, so we don't get * a trap. */ @@ -320,9 +326,11 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) rs = get_rs(inst); switch (sprn) { case SPRN_SRR0: - vcpu->arch.srr0 = kvmppc_get_gpr(vcpu, rs); break; + vcpu->arch.shared->srr0 = kvmppc_get_gpr(vcpu, rs); + break; case SPRN_SRR1: - vcpu->arch.srr1 = kvmppc_get_gpr(vcpu, rs); break; + vcpu->arch.shared->srr1 = kvmppc_get_gpr(vcpu, rs); + break; /* XXX We need to context-switch the timebase for * watchdog and FIT. */ @@ -337,13 +345,17 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) break; case SPRN_SPRG0: - vcpu->arch.sprg0 = kvmppc_get_gpr(vcpu, rs); break; + vcpu->arch.shared->sprg0 = kvmppc_get_gpr(vcpu, rs); + break; case SPRN_SPRG1: - vcpu->arch.sprg1 = kvmppc_get_gpr(vcpu, rs); break; + vcpu->arch.shared->sprg1 = kvmppc_get_gpr(vcpu, rs); + break; case SPRN_SPRG2: - vcpu->arch.sprg2 = kvmppc_get_gpr(vcpu, rs); break; + vcpu->arch.shared->sprg2 = kvmppc_get_gpr(vcpu, rs); + break; case SPRN_SPRG3: - vcpu->arch.sprg3 = kvmppc_get_gpr(vcpu, rs); break; + vcpu->arch.shared->sprg3 = kvmppc_get_gpr(vcpu, rs); + break; default: emulated = kvmppc_core_emulate_mtspr(vcpu, sprn, rs); diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 72a4ad86ee9..2f87a1627f6 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -38,9 +38,56 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) { - return !(v->arch.msr & MSR_WE) || !!(v->arch.pending_exceptions); + return !(v->arch.shared->msr & MSR_WE) || + !!(v->arch.pending_exceptions); } +int kvmppc_kvm_pv(struct kvm_vcpu *vcpu) +{ + int nr = kvmppc_get_gpr(vcpu, 11); + int r; + unsigned long __maybe_unused param1 = kvmppc_get_gpr(vcpu, 3); + unsigned long __maybe_unused param2 = kvmppc_get_gpr(vcpu, 4); + unsigned long __maybe_unused param3 = kvmppc_get_gpr(vcpu, 5); + unsigned long __maybe_unused param4 = kvmppc_get_gpr(vcpu, 6); + unsigned long r2 = 0; + + if (!(vcpu->arch.shared->msr & MSR_SF)) { + /* 32 bit mode */ + param1 &= 0xffffffff; + param2 &= 0xffffffff; + param3 &= 0xffffffff; + param4 &= 0xffffffff; + } + + switch (nr) { + case HC_VENDOR_KVM | KVM_HC_PPC_MAP_MAGIC_PAGE: + { + vcpu->arch.magic_page_pa = param1; + vcpu->arch.magic_page_ea = param2; + + r2 = KVM_MAGIC_FEAT_SR; + + r = HC_EV_SUCCESS; + break; + } + case HC_VENDOR_KVM | KVM_HC_FEATURES: + r = HC_EV_SUCCESS; +#if defined(CONFIG_PPC_BOOK3S) /* XXX Missing magic page on BookE */ + r2 |= (1 << KVM_FEATURE_MAGIC_PAGE); +#endif + + /* Second return value is in r4 */ + break; + default: + r = HC_EV_UNIMPLEMENTED; + break; + } + + kvmppc_set_gpr(vcpu, 4, r2); + + return r; +} int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu) { @@ -145,8 +192,10 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_PPC_SEGSTATE: case KVM_CAP_PPC_PAIRED_SINGLES: case KVM_CAP_PPC_UNSET_IRQ: + case KVM_CAP_PPC_IRQ_LEVEL: case KVM_CAP_ENABLE_CAP: case KVM_CAP_PPC_OSI: + case KVM_CAP_PPC_GET_PVINFO: r = 1; break; case KVM_CAP_COALESCED_MMIO: @@ -534,16 +583,53 @@ out: return r; } +static int kvm_vm_ioctl_get_pvinfo(struct kvm_ppc_pvinfo *pvinfo) +{ + u32 inst_lis = 0x3c000000; + u32 inst_ori = 0x60000000; + u32 inst_nop = 0x60000000; + u32 inst_sc = 0x44000002; + u32 inst_imm_mask = 0xffff; + + /* + * The hypercall to get into KVM from within guest context is as + * follows: + * + * lis r0, r0, KVM_SC_MAGIC_R0@h + * ori r0, KVM_SC_MAGIC_R0@l + * sc + * nop + */ + pvinfo->hcall[0] = inst_lis | ((KVM_SC_MAGIC_R0 >> 16) & inst_imm_mask); + pvinfo->hcall[1] = inst_ori | (KVM_SC_MAGIC_R0 & inst_imm_mask); + pvinfo->hcall[2] = inst_sc; + pvinfo->hcall[3] = inst_nop; + + return 0; +} + long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { + void __user *argp = (void __user *)arg; long r; switch (ioctl) { + case KVM_PPC_GET_PVINFO: { + struct kvm_ppc_pvinfo pvinfo; + r = kvm_vm_ioctl_get_pvinfo(&pvinfo); + if (copy_to_user(argp, &pvinfo, sizeof(pvinfo))) { + r = -EFAULT; + goto out; + } + + break; + } default: r = -ENOTTY; } +out: return r; } diff --git a/arch/powerpc/kvm/trace.h b/arch/powerpc/kvm/trace.h index a8e84001805..3aca1b042b8 100644 --- a/arch/powerpc/kvm/trace.h +++ b/arch/powerpc/kvm/trace.h @@ -98,6 +98,245 @@ TRACE_EVENT(kvm_gtlb_write, __entry->word1, __entry->word2) ); + +/************************************************************************* + * Book3S trace points * + *************************************************************************/ + +#ifdef CONFIG_PPC_BOOK3S + +TRACE_EVENT(kvm_book3s_exit, + TP_PROTO(unsigned int exit_nr, struct kvm_vcpu *vcpu), + TP_ARGS(exit_nr, vcpu), + + TP_STRUCT__entry( + __field( unsigned int, exit_nr ) + __field( unsigned long, pc ) + __field( unsigned long, msr ) + __field( unsigned long, dar ) + __field( unsigned long, srr1 ) + ), + + TP_fast_assign( + __entry->exit_nr = exit_nr; + __entry->pc = kvmppc_get_pc(vcpu); + __entry->dar = kvmppc_get_fault_dar(vcpu); + __entry->msr = vcpu->arch.shared->msr; + __entry->srr1 = to_svcpu(vcpu)->shadow_srr1; + ), + + TP_printk("exit=0x%x | pc=0x%lx | msr=0x%lx | dar=0x%lx | srr1=0x%lx", + __entry->exit_nr, __entry->pc, __entry->msr, __entry->dar, + __entry->srr1) +); + +TRACE_EVENT(kvm_book3s_reenter, + TP_PROTO(int r, struct kvm_vcpu *vcpu), + TP_ARGS(r, vcpu), + + TP_STRUCT__entry( + __field( unsigned int, r ) + __field( unsigned long, pc ) + ), + + TP_fast_assign( + __entry->r = r; + __entry->pc = kvmppc_get_pc(vcpu); + ), + + TP_printk("reentry r=%d | pc=0x%lx", __entry->r, __entry->pc) +); + +#ifdef CONFIG_PPC_BOOK3S_64 + +TRACE_EVENT(kvm_book3s_64_mmu_map, + TP_PROTO(int rflags, ulong hpteg, ulong va, pfn_t hpaddr, + struct kvmppc_pte *orig_pte), + TP_ARGS(rflags, hpteg, va, hpaddr, orig_pte), + + TP_STRUCT__entry( + __field( unsigned char, flag_w ) + __field( unsigned char, flag_x ) + __field( unsigned long, eaddr ) + __field( unsigned long, hpteg ) + __field( unsigned long, va ) + __field( unsigned long long, vpage ) + __field( unsigned long, hpaddr ) + ), + + TP_fast_assign( + __entry->flag_w = ((rflags & HPTE_R_PP) == 3) ? '-' : 'w'; + __entry->flag_x = (rflags & HPTE_R_N) ? '-' : 'x'; + __entry->eaddr = orig_pte->eaddr; + __entry->hpteg = hpteg; + __entry->va = va; + __entry->vpage = orig_pte->vpage; + __entry->hpaddr = hpaddr; + ), + + TP_printk("KVM: %c%c Map 0x%lx: [%lx] 0x%lx (0x%llx) -> %lx", + __entry->flag_w, __entry->flag_x, __entry->eaddr, + __entry->hpteg, __entry->va, __entry->vpage, __entry->hpaddr) +); + +#endif /* CONFIG_PPC_BOOK3S_64 */ + +TRACE_EVENT(kvm_book3s_mmu_map, + TP_PROTO(struct hpte_cache *pte), + TP_ARGS(pte), + + TP_STRUCT__entry( + __field( u64, host_va ) + __field( u64, pfn ) + __field( ulong, eaddr ) + __field( u64, vpage ) + __field( ulong, raddr ) + __field( int, flags ) + ), + + TP_fast_assign( + __entry->host_va = pte->host_va; + __entry->pfn = pte->pfn; + __entry->eaddr = pte->pte.eaddr; + __entry->vpage = pte->pte.vpage; + __entry->raddr = pte->pte.raddr; + __entry->flags = (pte->pte.may_read ? 0x4 : 0) | + (pte->pte.may_write ? 0x2 : 0) | + (pte->pte.may_execute ? 0x1 : 0); + ), + + TP_printk("Map: hva=%llx pfn=%llx ea=%lx vp=%llx ra=%lx [%x]", + __entry->host_va, __entry->pfn, __entry->eaddr, + __entry->vpage, __entry->raddr, __entry->flags) +); + +TRACE_EVENT(kvm_book3s_mmu_invalidate, + TP_PROTO(struct hpte_cache *pte), + TP_ARGS(pte), + + TP_STRUCT__entry( + __field( u64, host_va ) + __field( u64, pfn ) + __field( ulong, eaddr ) + __field( u64, vpage ) + __field( ulong, raddr ) + __field( int, flags ) + ), + + TP_fast_assign( + __entry->host_va = pte->host_va; + __entry->pfn = pte->pfn; + __entry->eaddr = pte->pte.eaddr; + __entry->vpage = pte->pte.vpage; + __entry->raddr = pte->pte.raddr; + __entry->flags = (pte->pte.may_read ? 0x4 : 0) | + (pte->pte.may_write ? 0x2 : 0) | + (pte->pte.may_execute ? 0x1 : 0); + ), + + TP_printk("Flush: hva=%llx pfn=%llx ea=%lx vp=%llx ra=%lx [%x]", + __entry->host_va, __entry->pfn, __entry->eaddr, + __entry->vpage, __entry->raddr, __entry->flags) +); + +TRACE_EVENT(kvm_book3s_mmu_flush, + TP_PROTO(const char *type, struct kvm_vcpu *vcpu, unsigned long long p1, + unsigned long long p2), + TP_ARGS(type, vcpu, p1, p2), + + TP_STRUCT__entry( + __field( int, count ) + __field( unsigned long long, p1 ) + __field( unsigned long long, p2 ) + __field( const char *, type ) + ), + + TP_fast_assign( + __entry->count = vcpu->arch.hpte_cache_count; + __entry->p1 = p1; + __entry->p2 = p2; + __entry->type = type; + ), + + TP_printk("Flush %d %sPTEs: %llx - %llx", + __entry->count, __entry->type, __entry->p1, __entry->p2) +); + +TRACE_EVENT(kvm_book3s_slb_found, + TP_PROTO(unsigned long long gvsid, unsigned long long hvsid), + TP_ARGS(gvsid, hvsid), + + TP_STRUCT__entry( + __field( unsigned long long, gvsid ) + __field( unsigned long long, hvsid ) + ), + + TP_fast_assign( + __entry->gvsid = gvsid; + __entry->hvsid = hvsid; + ), + + TP_printk("%llx -> %llx", __entry->gvsid, __entry->hvsid) +); + +TRACE_EVENT(kvm_book3s_slb_fail, + TP_PROTO(u16 sid_map_mask, unsigned long long gvsid), + TP_ARGS(sid_map_mask, gvsid), + + TP_STRUCT__entry( + __field( unsigned short, sid_map_mask ) + __field( unsigned long long, gvsid ) + ), + + TP_fast_assign( + __entry->sid_map_mask = sid_map_mask; + __entry->gvsid = gvsid; + ), + + TP_printk("%x/%x: %llx", __entry->sid_map_mask, + SID_MAP_MASK - __entry->sid_map_mask, __entry->gvsid) +); + +TRACE_EVENT(kvm_book3s_slb_map, + TP_PROTO(u16 sid_map_mask, unsigned long long gvsid, + unsigned long long hvsid), + TP_ARGS(sid_map_mask, gvsid, hvsid), + + TP_STRUCT__entry( + __field( unsigned short, sid_map_mask ) + __field( unsigned long long, guest_vsid ) + __field( unsigned long long, host_vsid ) + ), + + TP_fast_assign( + __entry->sid_map_mask = sid_map_mask; + __entry->guest_vsid = gvsid; + __entry->host_vsid = hvsid; + ), + + TP_printk("%x: %llx -> %llx", __entry->sid_map_mask, + __entry->guest_vsid, __entry->host_vsid) +); + +TRACE_EVENT(kvm_book3s_slbmte, + TP_PROTO(u64 slb_vsid, u64 slb_esid), + TP_ARGS(slb_vsid, slb_esid), + + TP_STRUCT__entry( + __field( u64, slb_vsid ) + __field( u64, slb_esid ) + ), + + TP_fast_assign( + __entry->slb_vsid = slb_vsid; + __entry->slb_esid = slb_esid; + ), + + TP_printk("%llx, %llx", __entry->slb_vsid, __entry->slb_esid) +); + +#endif /* CONFIG_PPC_BOOK3S */ + #endif /* _TRACE_KVM_H */ /* This part must be outside protection */ diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig index 81c9208025f..956154f32cf 100644 --- a/arch/powerpc/platforms/Kconfig +++ b/arch/powerpc/platforms/Kconfig @@ -21,6 +21,16 @@ source "arch/powerpc/platforms/44x/Kconfig" source "arch/powerpc/platforms/40x/Kconfig" source "arch/powerpc/platforms/amigaone/Kconfig" +config KVM_GUEST + bool "KVM Guest support" + default y + ---help--- + This option enables various optimizations for running under the KVM + hypervisor. Overhead for the kernel when not running inside KVM should + be minimal. + + In case of doubt, say Y + config PPC_NATIVE bool depends on 6xx || PPC64 |