diff options
Diffstat (limited to 'arch/arm/kvm')
-rw-r--r-- | arch/arm/kvm/Kconfig | 1 | ||||
-rw-r--r-- | arch/arm/kvm/Makefile | 2 | ||||
-rw-r--r-- | arch/arm/kvm/arm.c | 18 | ||||
-rw-r--r-- | arch/arm/kvm/coproc.c | 120 | ||||
-rw-r--r-- | arch/arm/kvm/coproc_a15.c | 117 | ||||
-rw-r--r-- | arch/arm/kvm/coproc_a7.c | 54 | ||||
-rw-r--r-- | arch/arm/kvm/emulate.c | 2 | ||||
-rw-r--r-- | arch/arm/kvm/guest.c | 24 | ||||
-rw-r--r-- | arch/arm/kvm/handle_exit.c | 20 | ||||
-rw-r--r-- | arch/arm/kvm/mmu.c | 223 | ||||
-rw-r--r-- | arch/arm/kvm/psci.c | 17 | ||||
-rw-r--r-- | arch/arm/kvm/reset.c | 19 |
12 files changed, 436 insertions, 181 deletions
diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig index ebf5015508b..466bd299b1a 100644 --- a/arch/arm/kvm/Kconfig +++ b/arch/arm/kvm/Kconfig @@ -20,6 +20,7 @@ config KVM bool "Kernel-based Virtual Machine (KVM) support" select PREEMPT_NOTIFIERS select ANON_INODES + select HAVE_KVM_CPU_RELAX_INTERCEPT select KVM_MMIO select KVM_ARM_HOST depends on ARM_VIRT_EXT && ARM_LPAE diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile index d99bee4950e..789bca9e64a 100644 --- a/arch/arm/kvm/Makefile +++ b/arch/arm/kvm/Makefile @@ -19,6 +19,6 @@ kvm-arm-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o obj-y += kvm-arm.o init.o interrupts.o obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o -obj-y += coproc.o coproc_a15.o mmio.o psci.o perf.o +obj-y += coproc.o coproc_a15.o coproc_a7.o mmio.o psci.o perf.o obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o obj-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 9c697db2787..e312e4a53f8 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -152,12 +152,13 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) return VM_FAULT_SIGBUS; } -void kvm_arch_free_memslot(struct kvm_memory_slot *free, +void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont) { } -int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) +int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, + unsigned long npages) { return 0; } @@ -797,6 +798,19 @@ long kvm_arch_vm_ioctl(struct file *filp, return -EFAULT; return kvm_vm_ioctl_set_device_addr(kvm, &dev_addr); } + case KVM_ARM_PREFERRED_TARGET: { + int err; + struct kvm_vcpu_init init; + + err = kvm_vcpu_preferred_target(&init); + if (err) + return err; + + if (copy_to_user(argp, &init, sizeof(init))) + return -EFAULT; + + return 0; + } default: return -EINVAL; } diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c index db9cf692d4d..78c0885d650 100644 --- a/arch/arm/kvm/coproc.c +++ b/arch/arm/kvm/coproc.c @@ -71,6 +71,98 @@ int kvm_handle_cp14_access(struct kvm_vcpu *vcpu, struct kvm_run *run) return 1; } +static void reset_mpidr(struct kvm_vcpu *vcpu, const struct coproc_reg *r) +{ + /* + * Compute guest MPIDR. We build a virtual cluster out of the + * vcpu_id, but we read the 'U' bit from the underlying + * hardware directly. + */ + vcpu->arch.cp15[c0_MPIDR] = ((read_cpuid_mpidr() & MPIDR_SMP_BITMASK) | + ((vcpu->vcpu_id >> 2) << MPIDR_LEVEL_BITS) | + (vcpu->vcpu_id & 3)); +} + +/* TRM entries A7:4.3.31 A15:4.3.28 - RO WI */ +static bool access_actlr(struct kvm_vcpu *vcpu, + const struct coproc_params *p, + const struct coproc_reg *r) +{ + if (p->is_write) + return ignore_write(vcpu, p); + + *vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c1_ACTLR]; + return true; +} + +/* TRM entries A7:4.3.56, A15:4.3.60 - R/O. */ +static bool access_cbar(struct kvm_vcpu *vcpu, + const struct coproc_params *p, + const struct coproc_reg *r) +{ + if (p->is_write) + return write_to_read_only(vcpu, p); + return read_zero(vcpu, p); +} + +/* TRM entries A7:4.3.49, A15:4.3.48 - R/O WI */ +static bool access_l2ctlr(struct kvm_vcpu *vcpu, + const struct coproc_params *p, + const struct coproc_reg *r) +{ + if (p->is_write) + return ignore_write(vcpu, p); + + *vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c9_L2CTLR]; + return true; +} + +static void reset_l2ctlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r) +{ + u32 l2ctlr, ncores; + + asm volatile("mrc p15, 1, %0, c9, c0, 2\n" : "=r" (l2ctlr)); + l2ctlr &= ~(3 << 24); + ncores = atomic_read(&vcpu->kvm->online_vcpus) - 1; + /* How many cores in the current cluster and the next ones */ + ncores -= (vcpu->vcpu_id & ~3); + /* Cap it to the maximum number of cores in a single cluster */ + ncores = min(ncores, 3U); + l2ctlr |= (ncores & 3) << 24; + + vcpu->arch.cp15[c9_L2CTLR] = l2ctlr; +} + +static void reset_actlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r) +{ + u32 actlr; + + /* ACTLR contains SMP bit: make sure you create all cpus first! */ + asm volatile("mrc p15, 0, %0, c1, c0, 1\n" : "=r" (actlr)); + /* Make the SMP bit consistent with the guest configuration */ + if (atomic_read(&vcpu->kvm->online_vcpus) > 1) + actlr |= 1U << 6; + else + actlr &= ~(1U << 6); + + vcpu->arch.cp15[c1_ACTLR] = actlr; +} + +/* + * TRM entries: A7:4.3.50, A15:4.3.49 + * R/O WI (even if NSACR.NS_L2ERR, a write of 1 is ignored). + */ +static bool access_l2ectlr(struct kvm_vcpu *vcpu, + const struct coproc_params *p, + const struct coproc_reg *r) +{ + if (p->is_write) + return ignore_write(vcpu, p); + + *vcpu_reg(vcpu, p->Rt1) = 0; + return true; +} + /* See note at ARM ARM B1.14.4 */ static bool access_dcsw(struct kvm_vcpu *vcpu, const struct coproc_params *p, @@ -153,10 +245,22 @@ static bool pm_fake(struct kvm_vcpu *vcpu, * registers preceding 32-bit ones. */ static const struct coproc_reg cp15_regs[] = { + /* MPIDR: we use VMPIDR for guest access. */ + { CRn( 0), CRm( 0), Op1( 0), Op2( 5), is32, + NULL, reset_mpidr, c0_MPIDR }, + /* CSSELR: swapped by interrupt.S. */ { CRn( 0), CRm( 0), Op1( 2), Op2( 0), is32, NULL, reset_unknown, c0_CSSELR }, + /* ACTLR: trapped by HCR.TAC bit. */ + { CRn( 1), CRm( 0), Op1( 0), Op2( 1), is32, + access_actlr, reset_actlr, c1_ACTLR }, + + /* CPACR: swapped by interrupt.S. */ + { CRn( 1), CRm( 0), Op1( 0), Op2( 2), is32, + NULL, reset_val, c1_CPACR, 0x00000000 }, + /* TTBR0/TTBR1: swapped by interrupt.S. */ { CRm64( 2), Op1( 0), is64, NULL, reset_unknown64, c2_TTBR0 }, { CRm64( 2), Op1( 1), is64, NULL, reset_unknown64, c2_TTBR1 }, @@ -195,6 +299,13 @@ static const struct coproc_reg cp15_regs[] = { { CRn( 7), CRm(10), Op1( 0), Op2( 2), is32, access_dcsw}, { CRn( 7), CRm(14), Op1( 0), Op2( 2), is32, access_dcsw}, /* + * L2CTLR access (guest wants to know #CPUs). + */ + { CRn( 9), CRm( 0), Op1( 1), Op2( 2), is32, + access_l2ctlr, reset_l2ctlr, c9_L2CTLR }, + { CRn( 9), CRm( 0), Op1( 1), Op2( 3), is32, access_l2ectlr}, + + /* * Dummy performance monitor implementation. */ { CRn( 9), CRm(12), Op1( 0), Op2( 0), is32, access_pmcr}, @@ -234,6 +345,9 @@ static const struct coproc_reg cp15_regs[] = { /* CNTKCTL: swapped by interrupt.S. */ { CRn(14), CRm( 1), Op1( 0), Op2( 0), is32, NULL, reset_val, c14_CNTKCTL, 0x00000000 }, + + /* The Configuration Base Address Register. */ + { CRn(15), CRm( 0), Op1( 4), Op2( 0), is32, access_cbar}, }; /* Target specific emulation tables */ @@ -241,6 +355,12 @@ static struct kvm_coproc_target_table *target_tables[KVM_ARM_NUM_TARGETS]; void kvm_register_target_coproc_table(struct kvm_coproc_target_table *table) { + unsigned int i; + + for (i = 1; i < table->num; i++) + BUG_ON(cmp_reg(&table->table[i-1], + &table->table[i]) >= 0); + target_tables[table->target] = table; } diff --git a/arch/arm/kvm/coproc_a15.c b/arch/arm/kvm/coproc_a15.c index cf93472b9dd..bb0cac1410c 100644 --- a/arch/arm/kvm/coproc_a15.c +++ b/arch/arm/kvm/coproc_a15.c @@ -17,101 +17,12 @@ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #include <linux/kvm_host.h> -#include <asm/cputype.h> -#include <asm/kvm_arm.h> -#include <asm/kvm_host.h> -#include <asm/kvm_emulate.h> #include <asm/kvm_coproc.h> +#include <asm/kvm_emulate.h> #include <linux/init.h> -static void reset_mpidr(struct kvm_vcpu *vcpu, const struct coproc_reg *r) -{ - /* - * Compute guest MPIDR: - * (Even if we present only one VCPU to the guest on an SMP - * host we don't set the U bit in the MPIDR, or vice versa, as - * revealing the underlying hardware properties is likely to - * be the best choice). - */ - vcpu->arch.cp15[c0_MPIDR] = (read_cpuid_mpidr() & ~MPIDR_LEVEL_MASK) - | (vcpu->vcpu_id & MPIDR_LEVEL_MASK); -} - #include "coproc.h" -/* A15 TRM 4.3.28: RO WI */ -static bool access_actlr(struct kvm_vcpu *vcpu, - const struct coproc_params *p, - const struct coproc_reg *r) -{ - if (p->is_write) - return ignore_write(vcpu, p); - - *vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c1_ACTLR]; - return true; -} - -/* A15 TRM 4.3.60: R/O. */ -static bool access_cbar(struct kvm_vcpu *vcpu, - const struct coproc_params *p, - const struct coproc_reg *r) -{ - if (p->is_write) - return write_to_read_only(vcpu, p); - return read_zero(vcpu, p); -} - -/* A15 TRM 4.3.48: R/O WI. */ -static bool access_l2ctlr(struct kvm_vcpu *vcpu, - const struct coproc_params *p, - const struct coproc_reg *r) -{ - if (p->is_write) - return ignore_write(vcpu, p); - - *vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c9_L2CTLR]; - return true; -} - -static void reset_l2ctlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r) -{ - u32 l2ctlr, ncores; - - asm volatile("mrc p15, 1, %0, c9, c0, 2\n" : "=r" (l2ctlr)); - l2ctlr &= ~(3 << 24); - ncores = atomic_read(&vcpu->kvm->online_vcpus) - 1; - l2ctlr |= (ncores & 3) << 24; - - vcpu->arch.cp15[c9_L2CTLR] = l2ctlr; -} - -static void reset_actlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r) -{ - u32 actlr; - - /* ACTLR contains SMP bit: make sure you create all cpus first! */ - asm volatile("mrc p15, 0, %0, c1, c0, 1\n" : "=r" (actlr)); - /* Make the SMP bit consistent with the guest configuration */ - if (atomic_read(&vcpu->kvm->online_vcpus) > 1) - actlr |= 1U << 6; - else - actlr &= ~(1U << 6); - - vcpu->arch.cp15[c1_ACTLR] = actlr; -} - -/* A15 TRM 4.3.49: R/O WI (even if NSACR.NS_L2ERR, a write of 1 is ignored). */ -static bool access_l2ectlr(struct kvm_vcpu *vcpu, - const struct coproc_params *p, - const struct coproc_reg *r) -{ - if (p->is_write) - return ignore_write(vcpu, p); - - *vcpu_reg(vcpu, p->Rt1) = 0; - return true; -} - /* * A15-specific CP15 registers. * CRn denotes the primary register number, but is copied to the CRm in the @@ -121,29 +32,9 @@ static bool access_l2ectlr(struct kvm_vcpu *vcpu, * registers preceding 32-bit ones. */ static const struct coproc_reg a15_regs[] = { - /* MPIDR: we use VMPIDR for guest access. */ - { CRn( 0), CRm( 0), Op1( 0), Op2( 5), is32, - NULL, reset_mpidr, c0_MPIDR }, - /* SCTLR: swapped by interrupt.S. */ { CRn( 1), CRm( 0), Op1( 0), Op2( 0), is32, NULL, reset_val, c1_SCTLR, 0x00C50078 }, - /* ACTLR: trapped by HCR.TAC bit. */ - { CRn( 1), CRm( 0), Op1( 0), Op2( 1), is32, - access_actlr, reset_actlr, c1_ACTLR }, - /* CPACR: swapped by interrupt.S. */ - { CRn( 1), CRm( 0), Op1( 0), Op2( 2), is32, - NULL, reset_val, c1_CPACR, 0x00000000 }, - - /* - * L2CTLR access (guest wants to know #CPUs). - */ - { CRn( 9), CRm( 0), Op1( 1), Op2( 2), is32, - access_l2ctlr, reset_l2ctlr, c9_L2CTLR }, - { CRn( 9), CRm( 0), Op1( 1), Op2( 3), is32, access_l2ectlr}, - - /* The Configuration Base Address Register. */ - { CRn(15), CRm( 0), Op1( 4), Op2( 0), is32, access_cbar}, }; static struct kvm_coproc_target_table a15_target_table = { @@ -154,12 +45,6 @@ static struct kvm_coproc_target_table a15_target_table = { static int __init coproc_a15_init(void) { - unsigned int i; - - for (i = 1; i < ARRAY_SIZE(a15_regs); i++) - BUG_ON(cmp_reg(&a15_regs[i-1], - &a15_regs[i]) >= 0); - kvm_register_target_coproc_table(&a15_target_table); return 0; } diff --git a/arch/arm/kvm/coproc_a7.c b/arch/arm/kvm/coproc_a7.c new file mode 100644 index 00000000000..1df76733158 --- /dev/null +++ b/arch/arm/kvm/coproc_a7.c @@ -0,0 +1,54 @@ +/* + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Copyright (C) 2013 - ARM Ltd + * + * Authors: Rusty Russell <rusty@rustcorp.au> + * Christoffer Dall <c.dall@virtualopensystems.com> + * Jonathan Austin <jonathan.austin@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ +#include <linux/kvm_host.h> +#include <asm/kvm_coproc.h> +#include <asm/kvm_emulate.h> +#include <linux/init.h> + +#include "coproc.h" + +/* + * Cortex-A7 specific CP15 registers. + * CRn denotes the primary register number, but is copied to the CRm in the + * user space API for 64-bit register access in line with the terminology used + * in the ARM ARM. + * Important: Must be sorted ascending by CRn, CRM, Op1, Op2 and with 64-bit + * registers preceding 32-bit ones. + */ +static const struct coproc_reg a7_regs[] = { + /* SCTLR: swapped by interrupt.S. */ + { CRn( 1), CRm( 0), Op1( 0), Op2( 0), is32, + NULL, reset_val, c1_SCTLR, 0x00C50878 }, +}; + +static struct kvm_coproc_target_table a7_target_table = { + .target = KVM_ARM_TARGET_CORTEX_A7, + .table = a7_regs, + .num = ARRAY_SIZE(a7_regs), +}; + +static int __init coproc_a7_init(void) +{ + kvm_register_target_coproc_table(&a7_target_table); + return 0; +} +late_initcall(coproc_a7_init); diff --git a/arch/arm/kvm/emulate.c b/arch/arm/kvm/emulate.c index bdede9e7da5..d6c00528367 100644 --- a/arch/arm/kvm/emulate.c +++ b/arch/arm/kvm/emulate.c @@ -354,7 +354,7 @@ static void inject_abt(struct kvm_vcpu *vcpu, bool is_pabt, unsigned long addr) *vcpu_pc(vcpu) = exc_vector_base(vcpu) + vect_offset; if (is_pabt) { - /* Set DFAR and DFSR */ + /* Set IFAR and IFSR */ vcpu->arch.cp15[c6_IFAR] = addr; is_lpae = (vcpu->arch.cp15[c2_TTBCR] >> 31); /* Always give debug fault for now - should give guest a clue */ diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c index 152d0361218..20f8d97904a 100644 --- a/arch/arm/kvm/guest.c +++ b/arch/arm/kvm/guest.c @@ -190,6 +190,8 @@ int __attribute_const__ kvm_target_cpu(void) return -EINVAL; switch (part_number) { + case ARM_CPU_PART_CORTEX_A7: + return KVM_ARM_TARGET_CORTEX_A7; case ARM_CPU_PART_CORTEX_A15: return KVM_ARM_TARGET_CORTEX_A15; default: @@ -202,7 +204,7 @@ int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, { unsigned int i; - /* We can only do a cortex A15 for now. */ + /* We can only cope with guest==host and only on A15/A7 (for now). */ if (init->target != kvm_target_cpu()) return -EINVAL; @@ -222,6 +224,26 @@ int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, return kvm_reset_vcpu(vcpu); } +int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init) +{ + int target = kvm_target_cpu(); + + if (target < 0) + return -ENODEV; + + memset(init, 0, sizeof(*init)); + + /* + * For now, we don't return any features. + * In future, we might use features to return target + * specific features available for the preferred + * target type. + */ + init->target = (__u32)target; + + return 0; +} + int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) { return -EINVAL; diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c index df4c82d47ad..a92079011a8 100644 --- a/arch/arm/kvm/handle_exit.c +++ b/arch/arm/kvm/handle_exit.c @@ -73,23 +73,29 @@ static int handle_dabt_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run) } /** - * kvm_handle_wfi - handle a wait-for-interrupts instruction executed by a guest + * kvm_handle_wfx - handle a WFI or WFE instructions trapped in guests * @vcpu: the vcpu pointer * @run: the kvm_run structure pointer * - * Simply sets the wait_for_interrupts flag on the vcpu structure, which will - * halt execution of world-switches and schedule other host processes until - * there is an incoming IRQ or FIQ to the VM. + * WFE: Yield the CPU and come back to this vcpu when the scheduler + * decides to. + * WFI: Simply call kvm_vcpu_block(), which will halt execution of + * world-switches and schedule other host processes until there is an + * incoming IRQ or FIQ to the VM. */ -static int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run *run) +static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run) { trace_kvm_wfi(*vcpu_pc(vcpu)); - kvm_vcpu_block(vcpu); + if (kvm_vcpu_get_hsr(vcpu) & HSR_WFI_IS_WFE) + kvm_vcpu_on_spin(vcpu); + else + kvm_vcpu_block(vcpu); + return 1; } static exit_handle_fn arm_exit_handlers[] = { - [HSR_EC_WFI] = kvm_handle_wfi, + [HSR_EC_WFI] = kvm_handle_wfx, [HSR_EC_CP15_32] = kvm_handle_cp15_32, [HSR_EC_CP15_64] = kvm_handle_cp15_64, [HSR_EC_CP14_MR] = kvm_handle_cp14_access, diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index b0de86b56c1..371958370de 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -19,6 +19,7 @@ #include <linux/mman.h> #include <linux/kvm_host.h> #include <linux/io.h> +#include <linux/hugetlb.h> #include <trace/events/kvm.h> #include <asm/pgalloc.h> #include <asm/cacheflush.h> @@ -41,6 +42,8 @@ static unsigned long hyp_idmap_start; static unsigned long hyp_idmap_end; static phys_addr_t hyp_idmap_vector; +#define kvm_pmd_huge(_x) (pmd_huge(_x) || pmd_trans_huge(_x)) + static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) { /* @@ -93,19 +96,29 @@ static bool page_empty(void *ptr) static void clear_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr) { - pmd_t *pmd_table = pmd_offset(pud, 0); - pud_clear(pud); - kvm_tlb_flush_vmid_ipa(kvm, addr); - pmd_free(NULL, pmd_table); + if (pud_huge(*pud)) { + pud_clear(pud); + kvm_tlb_flush_vmid_ipa(kvm, addr); + } else { + pmd_t *pmd_table = pmd_offset(pud, 0); + pud_clear(pud); + kvm_tlb_flush_vmid_ipa(kvm, addr); + pmd_free(NULL, pmd_table); + } put_page(virt_to_page(pud)); } static void clear_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr) { - pte_t *pte_table = pte_offset_kernel(pmd, 0); - pmd_clear(pmd); - kvm_tlb_flush_vmid_ipa(kvm, addr); - pte_free_kernel(NULL, pte_table); + if (kvm_pmd_huge(*pmd)) { + pmd_clear(pmd); + kvm_tlb_flush_vmid_ipa(kvm, addr); + } else { + pte_t *pte_table = pte_offset_kernel(pmd, 0); + pmd_clear(pmd); + kvm_tlb_flush_vmid_ipa(kvm, addr); + pte_free_kernel(NULL, pte_table); + } put_page(virt_to_page(pmd)); } @@ -136,18 +149,32 @@ static void unmap_range(struct kvm *kvm, pgd_t *pgdp, continue; } + if (pud_huge(*pud)) { + /* + * If we are dealing with a huge pud, just clear it and + * move on. + */ + clear_pud_entry(kvm, pud, addr); + addr = pud_addr_end(addr, end); + continue; + } + pmd = pmd_offset(pud, addr); if (pmd_none(*pmd)) { addr = pmd_addr_end(addr, end); continue; } - pte = pte_offset_kernel(pmd, addr); - clear_pte_entry(kvm, pte, addr); - next = addr + PAGE_SIZE; + if (!kvm_pmd_huge(*pmd)) { + pte = pte_offset_kernel(pmd, addr); + clear_pte_entry(kvm, pte, addr); + next = addr + PAGE_SIZE; + } - /* If we emptied the pte, walk back up the ladder */ - if (page_empty(pte)) { + /* + * If the pmd entry is to be cleared, walk back up the ladder + */ + if (kvm_pmd_huge(*pmd) || page_empty(pte)) { clear_pmd_entry(kvm, pmd, addr); next = pmd_addr_end(addr, end); if (page_empty(pmd) && !page_empty(pud)) { @@ -420,29 +447,71 @@ void kvm_free_stage2_pgd(struct kvm *kvm) kvm->arch.pgd = NULL; } - -static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, - phys_addr_t addr, const pte_t *new_pte, bool iomap) +static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, + phys_addr_t addr) { pgd_t *pgd; pud_t *pud; pmd_t *pmd; - pte_t *pte, old_pte; - /* Create 2nd stage page table mapping - Level 1 */ pgd = kvm->arch.pgd + pgd_index(addr); pud = pud_offset(pgd, addr); if (pud_none(*pud)) { if (!cache) - return 0; /* ignore calls from kvm_set_spte_hva */ + return NULL; pmd = mmu_memory_cache_alloc(cache); pud_populate(NULL, pud, pmd); get_page(virt_to_page(pud)); } - pmd = pmd_offset(pud, addr); + return pmd_offset(pud, addr); +} + +static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache + *cache, phys_addr_t addr, const pmd_t *new_pmd) +{ + pmd_t *pmd, old_pmd; + + pmd = stage2_get_pmd(kvm, cache, addr); + VM_BUG_ON(!pmd); + + /* + * Mapping in huge pages should only happen through a fault. If a + * page is merged into a transparent huge page, the individual + * subpages of that huge page should be unmapped through MMU + * notifiers before we get here. + * + * Merging of CompoundPages is not supported; they should become + * splitting first, unmapped, merged, and mapped back in on-demand. + */ + VM_BUG_ON(pmd_present(*pmd) && pmd_pfn(*pmd) != pmd_pfn(*new_pmd)); + + old_pmd = *pmd; + kvm_set_pmd(pmd, *new_pmd); + if (pmd_present(old_pmd)) + kvm_tlb_flush_vmid_ipa(kvm, addr); + else + get_page(virt_to_page(pmd)); + return 0; +} + +static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, + phys_addr_t addr, const pte_t *new_pte, bool iomap) +{ + pmd_t *pmd; + pte_t *pte, old_pte; - /* Create 2nd stage page table mapping - Level 2 */ + /* Create stage-2 page table mapping - Level 1 */ + pmd = stage2_get_pmd(kvm, cache, addr); + if (!pmd) { + /* + * Ignore calls from kvm_set_spte_hva for unallocated + * address ranges. + */ + return 0; + } + + /* Create stage-2 page mappings - Level 2 */ if (pmd_none(*pmd)) { if (!cache) return 0; /* ignore calls from kvm_set_spte_hva */ @@ -507,16 +576,60 @@ out: return ret; } +static bool transparent_hugepage_adjust(pfn_t *pfnp, phys_addr_t *ipap) +{ + pfn_t pfn = *pfnp; + gfn_t gfn = *ipap >> PAGE_SHIFT; + + if (PageTransCompound(pfn_to_page(pfn))) { + unsigned long mask; + /* + * The address we faulted on is backed by a transparent huge + * page. However, because we map the compound huge page and + * not the individual tail page, we need to transfer the + * refcount to the head page. We have to be careful that the + * THP doesn't start to split while we are adjusting the + * refcounts. + * + * We are sure this doesn't happen, because mmu_notifier_retry + * was successful and we are holding the mmu_lock, so if this + * THP is trying to split, it will be blocked in the mmu + * notifier before touching any of the pages, specifically + * before being able to call __split_huge_page_refcount(). + * + * We can therefore safely transfer the refcount from PG_tail + * to PG_head and switch the pfn from a tail page to the head + * page accordingly. + */ + mask = PTRS_PER_PMD - 1; + VM_BUG_ON((gfn & mask) != (pfn & mask)); + if (pfn & mask) { + *ipap &= PMD_MASK; + kvm_release_pfn_clean(pfn); + pfn &= ~mask; + kvm_get_pfn(pfn); + *pfnp = pfn; + } + + return true; + } + + return false; +} + static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, - gfn_t gfn, struct kvm_memory_slot *memslot, + struct kvm_memory_slot *memslot, unsigned long fault_status) { - pte_t new_pte; - pfn_t pfn; int ret; - bool write_fault, writable; + bool write_fault, writable, hugetlb = false, force_pte = false; unsigned long mmu_seq; + gfn_t gfn = fault_ipa >> PAGE_SHIFT; + unsigned long hva = gfn_to_hva(vcpu->kvm, gfn); + struct kvm *kvm = vcpu->kvm; struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache; + struct vm_area_struct *vma; + pfn_t pfn; write_fault = kvm_is_write_fault(kvm_vcpu_get_hsr(vcpu)); if (fault_status == FSC_PERM && !write_fault) { @@ -524,6 +637,26 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, return -EFAULT; } + /* Let's check if we will get back a huge page backed by hugetlbfs */ + down_read(¤t->mm->mmap_sem); + vma = find_vma_intersection(current->mm, hva, hva + 1); + if (is_vm_hugetlb_page(vma)) { + hugetlb = true; + gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT; + } else { + /* + * Pages belonging to VMAs not aligned to the PMD mapping + * granularity cannot be mapped using block descriptors even + * if the pages belong to a THP for the process, because the + * stage-2 block descriptor will cover more than a single THP + * and we loose atomicity for unmapping, updates, and splits + * of the THP or other pages in the stage-2 block range. + */ + if (vma->vm_start & ~PMD_MASK) + force_pte = true; + } + up_read(¤t->mm->mmap_sem); + /* We need minimum second+third level pages */ ret = mmu_topup_memory_cache(memcache, 2, KVM_NR_MEM_OBJS); if (ret) @@ -541,26 +674,40 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, */ smp_rmb(); - pfn = gfn_to_pfn_prot(vcpu->kvm, gfn, write_fault, &writable); + pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writable); if (is_error_pfn(pfn)) return -EFAULT; - new_pte = pfn_pte(pfn, PAGE_S2); - coherent_icache_guest_page(vcpu->kvm, gfn); - - spin_lock(&vcpu->kvm->mmu_lock); - if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) + spin_lock(&kvm->mmu_lock); + if (mmu_notifier_retry(kvm, mmu_seq)) goto out_unlock; - if (writable) { - kvm_set_s2pte_writable(&new_pte); - kvm_set_pfn_dirty(pfn); + if (!hugetlb && !force_pte) + hugetlb = transparent_hugepage_adjust(&pfn, &fault_ipa); + + if (hugetlb) { + pmd_t new_pmd = pfn_pmd(pfn, PAGE_S2); + new_pmd = pmd_mkhuge(new_pmd); + if (writable) { + kvm_set_s2pmd_writable(&new_pmd); + kvm_set_pfn_dirty(pfn); + } + coherent_icache_guest_page(kvm, hva & PMD_MASK, PMD_SIZE); + ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd); + } else { + pte_t new_pte = pfn_pte(pfn, PAGE_S2); + if (writable) { + kvm_set_s2pte_writable(&new_pte); + kvm_set_pfn_dirty(pfn); + } + coherent_icache_guest_page(kvm, hva, PAGE_SIZE); + ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, false); } - stage2_set_pte(vcpu->kvm, memcache, fault_ipa, &new_pte, false); + out_unlock: - spin_unlock(&vcpu->kvm->mmu_lock); + spin_unlock(&kvm->mmu_lock); kvm_release_pfn_clean(pfn); - return 0; + return ret; } /** @@ -629,7 +776,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) memslot = gfn_to_memslot(vcpu->kvm, gfn); - ret = user_mem_abort(vcpu, fault_ipa, gfn, memslot, fault_status); + ret = user_mem_abort(vcpu, fault_ipa, memslot, fault_status); if (ret == 0) ret = 1; out_unlock: diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c index ae0e06b6a49..0881bf169fb 100644 --- a/arch/arm/kvm/psci.c +++ b/arch/arm/kvm/psci.c @@ -18,6 +18,7 @@ #include <linux/kvm_host.h> #include <linux/wait.h> +#include <asm/cputype.h> #include <asm/kvm_emulate.h> #include <asm/kvm_psci.h> @@ -34,22 +35,30 @@ static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu) static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) { struct kvm *kvm = source_vcpu->kvm; - struct kvm_vcpu *vcpu; + struct kvm_vcpu *vcpu = NULL, *tmp; wait_queue_head_t *wq; unsigned long cpu_id; + unsigned long mpidr; phys_addr_t target_pc; + int i; cpu_id = *vcpu_reg(source_vcpu, 1); if (vcpu_mode_is_32bit(source_vcpu)) cpu_id &= ~((u32) 0); - if (cpu_id >= atomic_read(&kvm->online_vcpus)) + kvm_for_each_vcpu(i, tmp, kvm) { + mpidr = kvm_vcpu_get_mpidr(tmp); + if ((mpidr & MPIDR_HWID_BITMASK) == (cpu_id & MPIDR_HWID_BITMASK)) { + vcpu = tmp; + break; + } + } + + if (!vcpu) return KVM_PSCI_RET_INVAL; target_pc = *vcpu_reg(source_vcpu, 2); - vcpu = kvm_get_vcpu(kvm, cpu_id); - wq = kvm_arch_vcpu_wq(vcpu); if (!waitqueue_active(wq)) return KVM_PSCI_RET_INVAL; diff --git a/arch/arm/kvm/reset.c b/arch/arm/kvm/reset.c index 71e08baee20..f558c073c02 100644 --- a/arch/arm/kvm/reset.c +++ b/arch/arm/kvm/reset.c @@ -30,16 +30,14 @@ #include <kvm/arm_arch_timer.h> /****************************************************************************** - * Cortex-A15 Reset Values + * Cortex-A15 and Cortex-A7 Reset Values */ -static const int a15_max_cpu_idx = 3; - -static struct kvm_regs a15_regs_reset = { +static struct kvm_regs cortexa_regs_reset = { .usr_regs.ARM_cpsr = SVC_MODE | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT, }; -static const struct kvm_irq_level a15_vtimer_irq = { +static const struct kvm_irq_level cortexa_vtimer_irq = { { .irq = 27 }, .level = 1, }; @@ -58,23 +56,22 @@ static const struct kvm_irq_level a15_vtimer_irq = { */ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) { - struct kvm_regs *cpu_reset; + struct kvm_regs *reset_regs; const struct kvm_irq_level *cpu_vtimer_irq; switch (vcpu->arch.target) { + case KVM_ARM_TARGET_CORTEX_A7: case KVM_ARM_TARGET_CORTEX_A15: - if (vcpu->vcpu_id > a15_max_cpu_idx) - return -EINVAL; - cpu_reset = &a15_regs_reset; + reset_regs = &cortexa_regs_reset; vcpu->arch.midr = read_cpuid_id(); - cpu_vtimer_irq = &a15_vtimer_irq; + cpu_vtimer_irq = &cortexa_vtimer_irq; break; default: return -ENODEV; } /* Reset core registers */ - memcpy(&vcpu->arch.regs, cpu_reset, sizeof(vcpu->arch.regs)); + memcpy(&vcpu->arch.regs, reset_regs, sizeof(vcpu->arch.regs)); /* Reset CP15 registers */ kvm_reset_coprocs(vcpu); |