summaryrefslogtreecommitdiffstats
path: root/arch/arm/kvm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm/kvm')
-rw-r--r--arch/arm/kvm/Kconfig1
-rw-r--r--arch/arm/kvm/Makefile2
-rw-r--r--arch/arm/kvm/arm.c18
-rw-r--r--arch/arm/kvm/coproc.c120
-rw-r--r--arch/arm/kvm/coproc_a15.c117
-rw-r--r--arch/arm/kvm/coproc_a7.c54
-rw-r--r--arch/arm/kvm/emulate.c2
-rw-r--r--arch/arm/kvm/guest.c24
-rw-r--r--arch/arm/kvm/handle_exit.c20
-rw-r--r--arch/arm/kvm/mmu.c223
-rw-r--r--arch/arm/kvm/psci.c17
-rw-r--r--arch/arm/kvm/reset.c19
12 files changed, 436 insertions, 181 deletions
diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig
index ebf5015508b..466bd299b1a 100644
--- a/arch/arm/kvm/Kconfig
+++ b/arch/arm/kvm/Kconfig
@@ -20,6 +20,7 @@ config KVM
bool "Kernel-based Virtual Machine (KVM) support"
select PREEMPT_NOTIFIERS
select ANON_INODES
+ select HAVE_KVM_CPU_RELAX_INTERCEPT
select KVM_MMIO
select KVM_ARM_HOST
depends on ARM_VIRT_EXT && ARM_LPAE
diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile
index d99bee4950e..789bca9e64a 100644
--- a/arch/arm/kvm/Makefile
+++ b/arch/arm/kvm/Makefile
@@ -19,6 +19,6 @@ kvm-arm-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o
obj-y += kvm-arm.o init.o interrupts.o
obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o
-obj-y += coproc.o coproc_a15.o mmio.o psci.o perf.o
+obj-y += coproc.o coproc_a15.o coproc_a7.o mmio.o psci.o perf.o
obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o
obj-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 9c697db2787..e312e4a53f8 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -152,12 +152,13 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
return VM_FAULT_SIGBUS;
}
-void kvm_arch_free_memslot(struct kvm_memory_slot *free,
+void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
struct kvm_memory_slot *dont)
{
}
-int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
+int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+ unsigned long npages)
{
return 0;
}
@@ -797,6 +798,19 @@ long kvm_arch_vm_ioctl(struct file *filp,
return -EFAULT;
return kvm_vm_ioctl_set_device_addr(kvm, &dev_addr);
}
+ case KVM_ARM_PREFERRED_TARGET: {
+ int err;
+ struct kvm_vcpu_init init;
+
+ err = kvm_vcpu_preferred_target(&init);
+ if (err)
+ return err;
+
+ if (copy_to_user(argp, &init, sizeof(init)))
+ return -EFAULT;
+
+ return 0;
+ }
default:
return -EINVAL;
}
diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c
index db9cf692d4d..78c0885d650 100644
--- a/arch/arm/kvm/coproc.c
+++ b/arch/arm/kvm/coproc.c
@@ -71,6 +71,98 @@ int kvm_handle_cp14_access(struct kvm_vcpu *vcpu, struct kvm_run *run)
return 1;
}
+static void reset_mpidr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
+{
+ /*
+ * Compute guest MPIDR. We build a virtual cluster out of the
+ * vcpu_id, but we read the 'U' bit from the underlying
+ * hardware directly.
+ */
+ vcpu->arch.cp15[c0_MPIDR] = ((read_cpuid_mpidr() & MPIDR_SMP_BITMASK) |
+ ((vcpu->vcpu_id >> 2) << MPIDR_LEVEL_BITS) |
+ (vcpu->vcpu_id & 3));
+}
+
+/* TRM entries A7:4.3.31 A15:4.3.28 - RO WI */
+static bool access_actlr(struct kvm_vcpu *vcpu,
+ const struct coproc_params *p,
+ const struct coproc_reg *r)
+{
+ if (p->is_write)
+ return ignore_write(vcpu, p);
+
+ *vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c1_ACTLR];
+ return true;
+}
+
+/* TRM entries A7:4.3.56, A15:4.3.60 - R/O. */
+static bool access_cbar(struct kvm_vcpu *vcpu,
+ const struct coproc_params *p,
+ const struct coproc_reg *r)
+{
+ if (p->is_write)
+ return write_to_read_only(vcpu, p);
+ return read_zero(vcpu, p);
+}
+
+/* TRM entries A7:4.3.49, A15:4.3.48 - R/O WI */
+static bool access_l2ctlr(struct kvm_vcpu *vcpu,
+ const struct coproc_params *p,
+ const struct coproc_reg *r)
+{
+ if (p->is_write)
+ return ignore_write(vcpu, p);
+
+ *vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c9_L2CTLR];
+ return true;
+}
+
+static void reset_l2ctlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
+{
+ u32 l2ctlr, ncores;
+
+ asm volatile("mrc p15, 1, %0, c9, c0, 2\n" : "=r" (l2ctlr));
+ l2ctlr &= ~(3 << 24);
+ ncores = atomic_read(&vcpu->kvm->online_vcpus) - 1;
+ /* How many cores in the current cluster and the next ones */
+ ncores -= (vcpu->vcpu_id & ~3);
+ /* Cap it to the maximum number of cores in a single cluster */
+ ncores = min(ncores, 3U);
+ l2ctlr |= (ncores & 3) << 24;
+
+ vcpu->arch.cp15[c9_L2CTLR] = l2ctlr;
+}
+
+static void reset_actlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
+{
+ u32 actlr;
+
+ /* ACTLR contains SMP bit: make sure you create all cpus first! */
+ asm volatile("mrc p15, 0, %0, c1, c0, 1\n" : "=r" (actlr));
+ /* Make the SMP bit consistent with the guest configuration */
+ if (atomic_read(&vcpu->kvm->online_vcpus) > 1)
+ actlr |= 1U << 6;
+ else
+ actlr &= ~(1U << 6);
+
+ vcpu->arch.cp15[c1_ACTLR] = actlr;
+}
+
+/*
+ * TRM entries: A7:4.3.50, A15:4.3.49
+ * R/O WI (even if NSACR.NS_L2ERR, a write of 1 is ignored).
+ */
+static bool access_l2ectlr(struct kvm_vcpu *vcpu,
+ const struct coproc_params *p,
+ const struct coproc_reg *r)
+{
+ if (p->is_write)
+ return ignore_write(vcpu, p);
+
+ *vcpu_reg(vcpu, p->Rt1) = 0;
+ return true;
+}
+
/* See note at ARM ARM B1.14.4 */
static bool access_dcsw(struct kvm_vcpu *vcpu,
const struct coproc_params *p,
@@ -153,10 +245,22 @@ static bool pm_fake(struct kvm_vcpu *vcpu,
* registers preceding 32-bit ones.
*/
static const struct coproc_reg cp15_regs[] = {
+ /* MPIDR: we use VMPIDR for guest access. */
+ { CRn( 0), CRm( 0), Op1( 0), Op2( 5), is32,
+ NULL, reset_mpidr, c0_MPIDR },
+
/* CSSELR: swapped by interrupt.S. */
{ CRn( 0), CRm( 0), Op1( 2), Op2( 0), is32,
NULL, reset_unknown, c0_CSSELR },
+ /* ACTLR: trapped by HCR.TAC bit. */
+ { CRn( 1), CRm( 0), Op1( 0), Op2( 1), is32,
+ access_actlr, reset_actlr, c1_ACTLR },
+
+ /* CPACR: swapped by interrupt.S. */
+ { CRn( 1), CRm( 0), Op1( 0), Op2( 2), is32,
+ NULL, reset_val, c1_CPACR, 0x00000000 },
+
/* TTBR0/TTBR1: swapped by interrupt.S. */
{ CRm64( 2), Op1( 0), is64, NULL, reset_unknown64, c2_TTBR0 },
{ CRm64( 2), Op1( 1), is64, NULL, reset_unknown64, c2_TTBR1 },
@@ -195,6 +299,13 @@ static const struct coproc_reg cp15_regs[] = {
{ CRn( 7), CRm(10), Op1( 0), Op2( 2), is32, access_dcsw},
{ CRn( 7), CRm(14), Op1( 0), Op2( 2), is32, access_dcsw},
/*
+ * L2CTLR access (guest wants to know #CPUs).
+ */
+ { CRn( 9), CRm( 0), Op1( 1), Op2( 2), is32,
+ access_l2ctlr, reset_l2ctlr, c9_L2CTLR },
+ { CRn( 9), CRm( 0), Op1( 1), Op2( 3), is32, access_l2ectlr},
+
+ /*
* Dummy performance monitor implementation.
*/
{ CRn( 9), CRm(12), Op1( 0), Op2( 0), is32, access_pmcr},
@@ -234,6 +345,9 @@ static const struct coproc_reg cp15_regs[] = {
/* CNTKCTL: swapped by interrupt.S. */
{ CRn(14), CRm( 1), Op1( 0), Op2( 0), is32,
NULL, reset_val, c14_CNTKCTL, 0x00000000 },
+
+ /* The Configuration Base Address Register. */
+ { CRn(15), CRm( 0), Op1( 4), Op2( 0), is32, access_cbar},
};
/* Target specific emulation tables */
@@ -241,6 +355,12 @@ static struct kvm_coproc_target_table *target_tables[KVM_ARM_NUM_TARGETS];
void kvm_register_target_coproc_table(struct kvm_coproc_target_table *table)
{
+ unsigned int i;
+
+ for (i = 1; i < table->num; i++)
+ BUG_ON(cmp_reg(&table->table[i-1],
+ &table->table[i]) >= 0);
+
target_tables[table->target] = table;
}
diff --git a/arch/arm/kvm/coproc_a15.c b/arch/arm/kvm/coproc_a15.c
index cf93472b9dd..bb0cac1410c 100644
--- a/arch/arm/kvm/coproc_a15.c
+++ b/arch/arm/kvm/coproc_a15.c
@@ -17,101 +17,12 @@
* Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#include <linux/kvm_host.h>
-#include <asm/cputype.h>
-#include <asm/kvm_arm.h>
-#include <asm/kvm_host.h>
-#include <asm/kvm_emulate.h>
#include <asm/kvm_coproc.h>
+#include <asm/kvm_emulate.h>
#include <linux/init.h>
-static void reset_mpidr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
-{
- /*
- * Compute guest MPIDR:
- * (Even if we present only one VCPU to the guest on an SMP
- * host we don't set the U bit in the MPIDR, or vice versa, as
- * revealing the underlying hardware properties is likely to
- * be the best choice).
- */
- vcpu->arch.cp15[c0_MPIDR] = (read_cpuid_mpidr() & ~MPIDR_LEVEL_MASK)
- | (vcpu->vcpu_id & MPIDR_LEVEL_MASK);
-}
-
#include "coproc.h"
-/* A15 TRM 4.3.28: RO WI */
-static bool access_actlr(struct kvm_vcpu *vcpu,
- const struct coproc_params *p,
- const struct coproc_reg *r)
-{
- if (p->is_write)
- return ignore_write(vcpu, p);
-
- *vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c1_ACTLR];
- return true;
-}
-
-/* A15 TRM 4.3.60: R/O. */
-static bool access_cbar(struct kvm_vcpu *vcpu,
- const struct coproc_params *p,
- const struct coproc_reg *r)
-{
- if (p->is_write)
- return write_to_read_only(vcpu, p);
- return read_zero(vcpu, p);
-}
-
-/* A15 TRM 4.3.48: R/O WI. */
-static bool access_l2ctlr(struct kvm_vcpu *vcpu,
- const struct coproc_params *p,
- const struct coproc_reg *r)
-{
- if (p->is_write)
- return ignore_write(vcpu, p);
-
- *vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c9_L2CTLR];
- return true;
-}
-
-static void reset_l2ctlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
-{
- u32 l2ctlr, ncores;
-
- asm volatile("mrc p15, 1, %0, c9, c0, 2\n" : "=r" (l2ctlr));
- l2ctlr &= ~(3 << 24);
- ncores = atomic_read(&vcpu->kvm->online_vcpus) - 1;
- l2ctlr |= (ncores & 3) << 24;
-
- vcpu->arch.cp15[c9_L2CTLR] = l2ctlr;
-}
-
-static void reset_actlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
-{
- u32 actlr;
-
- /* ACTLR contains SMP bit: make sure you create all cpus first! */
- asm volatile("mrc p15, 0, %0, c1, c0, 1\n" : "=r" (actlr));
- /* Make the SMP bit consistent with the guest configuration */
- if (atomic_read(&vcpu->kvm->online_vcpus) > 1)
- actlr |= 1U << 6;
- else
- actlr &= ~(1U << 6);
-
- vcpu->arch.cp15[c1_ACTLR] = actlr;
-}
-
-/* A15 TRM 4.3.49: R/O WI (even if NSACR.NS_L2ERR, a write of 1 is ignored). */
-static bool access_l2ectlr(struct kvm_vcpu *vcpu,
- const struct coproc_params *p,
- const struct coproc_reg *r)
-{
- if (p->is_write)
- return ignore_write(vcpu, p);
-
- *vcpu_reg(vcpu, p->Rt1) = 0;
- return true;
-}
-
/*
* A15-specific CP15 registers.
* CRn denotes the primary register number, but is copied to the CRm in the
@@ -121,29 +32,9 @@ static bool access_l2ectlr(struct kvm_vcpu *vcpu,
* registers preceding 32-bit ones.
*/
static const struct coproc_reg a15_regs[] = {
- /* MPIDR: we use VMPIDR for guest access. */
- { CRn( 0), CRm( 0), Op1( 0), Op2( 5), is32,
- NULL, reset_mpidr, c0_MPIDR },
-
/* SCTLR: swapped by interrupt.S. */
{ CRn( 1), CRm( 0), Op1( 0), Op2( 0), is32,
NULL, reset_val, c1_SCTLR, 0x00C50078 },
- /* ACTLR: trapped by HCR.TAC bit. */
- { CRn( 1), CRm( 0), Op1( 0), Op2( 1), is32,
- access_actlr, reset_actlr, c1_ACTLR },
- /* CPACR: swapped by interrupt.S. */
- { CRn( 1), CRm( 0), Op1( 0), Op2( 2), is32,
- NULL, reset_val, c1_CPACR, 0x00000000 },
-
- /*
- * L2CTLR access (guest wants to know #CPUs).
- */
- { CRn( 9), CRm( 0), Op1( 1), Op2( 2), is32,
- access_l2ctlr, reset_l2ctlr, c9_L2CTLR },
- { CRn( 9), CRm( 0), Op1( 1), Op2( 3), is32, access_l2ectlr},
-
- /* The Configuration Base Address Register. */
- { CRn(15), CRm( 0), Op1( 4), Op2( 0), is32, access_cbar},
};
static struct kvm_coproc_target_table a15_target_table = {
@@ -154,12 +45,6 @@ static struct kvm_coproc_target_table a15_target_table = {
static int __init coproc_a15_init(void)
{
- unsigned int i;
-
- for (i = 1; i < ARRAY_SIZE(a15_regs); i++)
- BUG_ON(cmp_reg(&a15_regs[i-1],
- &a15_regs[i]) >= 0);
-
kvm_register_target_coproc_table(&a15_target_table);
return 0;
}
diff --git a/arch/arm/kvm/coproc_a7.c b/arch/arm/kvm/coproc_a7.c
new file mode 100644
index 00000000000..1df76733158
--- /dev/null
+++ b/arch/arm/kvm/coproc_a7.c
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Copyright (C) 2013 - ARM Ltd
+ *
+ * Authors: Rusty Russell <rusty@rustcorp.au>
+ * Christoffer Dall <c.dall@virtualopensystems.com>
+ * Jonathan Austin <jonathan.austin@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+#include <linux/kvm_host.h>
+#include <asm/kvm_coproc.h>
+#include <asm/kvm_emulate.h>
+#include <linux/init.h>
+
+#include "coproc.h"
+
+/*
+ * Cortex-A7 specific CP15 registers.
+ * CRn denotes the primary register number, but is copied to the CRm in the
+ * user space API for 64-bit register access in line with the terminology used
+ * in the ARM ARM.
+ * Important: Must be sorted ascending by CRn, CRM, Op1, Op2 and with 64-bit
+ * registers preceding 32-bit ones.
+ */
+static const struct coproc_reg a7_regs[] = {
+ /* SCTLR: swapped by interrupt.S. */
+ { CRn( 1), CRm( 0), Op1( 0), Op2( 0), is32,
+ NULL, reset_val, c1_SCTLR, 0x00C50878 },
+};
+
+static struct kvm_coproc_target_table a7_target_table = {
+ .target = KVM_ARM_TARGET_CORTEX_A7,
+ .table = a7_regs,
+ .num = ARRAY_SIZE(a7_regs),
+};
+
+static int __init coproc_a7_init(void)
+{
+ kvm_register_target_coproc_table(&a7_target_table);
+ return 0;
+}
+late_initcall(coproc_a7_init);
diff --git a/arch/arm/kvm/emulate.c b/arch/arm/kvm/emulate.c
index bdede9e7da5..d6c00528367 100644
--- a/arch/arm/kvm/emulate.c
+++ b/arch/arm/kvm/emulate.c
@@ -354,7 +354,7 @@ static void inject_abt(struct kvm_vcpu *vcpu, bool is_pabt, unsigned long addr)
*vcpu_pc(vcpu) = exc_vector_base(vcpu) + vect_offset;
if (is_pabt) {
- /* Set DFAR and DFSR */
+ /* Set IFAR and IFSR */
vcpu->arch.cp15[c6_IFAR] = addr;
is_lpae = (vcpu->arch.cp15[c2_TTBCR] >> 31);
/* Always give debug fault for now - should give guest a clue */
diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c
index 152d0361218..20f8d97904a 100644
--- a/arch/arm/kvm/guest.c
+++ b/arch/arm/kvm/guest.c
@@ -190,6 +190,8 @@ int __attribute_const__ kvm_target_cpu(void)
return -EINVAL;
switch (part_number) {
+ case ARM_CPU_PART_CORTEX_A7:
+ return KVM_ARM_TARGET_CORTEX_A7;
case ARM_CPU_PART_CORTEX_A15:
return KVM_ARM_TARGET_CORTEX_A15;
default:
@@ -202,7 +204,7 @@ int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
{
unsigned int i;
- /* We can only do a cortex A15 for now. */
+ /* We can only cope with guest==host and only on A15/A7 (for now). */
if (init->target != kvm_target_cpu())
return -EINVAL;
@@ -222,6 +224,26 @@ int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
return kvm_reset_vcpu(vcpu);
}
+int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init)
+{
+ int target = kvm_target_cpu();
+
+ if (target < 0)
+ return -ENODEV;
+
+ memset(init, 0, sizeof(*init));
+
+ /*
+ * For now, we don't return any features.
+ * In future, we might use features to return target
+ * specific features available for the preferred
+ * target type.
+ */
+ init->target = (__u32)target;
+
+ return 0;
+}
+
int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
return -EINVAL;
diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c
index df4c82d47ad..a92079011a8 100644
--- a/arch/arm/kvm/handle_exit.c
+++ b/arch/arm/kvm/handle_exit.c
@@ -73,23 +73,29 @@ static int handle_dabt_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run)
}
/**
- * kvm_handle_wfi - handle a wait-for-interrupts instruction executed by a guest
+ * kvm_handle_wfx - handle a WFI or WFE instructions trapped in guests
* @vcpu: the vcpu pointer
* @run: the kvm_run structure pointer
*
- * Simply sets the wait_for_interrupts flag on the vcpu structure, which will
- * halt execution of world-switches and schedule other host processes until
- * there is an incoming IRQ or FIQ to the VM.
+ * WFE: Yield the CPU and come back to this vcpu when the scheduler
+ * decides to.
+ * WFI: Simply call kvm_vcpu_block(), which will halt execution of
+ * world-switches and schedule other host processes until there is an
+ * incoming IRQ or FIQ to the VM.
*/
-static int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run *run)
+static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
trace_kvm_wfi(*vcpu_pc(vcpu));
- kvm_vcpu_block(vcpu);
+ if (kvm_vcpu_get_hsr(vcpu) & HSR_WFI_IS_WFE)
+ kvm_vcpu_on_spin(vcpu);
+ else
+ kvm_vcpu_block(vcpu);
+
return 1;
}
static exit_handle_fn arm_exit_handlers[] = {
- [HSR_EC_WFI] = kvm_handle_wfi,
+ [HSR_EC_WFI] = kvm_handle_wfx,
[HSR_EC_CP15_32] = kvm_handle_cp15_32,
[HSR_EC_CP15_64] = kvm_handle_cp15_64,
[HSR_EC_CP14_MR] = kvm_handle_cp14_access,
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index b0de86b56c1..371958370de 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -19,6 +19,7 @@
#include <linux/mman.h>
#include <linux/kvm_host.h>
#include <linux/io.h>
+#include <linux/hugetlb.h>
#include <trace/events/kvm.h>
#include <asm/pgalloc.h>
#include <asm/cacheflush.h>
@@ -41,6 +42,8 @@ static unsigned long hyp_idmap_start;
static unsigned long hyp_idmap_end;
static phys_addr_t hyp_idmap_vector;
+#define kvm_pmd_huge(_x) (pmd_huge(_x) || pmd_trans_huge(_x))
+
static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
{
/*
@@ -93,19 +96,29 @@ static bool page_empty(void *ptr)
static void clear_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr)
{
- pmd_t *pmd_table = pmd_offset(pud, 0);
- pud_clear(pud);
- kvm_tlb_flush_vmid_ipa(kvm, addr);
- pmd_free(NULL, pmd_table);
+ if (pud_huge(*pud)) {
+ pud_clear(pud);
+ kvm_tlb_flush_vmid_ipa(kvm, addr);
+ } else {
+ pmd_t *pmd_table = pmd_offset(pud, 0);
+ pud_clear(pud);
+ kvm_tlb_flush_vmid_ipa(kvm, addr);
+ pmd_free(NULL, pmd_table);
+ }
put_page(virt_to_page(pud));
}
static void clear_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr)
{
- pte_t *pte_table = pte_offset_kernel(pmd, 0);
- pmd_clear(pmd);
- kvm_tlb_flush_vmid_ipa(kvm, addr);
- pte_free_kernel(NULL, pte_table);
+ if (kvm_pmd_huge(*pmd)) {
+ pmd_clear(pmd);
+ kvm_tlb_flush_vmid_ipa(kvm, addr);
+ } else {
+ pte_t *pte_table = pte_offset_kernel(pmd, 0);
+ pmd_clear(pmd);
+ kvm_tlb_flush_vmid_ipa(kvm, addr);
+ pte_free_kernel(NULL, pte_table);
+ }
put_page(virt_to_page(pmd));
}
@@ -136,18 +149,32 @@ static void unmap_range(struct kvm *kvm, pgd_t *pgdp,
continue;
}
+ if (pud_huge(*pud)) {
+ /*
+ * If we are dealing with a huge pud, just clear it and
+ * move on.
+ */
+ clear_pud_entry(kvm, pud, addr);
+ addr = pud_addr_end(addr, end);
+ continue;
+ }
+
pmd = pmd_offset(pud, addr);
if (pmd_none(*pmd)) {
addr = pmd_addr_end(addr, end);
continue;
}
- pte = pte_offset_kernel(pmd, addr);
- clear_pte_entry(kvm, pte, addr);
- next = addr + PAGE_SIZE;
+ if (!kvm_pmd_huge(*pmd)) {
+ pte = pte_offset_kernel(pmd, addr);
+ clear_pte_entry(kvm, pte, addr);
+ next = addr + PAGE_SIZE;
+ }
- /* If we emptied the pte, walk back up the ladder */
- if (page_empty(pte)) {
+ /*
+ * If the pmd entry is to be cleared, walk back up the ladder
+ */
+ if (kvm_pmd_huge(*pmd) || page_empty(pte)) {
clear_pmd_entry(kvm, pmd, addr);
next = pmd_addr_end(addr, end);
if (page_empty(pmd) && !page_empty(pud)) {
@@ -420,29 +447,71 @@ void kvm_free_stage2_pgd(struct kvm *kvm)
kvm->arch.pgd = NULL;
}
-
-static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
- phys_addr_t addr, const pte_t *new_pte, bool iomap)
+static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
+ phys_addr_t addr)
{
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
- pte_t *pte, old_pte;
- /* Create 2nd stage page table mapping - Level 1 */
pgd = kvm->arch.pgd + pgd_index(addr);
pud = pud_offset(pgd, addr);
if (pud_none(*pud)) {
if (!cache)
- return 0; /* ignore calls from kvm_set_spte_hva */
+ return NULL;
pmd = mmu_memory_cache_alloc(cache);
pud_populate(NULL, pud, pmd);
get_page(virt_to_page(pud));
}
- pmd = pmd_offset(pud, addr);
+ return pmd_offset(pud, addr);
+}
+
+static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache
+ *cache, phys_addr_t addr, const pmd_t *new_pmd)
+{
+ pmd_t *pmd, old_pmd;
+
+ pmd = stage2_get_pmd(kvm, cache, addr);
+ VM_BUG_ON(!pmd);
+
+ /*
+ * Mapping in huge pages should only happen through a fault. If a
+ * page is merged into a transparent huge page, the individual
+ * subpages of that huge page should be unmapped through MMU
+ * notifiers before we get here.
+ *
+ * Merging of CompoundPages is not supported; they should become
+ * splitting first, unmapped, merged, and mapped back in on-demand.
+ */
+ VM_BUG_ON(pmd_present(*pmd) && pmd_pfn(*pmd) != pmd_pfn(*new_pmd));
+
+ old_pmd = *pmd;
+ kvm_set_pmd(pmd, *new_pmd);
+ if (pmd_present(old_pmd))
+ kvm_tlb_flush_vmid_ipa(kvm, addr);
+ else
+ get_page(virt_to_page(pmd));
+ return 0;
+}
+
+static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
+ phys_addr_t addr, const pte_t *new_pte, bool iomap)
+{
+ pmd_t *pmd;
+ pte_t *pte, old_pte;
- /* Create 2nd stage page table mapping - Level 2 */
+ /* Create stage-2 page table mapping - Level 1 */
+ pmd = stage2_get_pmd(kvm, cache, addr);
+ if (!pmd) {
+ /*
+ * Ignore calls from kvm_set_spte_hva for unallocated
+ * address ranges.
+ */
+ return 0;
+ }
+
+ /* Create stage-2 page mappings - Level 2 */
if (pmd_none(*pmd)) {
if (!cache)
return 0; /* ignore calls from kvm_set_spte_hva */
@@ -507,16 +576,60 @@ out:
return ret;
}
+static bool transparent_hugepage_adjust(pfn_t *pfnp, phys_addr_t *ipap)
+{
+ pfn_t pfn = *pfnp;
+ gfn_t gfn = *ipap >> PAGE_SHIFT;
+
+ if (PageTransCompound(pfn_to_page(pfn))) {
+ unsigned long mask;
+ /*
+ * The address we faulted on is backed by a transparent huge
+ * page. However, because we map the compound huge page and
+ * not the individual tail page, we need to transfer the
+ * refcount to the head page. We have to be careful that the
+ * THP doesn't start to split while we are adjusting the
+ * refcounts.
+ *
+ * We are sure this doesn't happen, because mmu_notifier_retry
+ * was successful and we are holding the mmu_lock, so if this
+ * THP is trying to split, it will be blocked in the mmu
+ * notifier before touching any of the pages, specifically
+ * before being able to call __split_huge_page_refcount().
+ *
+ * We can therefore safely transfer the refcount from PG_tail
+ * to PG_head and switch the pfn from a tail page to the head
+ * page accordingly.
+ */
+ mask = PTRS_PER_PMD - 1;
+ VM_BUG_ON((gfn & mask) != (pfn & mask));
+ if (pfn & mask) {
+ *ipap &= PMD_MASK;
+ kvm_release_pfn_clean(pfn);
+ pfn &= ~mask;
+ kvm_get_pfn(pfn);
+ *pfnp = pfn;
+ }
+
+ return true;
+ }
+
+ return false;
+}
+
static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
- gfn_t gfn, struct kvm_memory_slot *memslot,
+ struct kvm_memory_slot *memslot,
unsigned long fault_status)
{
- pte_t new_pte;
- pfn_t pfn;
int ret;
- bool write_fault, writable;
+ bool write_fault, writable, hugetlb = false, force_pte = false;
unsigned long mmu_seq;
+ gfn_t gfn = fault_ipa >> PAGE_SHIFT;
+ unsigned long hva = gfn_to_hva(vcpu->kvm, gfn);
+ struct kvm *kvm = vcpu->kvm;
struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
+ struct vm_area_struct *vma;
+ pfn_t pfn;
write_fault = kvm_is_write_fault(kvm_vcpu_get_hsr(vcpu));
if (fault_status == FSC_PERM && !write_fault) {
@@ -524,6 +637,26 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
return -EFAULT;
}
+ /* Let's check if we will get back a huge page backed by hugetlbfs */
+ down_read(&current->mm->mmap_sem);
+ vma = find_vma_intersection(current->mm, hva, hva + 1);
+ if (is_vm_hugetlb_page(vma)) {
+ hugetlb = true;
+ gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT;
+ } else {
+ /*
+ * Pages belonging to VMAs not aligned to the PMD mapping
+ * granularity cannot be mapped using block descriptors even
+ * if the pages belong to a THP for the process, because the
+ * stage-2 block descriptor will cover more than a single THP
+ * and we loose atomicity for unmapping, updates, and splits
+ * of the THP or other pages in the stage-2 block range.
+ */
+ if (vma->vm_start & ~PMD_MASK)
+ force_pte = true;
+ }
+ up_read(&current->mm->mmap_sem);
+
/* We need minimum second+third level pages */
ret = mmu_topup_memory_cache(memcache, 2, KVM_NR_MEM_OBJS);
if (ret)
@@ -541,26 +674,40 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
*/
smp_rmb();
- pfn = gfn_to_pfn_prot(vcpu->kvm, gfn, write_fault, &writable);
+ pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writable);
if (is_error_pfn(pfn))
return -EFAULT;
- new_pte = pfn_pte(pfn, PAGE_S2);
- coherent_icache_guest_page(vcpu->kvm, gfn);
-
- spin_lock(&vcpu->kvm->mmu_lock);
- if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
+ spin_lock(&kvm->mmu_lock);
+ if (mmu_notifier_retry(kvm, mmu_seq))
goto out_unlock;
- if (writable) {
- kvm_set_s2pte_writable(&new_pte);
- kvm_set_pfn_dirty(pfn);
+ if (!hugetlb && !force_pte)
+ hugetlb = transparent_hugepage_adjust(&pfn, &fault_ipa);
+
+ if (hugetlb) {
+ pmd_t new_pmd = pfn_pmd(pfn, PAGE_S2);
+ new_pmd = pmd_mkhuge(new_pmd);
+ if (writable) {
+ kvm_set_s2pmd_writable(&new_pmd);
+ kvm_set_pfn_dirty(pfn);
+ }
+ coherent_icache_guest_page(kvm, hva & PMD_MASK, PMD_SIZE);
+ ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd);
+ } else {
+ pte_t new_pte = pfn_pte(pfn, PAGE_S2);
+ if (writable) {
+ kvm_set_s2pte_writable(&new_pte);
+ kvm_set_pfn_dirty(pfn);
+ }
+ coherent_icache_guest_page(kvm, hva, PAGE_SIZE);
+ ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, false);
}
- stage2_set_pte(vcpu->kvm, memcache, fault_ipa, &new_pte, false);
+
out_unlock:
- spin_unlock(&vcpu->kvm->mmu_lock);
+ spin_unlock(&kvm->mmu_lock);
kvm_release_pfn_clean(pfn);
- return 0;
+ return ret;
}
/**
@@ -629,7 +776,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
memslot = gfn_to_memslot(vcpu->kvm, gfn);
- ret = user_mem_abort(vcpu, fault_ipa, gfn, memslot, fault_status);
+ ret = user_mem_abort(vcpu, fault_ipa, memslot, fault_status);
if (ret == 0)
ret = 1;
out_unlock:
diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c
index ae0e06b6a49..0881bf169fb 100644
--- a/arch/arm/kvm/psci.c
+++ b/arch/arm/kvm/psci.c
@@ -18,6 +18,7 @@
#include <linux/kvm_host.h>
#include <linux/wait.h>
+#include <asm/cputype.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_psci.h>
@@ -34,22 +35,30 @@ static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu)
static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
{
struct kvm *kvm = source_vcpu->kvm;
- struct kvm_vcpu *vcpu;
+ struct kvm_vcpu *vcpu = NULL, *tmp;
wait_queue_head_t *wq;
unsigned long cpu_id;
+ unsigned long mpidr;
phys_addr_t target_pc;
+ int i;
cpu_id = *vcpu_reg(source_vcpu, 1);
if (vcpu_mode_is_32bit(source_vcpu))
cpu_id &= ~((u32) 0);
- if (cpu_id >= atomic_read(&kvm->online_vcpus))
+ kvm_for_each_vcpu(i, tmp, kvm) {
+ mpidr = kvm_vcpu_get_mpidr(tmp);
+ if ((mpidr & MPIDR_HWID_BITMASK) == (cpu_id & MPIDR_HWID_BITMASK)) {
+ vcpu = tmp;
+ break;
+ }
+ }
+
+ if (!vcpu)
return KVM_PSCI_RET_INVAL;
target_pc = *vcpu_reg(source_vcpu, 2);
- vcpu = kvm_get_vcpu(kvm, cpu_id);
-
wq = kvm_arch_vcpu_wq(vcpu);
if (!waitqueue_active(wq))
return KVM_PSCI_RET_INVAL;
diff --git a/arch/arm/kvm/reset.c b/arch/arm/kvm/reset.c
index 71e08baee20..f558c073c02 100644
--- a/arch/arm/kvm/reset.c
+++ b/arch/arm/kvm/reset.c
@@ -30,16 +30,14 @@
#include <kvm/arm_arch_timer.h>
/******************************************************************************
- * Cortex-A15 Reset Values
+ * Cortex-A15 and Cortex-A7 Reset Values
*/
-static const int a15_max_cpu_idx = 3;
-
-static struct kvm_regs a15_regs_reset = {
+static struct kvm_regs cortexa_regs_reset = {
.usr_regs.ARM_cpsr = SVC_MODE | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT,
};
-static const struct kvm_irq_level a15_vtimer_irq = {
+static const struct kvm_irq_level cortexa_vtimer_irq = {
{ .irq = 27 },
.level = 1,
};
@@ -58,23 +56,22 @@ static const struct kvm_irq_level a15_vtimer_irq = {
*/
int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
{
- struct kvm_regs *cpu_reset;
+ struct kvm_regs *reset_regs;
const struct kvm_irq_level *cpu_vtimer_irq;
switch (vcpu->arch.target) {
+ case KVM_ARM_TARGET_CORTEX_A7:
case KVM_ARM_TARGET_CORTEX_A15:
- if (vcpu->vcpu_id > a15_max_cpu_idx)
- return -EINVAL;
- cpu_reset = &a15_regs_reset;
+ reset_regs = &cortexa_regs_reset;
vcpu->arch.midr = read_cpuid_id();
- cpu_vtimer_irq = &a15_vtimer_irq;
+ cpu_vtimer_irq = &cortexa_vtimer_irq;
break;
default:
return -ENODEV;
}
/* Reset core registers */
- memcpy(&vcpu->arch.regs, cpu_reset, sizeof(vcpu->arch.regs));
+ memcpy(&vcpu->arch.regs, reset_regs, sizeof(vcpu->arch.regs));
/* Reset CP15 registers */
kvm_reset_coprocs(vcpu);