summaryrefslogtreecommitdiffstats
path: root/arch/powerpc/kvm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/kvm')
-rw-r--r--arch/powerpc/kvm/44x.c3
-rw-r--r--arch/powerpc/kvm/Makefile4
-rw-r--r--arch/powerpc/kvm/book3s.c1
-rw-r--r--arch/powerpc/kvm/book3s_32_sr.S2
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu.c8
-rw-r--r--arch/powerpc/kvm/book3s_64_slb.S2
-rw-r--r--arch/powerpc/kvm/book3s_emulate.c29
-rw-r--r--arch/powerpc/kvm/book3s_exports.c6
-rw-r--r--arch/powerpc/kvm/book3s_hv.c344
-rw-r--r--arch/powerpc/kvm/book3s_hv_builtin.c1
-rw-r--r--arch/powerpc/kvm/book3s_hv_rm_mmu.c33
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S300
-rw-r--r--arch/powerpc/kvm/book3s_interrupts.S129
-rw-r--r--arch/powerpc/kvm/book3s_pr.c59
-rw-r--r--arch/powerpc/kvm/book3s_pr_papr.c158
-rw-r--r--arch/powerpc/kvm/book3s_rmhandlers.S54
-rw-r--r--arch/powerpc/kvm/book3s_segment.S117
-rw-r--r--arch/powerpc/kvm/booke.c10
-rw-r--r--arch/powerpc/kvm/e500.c2
-rw-r--r--arch/powerpc/kvm/powerpc.c56
20 files changed, 891 insertions, 427 deletions
diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c
index da3a1225c0a..7b612a76c70 100644
--- a/arch/powerpc/kvm/44x.c
+++ b/arch/powerpc/kvm/44x.c
@@ -20,6 +20,7 @@
#include <linux/kvm_host.h>
#include <linux/slab.h>
#include <linux/err.h>
+#include <linux/export.h>
#include <asm/reg.h>
#include <asm/cputable.h>
@@ -78,6 +79,8 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++)
vcpu_44x->shadow_refs[i].gtlb_index = -1;
+ vcpu->arch.cpu_type = KVM_CPU_440;
+
return 0;
}
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 08428e2c188..3688aeecc4b 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -43,18 +43,22 @@ kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_PR) := \
fpu.o \
book3s_paired_singles.o \
book3s_pr.o \
+ book3s_pr_papr.o \
book3s_emulate.o \
book3s_interrupts.o \
book3s_mmu_hpte.o \
book3s_64_mmu_host.o \
book3s_64_mmu.o \
book3s_32_mmu.o
+kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_PR) := \
+ book3s_rmhandlers.o
kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \
book3s_hv.o \
book3s_hv_interrupts.o \
book3s_64_mmu_hv.o
kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \
+ book3s_hv_rmhandlers.o \
book3s_hv_rm_mmu.o \
book3s_64_vio_hv.o \
book3s_hv_builtin.o
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index f68a34d1603..a459479995c 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -16,6 +16,7 @@
#include <linux/kvm_host.h>
#include <linux/err.h>
+#include <linux/export.h>
#include <linux/slab.h>
#include <asm/reg.h>
diff --git a/arch/powerpc/kvm/book3s_32_sr.S b/arch/powerpc/kvm/book3s_32_sr.S
index 3608471ad2d..7e06a6fc8d0 100644
--- a/arch/powerpc/kvm/book3s_32_sr.S
+++ b/arch/powerpc/kvm/book3s_32_sr.S
@@ -31,7 +31,7 @@
* R1 = host R1
* R2 = host R2
* R3 = shadow vcpu
- * all other volatile GPRS = free
+ * all other volatile GPRS = free except R4, R6
* SVCPU[CR] = guest CR
* SVCPU[XER] = guest XER
* SVCPU[CTR] = guest CTR
diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c
index c6d3e194b6b..b871721c005 100644
--- a/arch/powerpc/kvm/book3s_64_mmu.c
+++ b/arch/powerpc/kvm/book3s_64_mmu.c
@@ -128,7 +128,13 @@ static hva_t kvmppc_mmu_book3s_64_get_pteg(
dprintk("MMU: page=0x%x sdr1=0x%llx pteg=0x%llx vsid=0x%llx\n",
page, vcpu_book3s->sdr1, pteg, slbe->vsid);
- r = gfn_to_hva(vcpu_book3s->vcpu.kvm, pteg >> PAGE_SHIFT);
+ /* When running a PAPR guest, SDR1 contains a HVA address instead
+ of a GPA */
+ if (vcpu_book3s->vcpu.arch.papr_enabled)
+ r = pteg;
+ else
+ r = gfn_to_hva(vcpu_book3s->vcpu.kvm, pteg >> PAGE_SHIFT);
+
if (kvm_is_error_hva(r))
return r;
return r | (pteg & ~PAGE_MASK);
diff --git a/arch/powerpc/kvm/book3s_64_slb.S b/arch/powerpc/kvm/book3s_64_slb.S
index 04e7d3bbfe8..f2e6e48ea46 100644
--- a/arch/powerpc/kvm/book3s_64_slb.S
+++ b/arch/powerpc/kvm/book3s_64_slb.S
@@ -53,7 +53,7 @@ slb_exit_skip_ ## num:
* R1 = host R1
* R2 = host R2
* R3 = shadow vcpu
- * all other volatile GPRS = free
+ * all other volatile GPRS = free except R4, R6
* SVCPU[CR] = guest CR
* SVCPU[XER] = guest XER
* SVCPU[CTR] = guest CTR
diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c
index 46684655708..0c9dc62532d 100644
--- a/arch/powerpc/kvm/book3s_emulate.c
+++ b/arch/powerpc/kvm/book3s_emulate.c
@@ -63,6 +63,25 @@
* function pointers, so let's just disable the define. */
#undef mfsrin
+enum priv_level {
+ PRIV_PROBLEM = 0,
+ PRIV_SUPER = 1,
+ PRIV_HYPER = 2,
+};
+
+static bool spr_allowed(struct kvm_vcpu *vcpu, enum priv_level level)
+{
+ /* PAPR VMs only access supervisor SPRs */
+ if (vcpu->arch.papr_enabled && (level > PRIV_SUPER))
+ return false;
+
+ /* Limit user space to its own small SPR set */
+ if ((vcpu->arch.shared->msr & MSR_PR) && level > PRIV_PROBLEM)
+ return false;
+
+ return true;
+}
+
int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
unsigned int inst, int *advance)
{
@@ -296,6 +315,8 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
switch (sprn) {
case SPRN_SDR1:
+ if (!spr_allowed(vcpu, PRIV_HYPER))
+ goto unprivileged;
to_book3s(vcpu)->sdr1 = spr_val;
break;
case SPRN_DSISR:
@@ -390,6 +411,7 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
case SPRN_PMC4_GEKKO:
case SPRN_WPAR_GEKKO:
break;
+unprivileged:
default:
printk(KERN_INFO "KVM: invalid SPR write: %d\n", sprn);
#ifndef DEBUG_SPR
@@ -421,6 +443,8 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
break;
}
case SPRN_SDR1:
+ if (!spr_allowed(vcpu, PRIV_HYPER))
+ goto unprivileged;
kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->sdr1);
break;
case SPRN_DSISR:
@@ -449,6 +473,10 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
case SPRN_HID5:
kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[5]);
break;
+ case SPRN_CFAR:
+ case SPRN_PURR:
+ kvmppc_set_gpr(vcpu, rt, 0);
+ break;
case SPRN_GQR0:
case SPRN_GQR1:
case SPRN_GQR2:
@@ -476,6 +504,7 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
kvmppc_set_gpr(vcpu, rt, 0);
break;
default:
+unprivileged:
printk(KERN_INFO "KVM: invalid SPR read: %d\n", sprn);
#ifndef DEBUG_SPR
emulated = EMULATE_FAIL;
diff --git a/arch/powerpc/kvm/book3s_exports.c b/arch/powerpc/kvm/book3s_exports.c
index 88c8f26add0..a150817d6d4 100644
--- a/arch/powerpc/kvm/book3s_exports.c
+++ b/arch/powerpc/kvm/book3s_exports.c
@@ -17,15 +17,13 @@
* Authors: Alexander Graf <agraf@suse.de>
*/
-#include <linux/module.h>
+#include <linux/export.h>
#include <asm/kvm_book3s.h>
#ifdef CONFIG_KVM_BOOK3S_64_HV
EXPORT_SYMBOL_GPL(kvmppc_hv_entry_trampoline);
#else
-EXPORT_SYMBOL_GPL(kvmppc_handler_trampoline_enter);
-EXPORT_SYMBOL_GPL(kvmppc_handler_lowmem_trampoline);
-EXPORT_SYMBOL_GPL(kvmppc_rmcall);
+EXPORT_SYMBOL_GPL(kvmppc_entry_trampoline);
EXPORT_SYMBOL_GPL(kvmppc_load_up_fpu);
#ifdef CONFIG_ALTIVEC
EXPORT_SYMBOL_GPL(kvmppc_load_up_altivec);
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index cc0d7f1b19a..0cdbc07cec1 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -24,6 +24,7 @@
#include <linux/preempt.h>
#include <linux/sched.h>
#include <linux/delay.h>
+#include <linux/export.h>
#include <linux/fs.h>
#include <linux/anon_inodes.h>
#include <linux/cpumask.h>
@@ -62,6 +63,8 @@
/* #define EXIT_DEBUG_SIMPLE */
/* #define EXIT_DEBUG_INT */
+static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
+
void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
local_paca->kvm_hstate.kvm_vcpu = vcpu;
@@ -72,40 +75,10 @@ void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
{
}
-static void kvmppc_vcpu_blocked(struct kvm_vcpu *vcpu);
-static void kvmppc_vcpu_unblocked(struct kvm_vcpu *vcpu);
-
-void kvmppc_vcpu_block(struct kvm_vcpu *vcpu)
-{
- u64 now;
- unsigned long dec_nsec;
-
- now = get_tb();
- if (now >= vcpu->arch.dec_expires && !kvmppc_core_pending_dec(vcpu))
- kvmppc_core_queue_dec(vcpu);
- if (vcpu->arch.pending_exceptions)
- return;
- if (vcpu->arch.dec_expires != ~(u64)0) {
- dec_nsec = (vcpu->arch.dec_expires - now) * NSEC_PER_SEC /
- tb_ticks_per_sec;
- hrtimer_start(&vcpu->arch.dec_timer, ktime_set(0, dec_nsec),
- HRTIMER_MODE_REL);
- }
-
- kvmppc_vcpu_blocked(vcpu);
-
- kvm_vcpu_block(vcpu);
- vcpu->stat.halt_wakeup++;
-
- if (vcpu->arch.dec_expires != ~(u64)0)
- hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
-
- kvmppc_vcpu_unblocked(vcpu);
-}
-
void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
{
vcpu->arch.shregs.msr = msr;
+ kvmppc_end_cede(vcpu);
}
void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr)
@@ -257,15 +230,6 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
switch (req) {
case H_CEDE:
- vcpu->arch.shregs.msr |= MSR_EE;
- vcpu->arch.ceded = 1;
- smp_mb();
- if (!vcpu->arch.prodded)
- kvmppc_vcpu_block(vcpu);
- else
- vcpu->arch.prodded = 0;
- smp_mb();
- vcpu->arch.ceded = 0;
break;
case H_PROD:
target = kvmppc_get_gpr(vcpu, 4);
@@ -388,20 +352,6 @@ static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
break;
}
-
- if (!(r & RESUME_HOST)) {
- /* To avoid clobbering exit_reason, only check for signals if
- * we aren't already exiting to userspace for some other
- * reason. */
- if (signal_pending(tsk)) {
- vcpu->stat.signal_exits++;
- run->exit_reason = KVM_EXIT_INTR;
- r = -EINTR;
- } else {
- kvmppc_core_deliver_interrupts(vcpu);
- }
- }
-
return r;
}
@@ -479,13 +429,9 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
kvmppc_mmu_book3s_hv_init(vcpu);
/*
- * Some vcpus may start out in stopped state. If we initialize
- * them to busy-in-host state they will stop other vcpus in the
- * vcore from running. Instead we initialize them to blocked
- * state, effectively considering them to be stopped until we
- * see the first run ioctl for them.
+ * We consider the vcpu stopped until we see the first run ioctl for it.
*/
- vcpu->arch.state = KVMPPC_VCPU_BLOCKED;
+ vcpu->arch.state = KVMPPC_VCPU_STOPPED;
init_waitqueue_head(&vcpu->arch.cpu_run);
@@ -496,6 +442,7 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
if (vcore) {
INIT_LIST_HEAD(&vcore->runnable_threads);
spin_lock_init(&vcore->lock);
+ init_waitqueue_head(&vcore->wq);
}
kvm->arch.vcores[core] = vcore;
}
@@ -506,10 +453,12 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
spin_lock(&vcore->lock);
++vcore->num_threads;
- ++vcore->n_blocked;
spin_unlock(&vcore->lock);
vcpu->arch.vcore = vcore;
+ vcpu->arch.cpu_type = KVM_CPU_3S_64;
+ kvmppc_sanity_check(vcpu);
+
return vcpu;
free_vcpu:
@@ -524,30 +473,31 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
kfree(vcpu);
}
-static void kvmppc_vcpu_blocked(struct kvm_vcpu *vcpu)
+static void kvmppc_set_timer(struct kvm_vcpu *vcpu)
{
- struct kvmppc_vcore *vc = vcpu->arch.vcore;
+ unsigned long dec_nsec, now;
- spin_lock(&vc->lock);
- vcpu->arch.state = KVMPPC_VCPU_BLOCKED;
- ++vc->n_blocked;
- if (vc->n_runnable > 0 &&
- vc->n_runnable + vc->n_blocked == vc->num_threads) {
- vcpu = list_first_entry(&vc->runnable_threads, struct kvm_vcpu,
- arch.run_list);
- wake_up(&vcpu->arch.cpu_run);
+ now = get_tb();
+ if (now > vcpu->arch.dec_expires) {
+ /* decrementer has already gone negative */
+ kvmppc_core_queue_dec(vcpu);
+ kvmppc_core_deliver_interrupts(vcpu);
+ return;
}
- spin_unlock(&vc->lock);
+ dec_nsec = (vcpu->arch.dec_expires - now) * NSEC_PER_SEC
+ / tb_ticks_per_sec;
+ hrtimer_start(&vcpu->arch.dec_timer, ktime_set(0, dec_nsec),
+ HRTIMER_MODE_REL);
+ vcpu->arch.timer_running = 1;
}
-static void kvmppc_vcpu_unblocked(struct kvm_vcpu *vcpu)
+static void kvmppc_end_cede(struct kvm_vcpu *vcpu)
{
- struct kvmppc_vcore *vc = vcpu->arch.vcore;
-
- spin_lock(&vc->lock);
- vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
- --vc->n_blocked;
- spin_unlock(&vc->lock);
+ vcpu->arch.ceded = 0;
+ if (vcpu->arch.timer_running) {
+ hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
+ vcpu->arch.timer_running = 0;
+ }
}
extern int __kvmppc_vcore_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
@@ -562,6 +512,7 @@ static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
return;
vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
--vc->n_runnable;
+ ++vc->n_busy;
/* decrement the physical thread id of each following vcpu */
v = vcpu;
list_for_each_entry_continue(v, &vc->runnable_threads, arch.run_list)
@@ -575,15 +526,20 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu)
struct paca_struct *tpaca;
struct kvmppc_vcore *vc = vcpu->arch.vcore;
+ if (vcpu->arch.timer_running) {
+ hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
+ vcpu->arch.timer_running = 0;
+ }
cpu = vc->pcpu + vcpu->arch.ptid;
tpaca = &paca[cpu];
tpaca->kvm_hstate.kvm_vcpu = vcpu;
tpaca->kvm_hstate.kvm_vcore = vc;
+ tpaca->kvm_hstate.napping = 0;
+ vcpu->cpu = vc->pcpu;
smp_wmb();
#ifdef CONFIG_PPC_ICP_NATIVE
if (vcpu->arch.ptid) {
tpaca->cpu_start = 0x80;
- tpaca->kvm_hstate.in_guest = KVM_GUEST_MODE_GUEST;
wmb();
xics_wake_cpu(cpu);
++vc->n_woken;
@@ -631,9 +587,10 @@ static int on_primary_thread(void)
*/
static int kvmppc_run_core(struct kvmppc_vcore *vc)
{
- struct kvm_vcpu *vcpu, *vnext;
+ struct kvm_vcpu *vcpu, *vcpu0, *vnext;
long ret;
u64 now;
+ int ptid;
/* don't start if any threads have a signal pending */
list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
@@ -652,29 +609,50 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc)
goto out;
}
+ /*
+ * Assign physical thread IDs, first to non-ceded vcpus
+ * and then to ceded ones.
+ */
+ ptid = 0;
+ vcpu0 = NULL;
+ list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
+ if (!vcpu->arch.ceded) {
+ if (!ptid)
+ vcpu0 = vcpu;
+ vcpu->arch.ptid = ptid++;
+ }
+ }
+ if (!vcpu0)
+ return 0; /* nothing to run */
+ list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
+ if (vcpu->arch.ceded)
+ vcpu->arch.ptid = ptid++;
+
vc->n_woken = 0;
vc->nap_count = 0;
vc->entry_exit_count = 0;
- vc->vcore_running = 1;
+ vc->vcore_state = VCORE_RUNNING;
vc->in_guest = 0;
vc->pcpu = smp_processor_id();
+ vc->napping_threads = 0;
list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
kvmppc_start_thread(vcpu);
- vcpu = list_first_entry(&vc->runnable_threads, struct kvm_vcpu,
- arch.run_list);
+ preempt_disable();
spin_unlock(&vc->lock);
- preempt_disable();
kvm_guest_enter();
- __kvmppc_vcore_entry(NULL, vcpu);
+ __kvmppc_vcore_entry(NULL, vcpu0);
- /* wait for secondary threads to finish writing their state to memory */
spin_lock(&vc->lock);
+ /* disable sending of IPIs on virtual external irqs */
+ list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
+ vcpu->cpu = -1;
+ /* wait for secondary threads to finish writing their state to memory */
if (vc->nap_count < vc->n_woken)
kvmppc_wait_for_nap(vc);
/* prevent other vcpu threads from doing kvmppc_start_thread() now */
- vc->vcore_running = 2;
+ vc->vcore_state = VCORE_EXITING;
spin_unlock(&vc->lock);
/* make sure updates to secondary vcpu structs are visible now */
@@ -690,22 +668,26 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc)
if (now < vcpu->arch.dec_expires &&
kvmppc_core_pending_dec(vcpu))
kvmppc_core_dequeue_dec(vcpu);
- if (!vcpu->arch.trap) {
- if (signal_pending(vcpu->arch.run_task)) {
- vcpu->arch.kvm_run->exit_reason = KVM_EXIT_INTR;
- vcpu->arch.ret = -EINTR;
- }
- continue; /* didn't get to run */
- }
- ret = kvmppc_handle_exit(vcpu->arch.kvm_run, vcpu,
- vcpu->arch.run_task);
+
+ ret = RESUME_GUEST;
+ if (vcpu->arch.trap)
+ ret = kvmppc_handle_exit(vcpu->arch.kvm_run, vcpu,
+ vcpu->arch.run_task);
+
vcpu->arch.ret = ret;
vcpu->arch.trap = 0;
+
+ if (vcpu->arch.ceded) {
+ if (ret != RESUME_GUEST)
+ kvmppc_end_cede(vcpu);
+ else
+ kvmppc_set_timer(vcpu);
+ }
}
spin_lock(&vc->lock);
out:
- vc->vcore_running = 0;
+ vc->vcore_state = VCORE_INACTIVE;
list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads,
arch.run_list) {
if (vcpu->arch.ret != RESUME_GUEST) {
@@ -717,82 +699,130 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc)
return 1;
}
-static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
+/*
+ * Wait for some other vcpu thread to execute us, and
+ * wake us up when we need to handle something in the host.
+ */
+static void kvmppc_wait_for_exec(struct kvm_vcpu *vcpu, int wait_state)
{
- int ptid;
- int wait_state;
- struct kvmppc_vcore *vc;
DEFINE_WAIT(wait);
- /* No need to go into the guest when all we do is going out */
- if (signal_pending(current)) {
- kvm_run->exit_reason = KVM_EXIT_INTR;
- return -EINTR;
+ prepare_to_wait(&vcpu->arch.cpu_run, &wait, wait_state);
+ if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE)
+ schedule();
+ finish_wait(&vcpu->arch.cpu_run, &wait);
+}
+
+/*
+ * All the vcpus in this vcore are idle, so wait for a decrementer
+ * or external interrupt to one of the vcpus. vc->lock is held.
+ */
+static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
+{
+ DEFINE_WAIT(wait);
+ struct kvm_vcpu *v;
+ int all_idle = 1;
+
+ prepare_to_wait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
+ vc->vcore_state = VCORE_SLEEPING;
+ spin_unlock(&vc->lock);
+ list_for_each_entry(v, &vc->runnable_threads, arch.run_list) {
+ if (!v->arch.ceded || v->arch.pending_exceptions) {
+ all_idle = 0;
+ break;
+ }
}
+ if (all_idle)
+ schedule();
+ finish_wait(&vc->wq, &wait);
+ spin_lock(&vc->lock);
+ vc->vcore_state = VCORE_INACTIVE;
+}
- /* On PPC970, check that we have an RMA region */
- if (!vcpu->kvm->arch.rma && cpu_has_feature(CPU_FTR_ARCH_201))
- return -EPERM;
+static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
+{
+ int n_ceded;
+ int prev_state;
+ struct kvmppc_vcore *vc;
+ struct kvm_vcpu *v, *vn;
kvm_run->exit_reason = 0;
vcpu->arch.ret = RESUME_GUEST;
vcpu->arch.trap = 0;
- flush_fp_to_thread(current);
- flush_altivec_to_thread(current);
- flush_vsx_to_thread(current);
-
/*
* Synchronize with other threads in this virtual core
*/
vc = vcpu->arch.vcore;
spin_lock(&vc->lock);
- /* This happens the first time this is called for a vcpu */
- if (vcpu->arch.state == KVMPPC_VCPU_BLOCKED)
- --vc->n_blocked;
- vcpu->arch.state = KVMPPC_VCPU_RUNNABLE;
- ptid = vc->n_runnable;
+ vcpu->arch.ceded = 0;
vcpu->arch.run_task = current;
vcpu->arch.kvm_run = kvm_run;
- vcpu->arch.ptid = ptid;
+ prev_state = vcpu->arch.state;
+ vcpu->arch.state = KVMPPC_VCPU_RUNNABLE;
list_add_tail(&vcpu->arch.run_list, &vc->runnable_threads);
++vc->n_runnable;
- wait_state = TASK_INTERRUPTIBLE;
- while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) {
- if (signal_pending(current)) {
- if (!vc->vcore_running) {
- kvm_run->exit_reason = KVM_EXIT_INTR;
- vcpu->arch.ret = -EINTR;
- break;
- }
- /* have to wait for vcore to stop executing guest */
- wait_state = TASK_UNINTERRUPTIBLE;
- smp_send_reschedule(vc->pcpu);
+ /*
+ * This happens the first time this is called for a vcpu.
+ * If the vcore is already running, we may be able to start
+ * this thread straight away and have it join in.
+ */
+ if (prev_state == KVMPPC_VCPU_STOPPED) {
+ if (vc->vcore_state == VCORE_RUNNING &&
+ VCORE_EXIT_COUNT(vc) == 0) {
+ vcpu->arch.ptid = vc->n_runnable - 1;
+ kvmppc_start_thread(vcpu);
}
- if (!vc->vcore_running &&
- vc->n_runnable + vc->n_blocked == vc->num_threads) {
- /* we can run now */
- if (kvmppc_run_core(vc))
- continue;
- }
+ } else if (prev_state == KVMPPC_VCPU_BUSY_IN_HOST)
+ --vc->n_busy;
- if (vc->vcore_running == 1 && VCORE_EXIT_COUNT(vc) == 0)
- kvmppc_start_thread(vcpu);
+ while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE &&
+ !signal_pending(current)) {
+ if (vc->n_busy || vc->vcore_state != VCORE_INACTIVE) {
+ spin_unlock(&vc->lock);
+ kvmppc_wait_for_exec(vcpu, TASK_INTERRUPTIBLE);
+ spin_lock(&vc->lock);
+ continue;
+ }
+ n_ceded = 0;
+ list_for_each_entry(v, &vc->runnable_threads, arch.run_list)
+ n_ceded += v->arch.ceded;
+ if (n_ceded == vc->n_runnable)
+ kvmppc_vcore_blocked(vc);
+ else
+ kvmppc_run_core(vc);
+
+ list_for_each_entry_safe(v, vn, &vc->runnable_threads,
+ arch.run_list) {
+ kvmppc_core_deliver_interrupts(v);
+ if (signal_pending(v->arch.run_task)) {
+ kvmppc_remove_runnable(vc, v);
+ v->stat.signal_exits++;
+ v->arch.kvm_run->exit_reason = KVM_EXIT_INTR;
+ v->arch.ret = -EINTR;
+ wake_up(&v->arch.cpu_run);
+ }
+ }
+ }
- /* wait for other threads to come in, or wait for vcore */
- prepare_to_wait(&vcpu->arch.cpu_run, &wait, wait_state);
- spin_unlock(&vc->lock);
- schedule();
- finish_wait(&vcpu->arch.cpu_run, &wait);
- spin_lock(&vc->lock);
+ if (signal_pending(current)) {
+ if (vc->vcore_state == VCORE_RUNNING ||
+ vc->vcore_state == VCORE_EXITING) {
+ spin_unlock(&vc->lock);
+ kvmppc_wait_for_exec(vcpu, TASK_UNINTERRUPTIBLE);
+ spin_lock(&vc->lock);
+ }
+ if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) {
+ kvmppc_remove_runnable(vc, vcpu);
+ vcpu->stat.signal_exits++;
+ kvm_run->exit_reason = KVM_EXIT_INTR;
+ vcpu->arch.ret = -EINTR;
+ }
}
- if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE)
- kvmppc_remove_runnable(vc, vcpu);
spin_unlock(&vc->lock);
-
return vcpu->arch.ret;
}
@@ -800,6 +830,26 @@ int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu)
{
int r;
+ if (!vcpu->arch.sane) {
+ run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ return -EINVAL;
+ }
+
+ /* No need to go into the guest when all we'll do is come back out */
+ if (signal_pending(current)) {
+ run->exit_reason = KVM_EXIT_INTR;
+ return -EINTR;
+ }
+
+ /* On PPC970, check that we have an RMA region */
+ if (!vcpu->kvm->arch.rma && cpu_has_feature(CPU_FTR_ARCH_201))
+ return -EPERM;
+
+ flush_fp_to_thread(current);
+ flush_altivec_to_thread(current);
+ flush_vsx_to_thread(current);
+ vcpu->arch.wqp = &vcpu->arch.vcore->wq;
+
do {
r = kvmppc_run_vcpu(run, vcpu);
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index d43120355ee..286f13d601c 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -8,6 +8,7 @@
#include <linux/kvm_host.h>
#include <linux/preempt.h>
+#include <linux/export.h>
#include <linux/sched.h>
#include <linux/spinlock.h>
#include <linux/bootmem.h>
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index fcfe6b05555..bacb0cfa360 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -110,39 +110,6 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
return H_SUCCESS;
}
-static unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
- unsigned long pte_index)
-{
- unsigned long rb, va_low;
-
- rb = (v & ~0x7fUL) << 16; /* AVA field */
- va_low = pte_index >> 3;
- if (v & HPTE_V_SECONDARY)
- va_low = ~va_low;
- /* xor vsid from AVA */
- if (!(v & HPTE_V_1TB_SEG))
- va_low ^= v >> 12;
- else
- va_low ^= v >> 24;
- va_low &= 0x7ff;
- if (v & HPTE_V_LARGE) {
- rb |= 1; /* L field */
- if (cpu_has_feature(CPU_FTR_ARCH_206) &&
- (r & 0xff000)) {
- /* non-16MB large page, must be 64k */
- /* (masks depend on page size) */
- rb |= 0x1000; /* page encoding in LP field */
- rb |= (va_low & 0x7f) << 16; /* 7b of VA in AVA/LP field */
- rb |= (va_low & 0xfe); /* AVAL field (P7 doesn't seem to care) */
- }
- } else {
- /* 4kB page */
- rb |= (va_low & 0x7ff) << 12; /* remaining 11b of VA */
- }
- rb |= (v >> 54) & 0x300; /* B field */
- return rb;
-}
-
#define LOCK_TOKEN (*(u32 *)(&get_paca()->lock_token))
static inline int try_lock_tlbie(unsigned int *lock)
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index de2950135e6..44d8829334a 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -20,7 +20,10 @@
#include <asm/ppc_asm.h>
#include <asm/kvm_asm.h>
#include <asm/reg.h>
+#include <asm/mmu.h>
#include <asm/page.h>
+#include <asm/ptrace.h>
+#include <asm/hvcall.h>
#include <asm/asm-offsets.h>
#include <asm/exception-64s.h>
@@ -49,7 +52,7 @@ kvmppc_skip_Hinterrupt:
b .
/*
- * Call kvmppc_handler_trampoline_enter in real mode.
+ * Call kvmppc_hv_entry in real mode.
* Must be called with interrupts hard-disabled.
*
* Input Registers:
@@ -89,6 +92,12 @@ _GLOBAL(kvmppc_hv_entry_trampoline)
kvm_start_guest:
ld r1,PACAEMERGSP(r13)
subi r1,r1,STACK_FRAME_OVERHEAD
+ ld r2,PACATOC(r13)
+
+ /* were we napping due to cede? */
+ lbz r0,HSTATE_NAPPING(r13)
+ cmpwi r0,0
+ bne kvm_end_cede
/* get vcpu pointer */
ld r4, HSTATE_KVM_VCPU(r13)
@@ -276,15 +285,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
cmpwi r0,0
beq 20b
- /* Set LPCR. Set the MER bit if there is a pending external irq. */
+ /* Set LPCR and RMOR. */
10: ld r8,KVM_LPCR(r9)
- ld r0,VCPU_PENDING_EXC(r4)
- li r7,(1 << BOOK3S_IRQPRIO_EXTERNAL)
- oris r7,r7,(1 << BOOK3S_IRQPRIO_EXTERNAL_LEVEL)@h
- and. r0,r0,r7
- beq 11f
- ori r8,r8,LPCR_MER
-11: mtspr SPRN_LPCR,r8
+ mtspr SPRN_LPCR,r8
ld r8,KVM_RMOR(r9)
mtspr SPRN_RMOR,r8
isync
@@ -448,19 +451,50 @@ toc_tlbie_lock:
mtctr r6
mtxer r7
- /* Move SRR0 and SRR1 into the respective regs */
+kvmppc_cede_reentry: /* r4 = vcpu, r13 = paca */
ld r6, VCPU_SRR0(r4)
ld r7, VCPU_SRR1(r4)
- mtspr SPRN_SRR0, r6
- mtspr SPRN_SRR1, r7
-
ld r10, VCPU_PC(r4)
+ ld r11, VCPU_MSR(r4) /* r11 = vcpu->arch.msr & ~MSR_HV */
- ld r11, VCPU_MSR(r4) /* r10 = vcpu->arch.msr & ~MSR_HV */
rldicl r11, r11, 63 - MSR_HV_LG, 1
rotldi r11, r11, 1 + MSR_HV_LG
ori r11, r11, MSR_ME
+ /* Check if we can deliver an external or decrementer interrupt now */
+ ld r0,VCPU_PENDING_EXC(r4)
+ li r8,(1 << BOOK3S_IRQPRIO_EXTERNAL)
+ oris r8,r8,(1 << BOOK3S_IRQPRIO_EXTERNAL_LEVEL)@h
+ and r0,r0,r8
+ cmpdi cr1,r0,0
+ andi. r0,r11,MSR_EE
+ beq cr1,11f
+BEGIN_FTR_SECTION
+ mfspr r8,SPRN_LPCR
+ ori r8,r8,LPCR_MER
+ mtspr SPRN_LPCR,r8
+ isync
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
+ beq 5f
+ li r0,BOOK3S_INTERRUPT_EXTERNAL
+12: mr r6,r10
+ mr r10,r0
+ mr r7,r11
+ li r11,(MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */
+ rotldi r11,r11,63
+ b 5f
+11: beq 5f
+ mfspr r0,SPRN_DEC
+ cmpwi r0,0
+ li r0,BOOK3S_INTERRUPT_DECREMENTER
+ blt 12b
+
+ /* Move SRR0 and SRR1 into the respective regs */
+5: mtspr SPRN_SRR0, r6
+ mtspr SPRN_SRR1, r7
+ li r0,0
+ stb r0,VCPU_CEDED(r4) /* cancel cede */
+
fast_guest_return:
mtspr SPRN_HSRR0,r10
mtspr SPRN_HSRR1,r11
@@ -574,21 +608,20 @@ kvmppc_interrupt:
/* See if this is something we can handle in real mode */
cmpwi r12,BOOK3S_INTERRUPT_SYSCALL
beq hcall_try_real_mode
-hcall_real_cont:
/* Check for mediated interrupts (could be done earlier really ...) */
BEGIN_FTR_SECTION
cmpwi r12,BOOK3S_INTERRUPT_EXTERNAL
bne+ 1f
- ld r5,VCPU_KVM(r9)
- ld r5,KVM_LPCR(r5)
andi. r0,r11,MSR_EE
beq 1f
+ mfspr r5,SPRN_LPCR
andi. r0,r5,LPCR_MER
bne bounce_ext_interrupt
1:
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
+hcall_real_cont: /* r9 = vcpu, r12 = trap, r13 = paca */
/* Save DEC */
mfspr r5,SPRN_DEC
mftb r6
@@ -682,7 +715,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_201)
slbia
ptesync
-hdec_soon:
+hdec_soon: /* r9 = vcpu, r12 = trap, r13 = paca */
BEGIN_FTR_SECTION
b 32f
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
@@ -700,6 +733,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
addi r0,r3,0x100
stwcx. r0,0,r6
bne 41b
+ lwsync
/*
* At this point we have an interrupt that we have to pass
@@ -713,18 +747,39 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
* interrupt, since the other threads will already be on their
* way here in that case.
*/
+ cmpwi r3,0x100 /* Are we the first here? */
+ bge 43f
+ cmpwi r3,1 /* Are any other threads in the guest? */
+ ble 43f
cmpwi r12,BOOK3S_INTERRUPT_HV_DECREMENTER
beq 40f
- cmpwi r3,0x100 /* Are we the first here? */
- bge 40f
- cmpwi r3,1
- ble 40f
li r0,0
mtspr SPRN_HDEC,r0
40:
+ /*
+ * Send an IPI to any napping threads, since an HDEC interrupt
+ * doesn't wake CPUs up from nap.
+ */
+ lwz r3,VCORE_NAPPING_THREADS(r5)
+ lwz r4,VCPU_PTID(r9)
+ li r0,1
+ sldi r0,r0,r4
+ andc. r3,r3,r0 /* no sense IPI'ing ourselves */
+ beq 43f
+ mulli r4,r4,PACA_SIZE /* get paca for thread 0 */
+ subf r6,r4,r13
+42: andi. r0,r3,1
+ beq 44f
+ ld r8,HSTATE_XICS_PHYS(r6) /* get thread's XICS reg addr */
+ li r0,IPI_PRIORITY
+ li r7,XICS_QIRR
+ stbcix r0,r7,r8 /* trigger the IPI */
+44: srdi. r3,r3,1
+ addi r6,r6,PACA_SIZE
+ bne 42b
/* Secondary threads wait for primary to do partition switch */
- ld r4,VCPU_KVM(r9) /* pointer to struct kvm */
+43: ld r4,VCPU_KVM(r9) /* pointer to struct kvm */
ld r5,HSTATE_KVM_VCORE(r13)
lwz r3,VCPU_PTID(r9)
cmpwi r3,0
@@ -1077,7 +1132,6 @@ hcall_try_real_mode:
hcall_real_fallback:
li r12,BOOK3S_INTERRUPT_SYSCALL
ld r9, HSTATE_KVM_VCPU(r13)
- ld r11, VCPU_MSR(r9)
b hcall_real_cont
@@ -1139,7 +1193,7 @@ hcall_real_table:
.long 0 /* 0xd4 */
.long 0 /* 0xd8 */
.long 0 /* 0xdc */
- .long 0 /* 0xe0 */
+ .long .kvmppc_h_cede - hcall_real_table
.long 0 /* 0xe4 */
.long 0 /* 0xe8 */
.long 0 /* 0xec */
@@ -1168,7 +1222,8 @@ bounce_ext_interrupt:
mtspr SPRN_SRR0,r10
mtspr SPRN_SRR1,r11
li r10,BOOK3S_INTERRUPT_EXTERNAL
- LOAD_REG_IMMEDIATE(r11,MSR_SF | MSR_ME);
+ li r11,(MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */
+ rotldi r11,r11,63
b fast_guest_return
_GLOBAL(kvmppc_h_set_dabr)
@@ -1177,6 +1232,178 @@ _GLOBAL(kvmppc_h_set_dabr)
li r3,0
blr
+_GLOBAL(kvmppc_h_cede)
+ ori r11,r11,MSR_EE
+ std r11,VCPU_MSR(r3)
+ li r0,1
+ stb r0,VCPU_CEDED(r3)
+ sync /* order setting ceded vs. testing prodded */
+ lbz r5,VCPU_PRODDED(r3)
+ cmpwi r5,0
+ bne 1f
+ li r0,0 /* set trap to 0 to say hcall is handled */
+ stw r0,VCPU_TRAP(r3)
+ li r0,H_SUCCESS
+ std r0,VCPU_GPR(r3)(r3)
+BEGIN_FTR_SECTION
+ b 2f /* just send it up to host on 970 */
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206)
+
+ /*
+ * Set our bit in the bitmask of napping threads unless all the
+ * other threads are already napping, in which case we send this
+ * up to the host.
+ */
+ ld r5,HSTATE_KVM_VCORE(r13)
+ lwz r6,VCPU_PTID(r3)
+ lwz r8,VCORE_ENTRY_EXIT(r5)
+ clrldi r8,r8,56
+ li r0,1
+ sld r0,r0,r6
+ addi r6,r5,VCORE_NAPPING_THREADS
+31: lwarx r4,0,r6
+ or r4,r4,r0
+ PPC_POPCNTW(r7,r4)
+ cmpw r7,r8
+ bge 2f
+ stwcx. r4,0,r6
+ bne 31b
+ li r0,1
+ stb r0,HSTATE_NAPPING(r13)
+ /* order napping_threads update vs testing entry_exit_count */
+ lwsync
+ mr r4,r3
+ lwz r7,VCORE_ENTRY_EXIT(r5)
+ cmpwi r7,0x100
+ bge 33f /* another thread already exiting */
+
+/*
+ * Although not specifically required by the architecture, POWER7
+ * preserves the following registers in nap mode, even if an SMT mode
+ * switch occurs: SLB entries, PURR, SPURR, AMOR, UAMOR, AMR, SPRG0-3,
+ * DAR, DSISR, DABR, DABRX, DSCR, PMCx, MMCRx, SIAR, SDAR.
+ */
+ /* Save non-volatile GPRs */
+ std r14, VCPU_GPR(r14)(r3)
+ std r15, VCPU_GPR(r15)(r3)
+ std r16, VCPU_GPR(r16)(r3)
+ std r17, VCPU_GPR(r17)(r3)
+ std r18, VCPU_GPR(r18)(r3)
+ std r19, VCPU_GPR(r19)(r3)
+ std r20, VCPU_GPR(r20)(r3)
+ std r21, VCPU_GPR(r21)(r3)
+ std r22, VCPU_GPR(r22)(r3)
+ std r23, VCPU_GPR(r23)(r3)
+ std r24, VCPU_GPR(r24)(r3)
+ std r25, VCPU_GPR(r25)(r3)
+ std r26, VCPU_GPR(r26)(r3)
+ std r27, VCPU_GPR(r27)(r3)
+ std r28, VCPU_GPR(r28)(r3)
+ std r29, VCPU_GPR(r29)(r3)
+ std r30, VCPU_GPR(r30)(r3)
+ std r31, VCPU_GPR(r31)(r3)
+
+ /* save FP state */
+ bl .kvmppc_save_fp
+
+ /*
+ * Take a nap until a decrementer or external interrupt occurs,
+ * with PECE1 (wake on decr) and PECE0 (wake on external) set in LPCR
+ */
+ li r0,0x80
+ stb r0,PACAPROCSTART(r13)
+ mfspr r5,SPRN_LPCR
+ ori r5,r5,LPCR_PECE0 | LPCR_PECE1
+ mtspr SPRN_LPCR,r5
+ isync
+ li r0, 0
+ std r0, HSTATE_SCRATCH0(r13)
+ ptesync
+ ld r0, HSTATE_SCRATCH0(r13)
+1: cmpd r0, r0
+ bne 1b
+ nap
+ b .
+
+kvm_end_cede:
+ /* Woken by external or decrementer interrupt */
+ ld r1, HSTATE_HOST_R1(r13)
+ ld r2, PACATOC(r13)
+
+ /* If we're a secondary thread and we got here by an IPI, ack it */
+ ld r4,HSTATE_KVM_VCPU(r13)
+ lwz r3,VCPU_PTID(r4)
+ cmpwi r3,0
+ beq 27f
+ mfspr r3,SPRN_SRR1
+ rlwinm r3,r3,44-31,0x7 /* extract wake reason field */
+ cmpwi r3,4 /* was it an external interrupt? */
+ bne 27f
+ ld r5, HSTATE_XICS_PHYS(r13)
+ li r0,0xff
+ li r6,XICS_QIRR
+ li r7,XICS_XIRR
+ lwzcix r8,r5,r7 /* ack the interrupt */
+ sync
+ stbcix r0,r5,r6 /* clear it */
+ stwcix r8,r5,r7 /* EOI it */
+27:
+ /* load up FP state */
+ bl kvmppc_load_fp
+
+ /* Load NV GPRS */
+ ld r14, VCPU_GPR(r14)(r4)
+ ld r15, VCPU_GPR(r15)(r4)
+ ld r16, VCPU_GPR(r16)(r4)
+ ld r17, VCPU_GPR(r17)(r4)
+ ld r18, VCPU_GPR(r18)(r4)
+ ld r19, VCPU_GPR(r19)(r4)
+ ld r20, VCPU_GPR(r20)(r4)
+ ld r21, VCPU_GPR(r21)(r4)
+ ld r22, VCPU_GPR(r22)(r4)
+ ld r23, VCPU_GPR(r23)(r4)
+ ld r24, VCPU_GPR(r24)(r4)
+ ld r25, VCPU_GPR(r25)(r4)
+ ld r26, VCPU_GPR(r26)(r4)
+ ld r27, VCPU_GPR(r27)(r4)
+ ld r28, VCPU_GPR(r28)(r4)
+ ld r29, VCPU_GPR(r29)(r4)
+ ld r30, VCPU_GPR(r30)(r4)
+ ld r31, VCPU_GPR(r31)(r4)
+
+ /* clear our bit in vcore->napping_threads */
+33: ld r5,HSTATE_KVM_VCORE(r13)
+ lwz r3,VCPU_PTID(r4)
+ li r0,1
+ sld r0,r0,r3
+ addi r6,r5,VCORE_NAPPING_THREADS
+32: lwarx r7,0,r6
+ andc r7,r7,r0
+ stwcx. r7,0,r6
+ bne 32b
+ li r0,0
+ stb r0,HSTATE_NAPPING(r13)
+
+ /* see if any other thread is already exiting */
+ lwz r0,VCORE_ENTRY_EXIT(r5)
+ cmpwi r0,0x100
+ blt kvmppc_cede_reentry /* if not go back to guest */
+
+ /* some threads are exiting, so go to the guest exit path */
+ b hcall_real_fallback
+
+ /* cede when already previously prodded case */
+1: li r0,0
+ stb r0,VCPU_PRODDED(r3)
+ sync /* order testing prodded vs. clearing ceded */
+ stb r0,VCPU_CEDED(r3)
+ li r3,H_SUCCESS
+ blr
+
+ /* we've ceded but we want to give control to the host */
+2: li r3,H_TOO_HARD
+ blr
+
secondary_too_late:
ld r5,HSTATE_KVM_VCORE(r13)
HMT_LOW
@@ -1194,14 +1421,20 @@ secondary_too_late:
slbmte r6,r5
1: addi r11,r11,16
.endr
- b 50f
secondary_nap:
- /* Clear any pending IPI */
-50: ld r5, HSTATE_XICS_PHYS(r13)
+ /* Clear any pending IPI - assume we're a secondary thread */
+ ld r5, HSTATE_XICS_PHYS(r13)
+ li r7, XICS_XIRR
+ lwzcix r3, r5, r7 /* ack any pending interrupt */
+ rlwinm. r0, r3, 0, 0xffffff /* any pending? */
+ beq 37f
+ sync
li r0, 0xff
li r6, XICS_QIRR
- stbcix r0, r5, r6
+ stbcix r0, r5, r6 /* clear the IPI */
+ stwcix r3, r5, r7 /* EOI it */
+37: sync
/* increment the nap count and then go to nap mode */
ld r4, HSTATE_KVM_VCORE(r13)
@@ -1211,13 +1444,12 @@ secondary_nap:
addi r3, r3, 1
stwcx. r3, 0, r4
bne 51b
- isync
+ li r3, LPCR_PECE0
mfspr r4, SPRN_LPCR
- li r0, LPCR_PECE
- andc r4, r4, r0
- ori r4, r4, LPCR_PECE0 /* exit nap on interrupt */
+ rlwimi r4, r3, 0, LPCR_PECE0 | LPCR_PECE1
mtspr SPRN_LPCR, r4
+ isync
li r0, 0
std r0, HSTATE_SCRATCH0(r13)
ptesync
diff --git a/arch/powerpc/kvm/book3s_interrupts.S b/arch/powerpc/kvm/book3s_interrupts.S
index c54b0e30cf3..0a8515a5c04 100644
--- a/arch/powerpc/kvm/book3s_interrupts.S
+++ b/arch/powerpc/kvm/book3s_interrupts.S
@@ -29,27 +29,11 @@
#define ULONG_SIZE 8
#define FUNC(name) GLUE(.,name)
-#define GET_SHADOW_VCPU_R13
-
-#define DISABLE_INTERRUPTS \
- mfmsr r0; \
- rldicl r0,r0,48,1; \
- rotldi r0,r0,16; \
- mtmsrd r0,1; \
-
#elif defined(CONFIG_PPC_BOOK3S_32)
#define ULONG_SIZE 4
#define FUNC(name) name
-#define GET_SHADOW_VCPU_R13 \
- lwz r13, (THREAD + THREAD_KVM_SVCPU)(r2)
-
-#define DISABLE_INTERRUPTS \
- mfmsr r0; \
- rlwinm r0,r0,0,17,15; \
- mtmsr r0; \
-
#endif /* CONFIG_PPC_BOOK3S_XX */
@@ -108,44 +92,17 @@ kvm_start_entry:
kvm_start_lightweight:
- GET_SHADOW_VCPU_R13
- PPC_LL r3, VCPU_HIGHMEM_HANDLER(r4)
- PPC_STL r3, HSTATE_VMHANDLER(r13)
-
- PPC_LL r10, VCPU_SHADOW_MSR(r4) /* r10 = vcpu->arch.shadow_msr */
-
- DISABLE_INTERRUPTS
-
#ifdef CONFIG_PPC_BOOK3S_64
- /* Some guests may need to have dcbz set to 32 byte length.
- *
- * Usually we ensure that by patching the guest's instructions
- * to trap on dcbz and emulate it in the hypervisor.
- *
- * If we can, we should tell the CPU to use 32 byte dcbz though,
- * because that's a lot faster.
- */
-
PPC_LL r3, VCPU_HFLAGS(r4)
- rldicl. r3, r3, 0, 63 /* CR = ((r3 & 1) == 0) */
- beq no_dcbz32_on
-
- mfspr r3,SPRN_HID5
- ori r3, r3, 0x80 /* XXX HID5_dcbz32 = 0x80 */
- mtspr SPRN_HID5,r3
-
-no_dcbz32_on:
-
+ rldicl r3, r3, 0, 63 /* r3 &= 1 */
+ stb r3, HSTATE_RESTORE_HID5(r13)
#endif /* CONFIG_PPC_BOOK3S_64 */
- PPC_LL r6, VCPU_RMCALL(r4)
- mtctr r6
-
- PPC_LL r3, VCPU_TRAMPOLINE_ENTER(r4)
- LOAD_REG_IMMEDIATE(r4, MSR_KERNEL & ~(MSR_IR | MSR_DR))
+ PPC_LL r4, VCPU_SHADOW_MSR(r4) /* get shadow_msr */
/* Jump to segment patching handler and into our guest */
- bctr
+ bl FUNC(kvmppc_entry_trampoline)
+ nop
/*
* This is the handler in module memory. It gets jumped at from the
@@ -170,21 +127,6 @@ kvmppc_handler_highmem:
/* R7 = vcpu */
PPC_LL r7, GPR4(r1)
-#ifdef CONFIG_PPC_BOOK3S_64
-
- PPC_LL r5, VCPU_HFLAGS(r7)
- rldicl. r5, r5, 0, 63 /* CR = ((r5 & 1) == 0) */
- beq no_dcbz32_off
-
- li r4, 0
- mfspr r5,SPRN_HID5
- rldimi r5,r4,6,56
- mtspr SPRN_HID5,r5
-
-no_dcbz32_off:
-
-#endif /* CONFIG_PPC_BOOK3S_64 */
-
PPC_STL r14, VCPU_GPR(r14)(r7)
PPC_STL r15, VCPU_GPR(r15)(r7)
PPC_STL r16, VCPU_GPR(r16)(r7)
@@ -204,67 +146,6 @@ no_dcbz32_off:
PPC_STL r30, VCPU_GPR(r30)(r7)
PPC_STL r31, VCPU_GPR(r31)(r7)
- /* Restore host msr -> SRR1 */
- PPC_LL r6, VCPU_HOST_MSR(r7)
-
- /*
- * For some interrupts, we need to call the real Linux
- * handler, so it can do work for us. This has to happen
- * as if the interrupt arrived from the kernel though,
- * so let's fake it here where most state is restored.
- *
- * Call Linux for hardware interrupts/decrementer
- * r3 = address of interrupt handler (exit reason)
- */
-
- cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL
- beq call_linux_handler
- cmpwi r12, BOOK3S_INTERRUPT_DECREMENTER
- beq call_linux_handler
- cmpwi r12, BOOK3S_INTERRUPT_PERFMON
- beq call_linux_handler
-
- /* Back to EE=1 */
- mtmsr r6
- sync
- b kvm_return_point
-
-call_linux_handler:
-
- /*
- * If we land here we need to jump back to the handler we
- * came from.
- *
- * We have a page that we can access from real mode, so let's
- * jump back to that and use it as a trampoline to get back into the
- * interrupt handler!
- *
- * R3 still contains the exit code,
- * R5 VCPU_HOST_RETIP and
- * R6 VCPU_HOST_MSR
- */
-
- /* Restore host IP -> SRR0 */
- PPC_LL r5, VCPU_HOST_RETIP(r7)
-
- /* XXX Better move to a safe function?
- * What if we get an HTAB flush in between mtsrr0 and mtsrr1? */
-
- mtlr r12
-
- PPC_LL r4, VCPU_TRAMPOLINE_LOWMEM(r7)
- mtsrr0 r4
- LOAD_REG_IMMEDIATE(r3, MSR_KERNEL & ~(MSR_IR | MSR_DR))
- mtsrr1 r3
-
- RFI
-
-.global kvm_return_point
-kvm_return_point:
-
- /* Jump back to lightweight entry if we're supposed to */
- /* go back into the guest */
-
/* Pass the exit number as 3rd argument to kvmppc_handle_exit */
mr r5, r12
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 0c0d3f27443..bc4d50dec78 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -20,6 +20,7 @@
*/
#include <linux/kvm_host.h>
+#include <linux/export.h>
#include <linux/err.h>
#include <linux/slab.h>
@@ -150,16 +151,22 @@ void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr)
#ifdef CONFIG_PPC_BOOK3S_64
if ((pvr >= 0x330000) && (pvr < 0x70330000)) {
kvmppc_mmu_book3s_64_init(vcpu);
- to_book3s(vcpu)->hior = 0xfff00000;
+ if (!to_book3s(vcpu)->hior_sregs)
+ to_book3s(vcpu)->hior = 0xfff00000;
to_book3s(vcpu)->msr_mask = 0xffffffffffffffffULL;
+ vcpu->arch.cpu_type = KVM_CPU_3S_64;
} else
#endif
{
kvmppc_mmu_book3s_32_init(vcpu);
- to_book3s(vcpu)->hior = 0;
+ if (!to_book3s(vcpu)->hior_sregs)
+ to_book3s(vcpu)->hior = 0;
to_book3s(vcpu)->msr_mask = 0xffffffffULL;
+ vcpu->arch.cpu_type = KVM_CPU_3S_32;
}
+ kvmppc_sanity_check(vcpu);
+
/* If we are in hypervisor level on 970, we can tell the CPU to
* treat DCBZ as 32 bytes store */
vcpu->arch.hflags &= ~BOOK3S_HFLAG_DCBZ32;
@@ -646,7 +653,27 @@ program_interrupt:
break;
}
case BOOK3S_INTERRUPT_SYSCALL:
- if (vcpu->arch.osi_enabled &&
+ if (vcpu->arch.papr_enabled &&
+ (kvmppc_get_last_inst(vcpu) == 0x44000022) &&
+ !(vcpu->arch.shared->msr & MSR_PR)) {
+ /* SC 1 papr hypercalls */
+ ulong cmd = kvmppc_get_gpr(vcpu, 3);
+ int i;
+
+ if (kvmppc_h_pr(vcpu, cmd) == EMULATE_DONE) {
+ r = RESUME_GUEST;
+ break;
+ }
+
+ run->papr_hcall.nr = cmd;
+ for (i = 0; i < 9; ++i) {
+ ulong gpr = kvmppc_get_gpr(vcpu, 4 + i);
+ run->papr_hcall.args[i] = gpr;
+ }
+ run->exit_reason = KVM_EXIT_PAPR_HCALL;
+ vcpu->arch.hcall_needed = 1;
+ r = RESUME_HOST;
+ } else if (vcpu->arch.osi_enabled &&
(((u32)kvmppc_get_gpr(vcpu, 3)) == OSI_SC_MAGIC_R3) &&
(((u32)kvmppc_get_gpr(vcpu, 4)) == OSI_SC_MAGIC_R4)) {
/* MOL hypercalls */
@@ -770,6 +797,9 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
}
}
+ if (sregs->u.s.flags & KVM_SREGS_S_HIOR)
+ sregs->u.s.hior = to_book3s(vcpu)->hior;
+
return 0;
}
@@ -806,6 +836,11 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
/* Flush the MMU after messing with the segments */
kvmppc_mmu_pte_flush(vcpu, 0, 0);
+ if (sregs->u.s.flags & KVM_SREGS_S_HIOR) {
+ to_book3s(vcpu)->hior_sregs = true;
+ to_book3s(vcpu)->hior = sregs->u.s.hior;
+ }
+
return 0;
}
@@ -841,8 +876,6 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
if (!p)
goto uninit_vcpu;
- vcpu->arch.host_retip = kvm_return_point;
- vcpu->arch.host_msr = mfmsr();
#ifdef CONFIG_PPC_BOOK3S_64
/* default to book3s_64 (970fx) */
vcpu->arch.pvr = 0x3C0301;
@@ -853,16 +886,6 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
kvmppc_set_pvr(vcpu, vcpu->arch.pvr);
vcpu->arch.slb_nr = 64;
- /* remember where some real-mode handlers are */
- vcpu->arch.trampoline_lowmem = __pa(kvmppc_handler_lowmem_trampoline);
- vcpu->arch.trampoline_enter = __pa(kvmppc_handler_trampoline_enter);
- vcpu->arch.highmem_handler = (ulong)kvmppc_handler_highmem;
-#ifdef CONFIG_PPC_BOOK3S_64
- vcpu->arch.rmcall = *(ulong*)kvmppc_rmcall;
-#else
- vcpu->arch.rmcall = (ulong)kvmppc_rmcall;
-#endif
-
vcpu->arch.shadow_msr = MSR_USER64;
err = kvmppc_mmu_init(vcpu);
@@ -908,6 +931,12 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
#endif
ulong ext_msr;
+ /* Check if we can run the vcpu at all */
+ if (!vcpu->arch.sane) {
+ kvm_run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ return -EINVAL;
+ }
+
/* No need to go into the guest when all we do is going out */
if (signal_pending(current)) {
kvm_run->exit_reason = KVM_EXIT_INTR;
diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c
new file mode 100644
index 00000000000..b9589324797
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_pr_papr.c
@@ -0,0 +1,158 @@
+/*
+ * Copyright (C) 2011. Freescale Inc. All rights reserved.
+ *
+ * Authors:
+ * Alexander Graf <agraf@suse.de>
+ * Paul Mackerras <paulus@samba.org>
+ *
+ * Description:
+ *
+ * Hypercall handling for running PAPR guests in PR KVM on Book 3S
+ * processors.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/uaccess.h>
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s.h>
+
+static unsigned long get_pteg_addr(struct kvm_vcpu *vcpu, long pte_index)
+{
+ struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
+ unsigned long pteg_addr;
+
+ pte_index <<= 4;
+ pte_index &= ((1 << ((vcpu_book3s->sdr1 & 0x1f) + 11)) - 1) << 7 | 0x70;
+ pteg_addr = vcpu_book3s->sdr1 & 0xfffffffffffc0000ULL;
+ pteg_addr |= pte_index;
+
+ return pteg_addr;
+}
+
+static int kvmppc_h_pr_enter(struct kvm_vcpu *vcpu)
+{
+ long flags = kvmppc_get_gpr(vcpu, 4);
+ long pte_index = kvmppc_get_gpr(vcpu, 5);
+ unsigned long pteg[2 * 8];
+ unsigned long pteg_addr, i, *hpte;
+
+ pte_index &= ~7UL;
+ pteg_addr = get_pteg_addr(vcpu, pte_index);
+
+ copy_from_user(pteg, (void __user *)pteg_addr, sizeof(pteg));
+ hpte = pteg;
+
+ if (likely((flags & H_EXACT) == 0)) {
+ pte_index &= ~7UL;
+ for (i = 0; ; ++i) {
+ if (i == 8)
+ return H_PTEG_FULL;
+ if ((*hpte & HPTE_V_VALID) == 0)
+ break;
+ hpte += 2;
+ }
+ } else {
+ i = kvmppc_get_gpr(vcpu, 5) & 7UL;
+ hpte += i * 2;
+ }
+
+ hpte[0] = kvmppc_get_gpr(vcpu, 6);
+ hpte[1] = kvmppc_get_gpr(vcpu, 7);
+ copy_to_user((void __user *)pteg_addr, pteg, sizeof(pteg));
+ kvmppc_set_gpr(vcpu, 3, H_SUCCESS);
+ kvmppc_set_gpr(vcpu, 4, pte_index | i);
+
+ return EMULATE_DONE;
+}
+
+static int kvmppc_h_pr_remove(struct kvm_vcpu *vcpu)
+{
+ unsigned long flags= kvmppc_get_gpr(vcpu, 4);
+ unsigned long pte_index = kvmppc_get_gpr(vcpu, 5);
+ unsigned long avpn = kvmppc_get_gpr(vcpu, 6);
+ unsigned long v = 0, pteg, rb;
+ unsigned long pte[2];
+
+ pteg = get_pteg_addr(vcpu, pte_index);
+ copy_from_user(pte, (void __user *)pteg, sizeof(pte));
+
+ if ((pte[0] & HPTE_V_VALID) == 0 ||
+ ((flags & H_AVPN) && (pte[0] & ~0x7fUL) != avpn) ||
+ ((flags & H_ANDCOND) && (pte[0] & avpn) != 0)) {
+ kvmppc_set_gpr(vcpu, 3, H_NOT_FOUND);
+ return EMULATE_DONE;
+ }
+
+ copy_to_user((void __user *)pteg, &v, sizeof(v));
+
+ rb = compute_tlbie_rb(pte[0], pte[1], pte_index);
+ vcpu->arch.mmu.tlbie(vcpu, rb, rb & 1 ? true : false);
+
+ kvmppc_set_gpr(vcpu, 3, H_SUCCESS);
+ kvmppc_set_gpr(vcpu, 4, pte[0]);
+ kvmppc_set_gpr(vcpu, 5, pte[1]);
+
+ return EMULATE_DONE;
+}
+
+static int kvmppc_h_pr_protect(struct kvm_vcpu *vcpu)
+{
+ unsigned long flags = kvmppc_get_gpr(vcpu, 4);
+ unsigned long pte_index = kvmppc_get_gpr(vcpu, 5);
+ unsigned long avpn = kvmppc_get_gpr(vcpu, 6);
+ unsigned long rb, pteg, r, v;
+ unsigned long pte[2];
+
+ pteg = get_pteg_addr(vcpu, pte_index);
+ copy_from_user(pte, (void __user *)pteg, sizeof(pte));
+
+ if ((pte[0] & HPTE_V_VALID) == 0 ||
+ ((flags & H_AVPN) && (pte[0] & ~0x7fUL) != avpn)) {
+ kvmppc_set_gpr(vcpu, 3, H_NOT_FOUND);
+ return EMULATE_DONE;
+ }
+
+ v = pte[0];
+ r = pte[1];
+ r &= ~(HPTE_R_PP0 | HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_HI |
+ HPTE_R_KEY_LO);
+ r |= (flags << 55) & HPTE_R_PP0;
+ r |= (flags << 48) & HPTE_R_KEY_HI;
+ r |= flags & (HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_LO);
+
+ pte[1] = r;
+
+ rb = compute_tlbie_rb(v, r, pte_index);
+ vcpu->arch.mmu.tlbie(vcpu, rb, rb & 1 ? true : false);
+ copy_to_user((void __user *)pteg, pte, sizeof(pte));
+
+ kvmppc_set_gpr(vcpu, 3, H_SUCCESS);
+
+ return EMULATE_DONE;
+}
+
+int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd)
+{
+ switch (cmd) {
+ case H_ENTER:
+ return kvmppc_h_pr_enter(vcpu);
+ case H_REMOVE:
+ return kvmppc_h_pr_remove(vcpu);
+ case H_PROTECT:
+ return kvmppc_h_pr_protect(vcpu);
+ case H_BULK_REMOVE:
+ /* We just flush all PTEs, so user space can
+ handle the HPT modifications */
+ kvmppc_mmu_pte_flush(vcpu, 0, 0);
+ break;
+ case H_CEDE:
+ kvm_vcpu_block(vcpu);
+ vcpu->stat.halt_wakeup++;
+ return EMULATE_DONE;
+ }
+
+ return EMULATE_FAIL;
+}
diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S
index c1f877c4a88..34187585c50 100644
--- a/arch/powerpc/kvm/book3s_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_rmhandlers.S
@@ -20,6 +20,7 @@
#include <asm/ppc_asm.h>
#include <asm/kvm_asm.h>
#include <asm/reg.h>
+#include <asm/mmu.h>
#include <asm/page.h>
#include <asm/asm-offsets.h>
@@ -35,10 +36,10 @@
#if defined(CONFIG_PPC_BOOK3S_64)
-#define LOAD_SHADOW_VCPU(reg) GET_PACA(reg)
-#define MSR_NOIRQ MSR_KERNEL & ~(MSR_IR | MSR_DR)
#define FUNC(name) GLUE(.,name)
+#define MTMSR_EERI(reg) mtmsrd (reg),1
+ .globl kvmppc_skip_interrupt
kvmppc_skip_interrupt:
/*
* Here all GPRs are unchanged from when the interrupt happened
@@ -51,6 +52,7 @@ kvmppc_skip_interrupt:
rfid
b .
+ .globl kvmppc_skip_Hinterrupt
kvmppc_skip_Hinterrupt:
/*
* Here all GPRs are unchanged from when the interrupt happened
@@ -65,8 +67,8 @@ kvmppc_skip_Hinterrupt:
#elif defined(CONFIG_PPC_BOOK3S_32)
-#define MSR_NOIRQ MSR_KERNEL
#define FUNC(name) name
+#define MTMSR_EERI(reg) mtmsr (reg)
.macro INTERRUPT_TRAMPOLINE intno
@@ -167,40 +169,24 @@ kvmppc_handler_skip_ins:
#endif
/*
- * This trampoline brings us back to a real mode handler
- *
- * Input Registers:
- *
- * R5 = SRR0
- * R6 = SRR1
- * LR = real-mode IP
+ * Call kvmppc_handler_trampoline_enter in real mode
*
+ * On entry, r4 contains the guest shadow MSR
*/
-.global kvmppc_handler_lowmem_trampoline
-kvmppc_handler_lowmem_trampoline:
-
- mtsrr0 r5
+_GLOBAL(kvmppc_entry_trampoline)
+ mfmsr r5
+ LOAD_REG_ADDR(r7, kvmppc_handler_trampoline_enter)
+ toreal(r7)
+
+ li r9, MSR_RI
+ ori r9, r9, MSR_EE
+ andc r9, r5, r9 /* Clear EE and RI in MSR value */
+ li r6, MSR_IR | MSR_DR
+ ori r6, r6, MSR_EE
+ andc r6, r5, r6 /* Clear EE, DR and IR in MSR value */
+ MTMSR_EERI(r9) /* Clear EE and RI in MSR */
+ mtsrr0 r7 /* before we set srr0/1 */
mtsrr1 r6
- blr
-kvmppc_handler_lowmem_trampoline_end:
-
-/*
- * Call a function in real mode
- *
- * Input Registers:
- *
- * R3 = function
- * R4 = MSR
- * R5 = scratch register
- *
- */
-_GLOBAL(kvmppc_rmcall)
- LOAD_REG_IMMEDIATE(r5, MSR_NOIRQ)
- mtmsr r5 /* Disable relocation and interrupts, so mtsrr
- doesn't get interrupted */
- sync
- mtsrr0 r3
- mtsrr1 r4
RFI
#if defined(CONFIG_PPC_BOOK3S_32)
diff --git a/arch/powerpc/kvm/book3s_segment.S b/arch/powerpc/kvm/book3s_segment.S
index aed32e51721..0676ae249b9 100644
--- a/arch/powerpc/kvm/book3s_segment.S
+++ b/arch/powerpc/kvm/book3s_segment.S
@@ -23,6 +23,7 @@
#define GET_SHADOW_VCPU(reg) \
mr reg, r13
+#define MTMSR_EERI(reg) mtmsrd (reg),1
#elif defined(CONFIG_PPC_BOOK3S_32)
@@ -30,6 +31,7 @@
tophys(reg, r2); \
lwz reg, (THREAD + THREAD_KVM_SVCPU)(reg); \
tophys(reg, reg)
+#define MTMSR_EERI(reg) mtmsr (reg)
#endif
@@ -57,10 +59,12 @@ kvmppc_handler_trampoline_enter:
/* Required state:
*
* MSR = ~IR|DR
- * R13 = PACA
* R1 = host R1
* R2 = host R2
- * R10 = guest MSR
+ * R4 = guest shadow MSR
+ * R5 = normal host MSR
+ * R6 = current host MSR (EE, IR, DR off)
+ * LR = highmem guest exit code
* all other volatile GPRS = free
* SVCPU[CR] = guest CR
* SVCPU[XER] = guest XER
@@ -71,15 +75,15 @@ kvmppc_handler_trampoline_enter:
/* r3 = shadow vcpu */
GET_SHADOW_VCPU(r3)
+ /* Save guest exit handler address and MSR */
+ mflr r0
+ PPC_STL r0, HSTATE_VMHANDLER(r3)
+ PPC_STL r5, HSTATE_HOST_MSR(r3)
+
/* Save R1/R2 in the PACA (64-bit) or shadow_vcpu (32-bit) */
PPC_STL r1, HSTATE_HOST_R1(r3)
PPC_STL r2, HSTATE_HOST_R2(r3)
- /* Move SRR0 and SRR1 into the respective regs */
- PPC_LL r9, SVCPU_PC(r3)
- mtsrr0 r9
- mtsrr1 r10
-
/* Activate guest mode, so faults get handled by KVM */
li r11, KVM_GUEST_MODE_GUEST
stb r11, HSTATE_IN_GUEST(r3)
@@ -87,17 +91,46 @@ kvmppc_handler_trampoline_enter:
/* Switch to guest segment. This is subarch specific. */
LOAD_GUEST_SEGMENTS
+#ifdef CONFIG_PPC_BOOK3S_64
+ /* Some guests may need to have dcbz set to 32 byte length.
+ *
+ * Usually we ensure that by patching the guest's instructions
+ * to trap on dcbz and emulate it in the hypervisor.
+ *
+ * If we can, we should tell the CPU to use 32 byte dcbz though,
+ * because that's a lot faster.
+ */
+ lbz r0, HSTATE_RESTORE_HID5(r3)
+ cmpwi r0, 0
+ beq no_dcbz32_on
+
+ mfspr r0,SPRN_HID5
+ ori r0, r0, 0x80 /* XXX HID5_dcbz32 = 0x80 */
+ mtspr SPRN_HID5,r0
+no_dcbz32_on:
+
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
/* Enter guest */
- PPC_LL r4, SVCPU_CTR(r3)
- PPC_LL r5, SVCPU_LR(r3)
- lwz r6, SVCPU_CR(r3)
- lwz r7, SVCPU_XER(r3)
+ PPC_LL r8, SVCPU_CTR(r3)
+ PPC_LL r9, SVCPU_LR(r3)
+ lwz r10, SVCPU_CR(r3)
+ lwz r11, SVCPU_XER(r3)
+
+ mtctr r8
+ mtlr r9
+ mtcr r10
+ mtxer r11
- mtctr r4
- mtlr r5
- mtcr r6
- mtxer r7
+ /* Move SRR0 and SRR1 into the respective regs */
+ PPC_LL r9, SVCPU_PC(r3)
+ /* First clear RI in our current MSR value */
+ li r0, MSR_RI
+ andc r6, r6, r0
+ MTMSR_EERI(r6)
+ mtsrr0 r9
+ mtsrr1 r4
PPC_LL r0, SVCPU_R0(r3)
PPC_LL r1, SVCPU_R1(r3)
@@ -213,11 +246,16 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
beq ld_last_inst
cmpwi r12, BOOK3S_INTERRUPT_PROGRAM
beq ld_last_inst
+ cmpwi r12, BOOK3S_INTERRUPT_SYSCALL
+ beq ld_last_prev_inst
cmpwi r12, BOOK3S_INTERRUPT_ALIGNMENT
beq- ld_last_inst
b no_ld_last_inst
+ld_last_prev_inst:
+ addi r3, r3, -4
+
ld_last_inst:
/* Save off the guest instruction we're at */
@@ -254,6 +292,43 @@ no_ld_last_inst:
/* Switch back to host MMU */
LOAD_HOST_SEGMENTS
+#ifdef CONFIG_PPC_BOOK3S_64
+
+ lbz r5, HSTATE_RESTORE_HID5(r13)
+ cmpwi r5, 0
+ beq no_dcbz32_off
+
+ li r4, 0
+ mfspr r5,SPRN_HID5
+ rldimi r5,r4,6,56
+ mtspr SPRN_HID5,r5
+
+no_dcbz32_off:
+
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
+ /*
+ * For some interrupts, we need to call the real Linux
+ * handler, so it can do work for us. This has to happen
+ * as if the interrupt arrived from the kernel though,
+ * so let's fake it here where most state is restored.
+ *
+ * Having set up SRR0/1 with the address where we want
+ * to continue with relocation on (potentially in module
+ * space), we either just go straight there with rfi[d],
+ * or we jump to an interrupt handler with bctr if there
+ * is an interrupt to be handled first. In the latter
+ * case, the rfi[d] at the end of the interrupt handler
+ * will get us back to where we want to continue.
+ */
+
+ cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL
+ beq 1f
+ cmpwi r12, BOOK3S_INTERRUPT_DECREMENTER
+ beq 1f
+ cmpwi r12, BOOK3S_INTERRUPT_PERFMON
+1: mtctr r12
+
/* Register usage at this point:
*
* R1 = host R1
@@ -264,13 +339,15 @@ no_ld_last_inst:
*
*/
- /* RFI into the highmem handler */
- mfmsr r7
- ori r7, r7, MSR_IR|MSR_DR|MSR_RI|MSR_ME /* Enable paging */
- mtsrr1 r7
- /* Load highmem handler address */
+ PPC_LL r6, HSTATE_HOST_MSR(r13)
PPC_LL r8, HSTATE_VMHANDLER(r13)
+
+ /* Restore host msr -> SRR1 */
+ mtsrr1 r6
+ /* Load highmem handler address */
mtsrr0 r8
+ /* RFI into the highmem handler, or jump to interrupt handler */
+ beqctr
RFI
kvmppc_handler_trampoline_exit_end:
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index ee45fa01220..bb6c988f010 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -316,6 +316,11 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
{
int ret;
+ if (!vcpu->arch.sane) {
+ kvm_run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ return -EINVAL;
+ }
+
local_irq_disable();
kvm_guest_enter();
ret = __kvmppc_vcpu_run(kvm_run, vcpu);
@@ -618,6 +623,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
int i;
+ int r;
vcpu->arch.pc = 0;
vcpu->arch.shared->msr = 0;
@@ -634,7 +640,9 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
kvmppc_init_timing_stats(vcpu);
- return kvmppc_core_vcpu_setup(vcpu);
+ r = kvmppc_core_vcpu_setup(vcpu);
+ kvmppc_sanity_check(vcpu);
+ return r;
}
int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
index 797a7447c26..26d20903f2b 100644
--- a/arch/powerpc/kvm/e500.c
+++ b/arch/powerpc/kvm/e500.c
@@ -73,6 +73,8 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
/* Since booke kvm only support one core, update all vcpus' PIR to 0 */
vcpu->vcpu_id = 0;
+ vcpu->arch.cpu_type = KVM_CPU_E500V2;
+
return 0;
}
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index a107c9be0fb..efbf9ad8720 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -21,7 +21,6 @@
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/kvm_host.h>
-#include <linux/module.h>
#include <linux/vmalloc.h>
#include <linux/hrtimer.h>
#include <linux/fs.h>
@@ -39,12 +38,8 @@
int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
{
-#ifndef CONFIG_KVM_BOOK3S_64_HV
return !(v->arch.shared->msr & MSR_WE) ||
!!(v->arch.pending_exceptions);
-#else
- return !(v->arch.ceded) || !!(v->arch.pending_exceptions);
-#endif
}
int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
@@ -95,6 +90,31 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
return r;
}
+int kvmppc_sanity_check(struct kvm_vcpu *vcpu)
+{
+ int r = false;
+
+ /* We have to know what CPU to virtualize */
+ if (!vcpu->arch.pvr)
+ goto out;
+
+ /* PAPR only works with book3s_64 */
+ if ((vcpu->arch.cpu_type != KVM_CPU_3S_64) && vcpu->arch.papr_enabled)
+ goto out;
+
+#ifdef CONFIG_KVM_BOOK3S_64_HV
+ /* HV KVM can only do PAPR mode for now */
+ if (!vcpu->arch.papr_enabled)
+ goto out;
+#endif
+
+ r = true;
+
+out:
+ vcpu->arch.sane = r;
+ return r ? 0 : -EINVAL;
+}
+
int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu)
{
enum emulation_result er;
@@ -188,6 +208,8 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_PPC_BOOKE_SREGS:
#else
case KVM_CAP_PPC_SEGSTATE:
+ case KVM_CAP_PPC_HIOR:
+ case KVM_CAP_PPC_PAPR:
#endif
case KVM_CAP_PPC_UNSET_IRQ:
case KVM_CAP_PPC_IRQ_LEVEL:
@@ -258,6 +280,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
{
struct kvm_vcpu *vcpu;
vcpu = kvmppc_core_vcpu_create(kvm, id);
+ vcpu->arch.wqp = &vcpu->wq;
if (!IS_ERR(vcpu))
kvmppc_create_vcpu_debugfs(vcpu, id);
return vcpu;
@@ -289,8 +312,8 @@ static void kvmppc_decrementer_func(unsigned long data)
kvmppc_core_queue_dec(vcpu);
- if (waitqueue_active(&vcpu->wq)) {
- wake_up_interruptible(&vcpu->wq);
+ if (waitqueue_active(vcpu->arch.wqp)) {
+ wake_up_interruptible(vcpu->arch.wqp);
vcpu->stat.halt_wakeup++;
}
}
@@ -543,13 +566,15 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq)
{
- if (irq->irq == KVM_INTERRUPT_UNSET)
+ if (irq->irq == KVM_INTERRUPT_UNSET) {
kvmppc_core_dequeue_external(vcpu, irq);
- else
- kvmppc_core_queue_external(vcpu, irq);
+ return 0;
+ }
+
+ kvmppc_core_queue_external(vcpu, irq);
- if (waitqueue_active(&vcpu->wq)) {
- wake_up_interruptible(&vcpu->wq);
+ if (waitqueue_active(vcpu->arch.wqp)) {
+ wake_up_interruptible(vcpu->arch.wqp);
vcpu->stat.halt_wakeup++;
} else if (vcpu->cpu != -1) {
smp_send_reschedule(vcpu->cpu);
@@ -571,11 +596,18 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
r = 0;
vcpu->arch.osi_enabled = true;
break;
+ case KVM_CAP_PPC_PAPR:
+ r = 0;
+ vcpu->arch.papr_enabled = true;
+ break;
default:
r = -EINVAL;
break;
}
+ if (!r)
+ r = kvmppc_sanity_check(vcpu);
+
return r;
}