summaryrefslogtreecommitdiffstats
path: root/arch/powerpc/kvm/book3s_hv_rm_mmu.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-12-13 15:31:08 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2012-12-13 15:31:08 -0800
commit66cdd0ceaf65a18996f561b770eedde1d123b019 (patch)
tree4892eaa422d366fce5d1e866ff1fe0988af95569 /arch/powerpc/kvm/book3s_hv_rm_mmu.c
parent896ea17d3da5f44b2625c9cda9874d7dfe447393 (diff)
parent58b7825bc324da55415034a9f6ca5d716b8fd898 (diff)
Merge tag 'kvm-3.8-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Marcelo Tosatti: "Considerable KVM/PPC work, x86 kvmclock vsyscall support, IA32_TSC_ADJUST MSR emulation, amongst others." Fix up trivial conflict in kernel/sched/core.c due to cross-cpu migration notifier added next to rq migration call-back. * tag 'kvm-3.8-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (156 commits) KVM: emulator: fix real mode segment checks in address linearization VMX: remove unneeded enable_unrestricted_guest check KVM: VMX: fix DPL during entry to protected mode x86/kexec: crash_vmclear_local_vmcss needs __rcu kvm: Fix irqfd resampler list walk KVM: VMX: provide the vmclear function and a bitmap to support VMCLEAR in kdump x86/kexec: VMCLEAR VMCSs loaded on all cpus if necessary KVM: MMU: optimize for set_spte KVM: PPC: booke: Get/set guest EPCR register using ONE_REG interface KVM: PPC: bookehv: Add EPCR support in mtspr/mfspr emulation KVM: PPC: bookehv: Add guest computation mode for irq delivery KVM: PPC: Make EPCR a valid field for booke64 and bookehv KVM: PPC: booke: Extend MAS2 EPN mask for 64-bit KVM: PPC: e500: Mask MAS2 EPN high 32-bits in 32/64 tlbwe emulation KVM: PPC: Mask ea's high 32-bits in 32/64 instr emulation KVM: PPC: e500: Add emulation helper for getting instruction ea KVM: PPC: bookehv64: Add support for interrupt handling KVM: PPC: bookehv: Remove GET_VCPU macro from exception handler KVM: PPC: booke: Fix get_tb() compile error on 64-bit KVM: PPC: e500: Silence bogus GCC warning in tlb code ...
Diffstat (limited to 'arch/powerpc/kvm/book3s_hv_rm_mmu.c')
-rw-r--r--arch/powerpc/kvm/book3s_hv_rm_mmu.c143
1 files changed, 114 insertions, 29 deletions
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index fb0e821622d..19c93bae1ae 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -35,6 +35,37 @@ static void *real_vmalloc_addr(void *x)
return __va(addr);
}
+/* Return 1 if we need to do a global tlbie, 0 if we can use tlbiel */
+static int global_invalidates(struct kvm *kvm, unsigned long flags)
+{
+ int global;
+
+ /*
+ * If there is only one vcore, and it's currently running,
+ * we can use tlbiel as long as we mark all other physical
+ * cores as potentially having stale TLB entries for this lpid.
+ * If we're not using MMU notifiers, we never take pages away
+ * from the guest, so we can use tlbiel if requested.
+ * Otherwise, don't use tlbiel.
+ */
+ if (kvm->arch.online_vcores == 1 && local_paca->kvm_hstate.kvm_vcore)
+ global = 0;
+ else if (kvm->arch.using_mmu_notifiers)
+ global = 1;
+ else
+ global = !(flags & H_LOCAL);
+
+ if (!global) {
+ /* any other core might now have stale TLB entries... */
+ smp_wmb();
+ cpumask_setall(&kvm->arch.need_tlb_flush);
+ cpumask_clear_cpu(local_paca->kvm_hstate.kvm_vcore->pcpu,
+ &kvm->arch.need_tlb_flush);
+ }
+
+ return global;
+}
+
/*
* Add this HPTE into the chain for the real page.
* Must be called with the chain locked; it unlocks the chain.
@@ -59,13 +90,24 @@ void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
head->back = pte_index;
} else {
rev->forw = rev->back = pte_index;
- i = pte_index;
+ *rmap = (*rmap & ~KVMPPC_RMAP_INDEX) |
+ pte_index | KVMPPC_RMAP_PRESENT;
}
- smp_wmb();
- *rmap = i | KVMPPC_RMAP_REFERENCED | KVMPPC_RMAP_PRESENT; /* unlock */
+ unlock_rmap(rmap);
}
EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain);
+/*
+ * Note modification of an HPTE; set the HPTE modified bit
+ * if anyone is interested.
+ */
+static inline void note_hpte_modification(struct kvm *kvm,
+ struct revmap_entry *rev)
+{
+ if (atomic_read(&kvm->arch.hpte_mod_interest))
+ rev->guest_rpte |= HPTE_GR_MODIFIED;
+}
+
/* Remove this HPTE from the chain for a real page */
static void remove_revmap_chain(struct kvm *kvm, long pte_index,
struct revmap_entry *rev,
@@ -81,7 +123,7 @@ static void remove_revmap_chain(struct kvm *kvm, long pte_index,
ptel = rev->guest_rpte |= rcbits;
gfn = hpte_rpn(ptel, hpte_page_size(hpte_v, ptel));
memslot = __gfn_to_memslot(kvm_memslots(kvm), gfn);
- if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
+ if (!memslot)
return;
rmap = real_vmalloc_addr(&memslot->arch.rmap[gfn - memslot->base_gfn]);
@@ -103,14 +145,14 @@ static void remove_revmap_chain(struct kvm *kvm, long pte_index,
unlock_rmap(rmap);
}
-static pte_t lookup_linux_pte(struct kvm_vcpu *vcpu, unsigned long hva,
+static pte_t lookup_linux_pte(pgd_t *pgdir, unsigned long hva,
int writing, unsigned long *pte_sizep)
{
pte_t *ptep;
unsigned long ps = *pte_sizep;
unsigned int shift;
- ptep = find_linux_pte_or_hugepte(vcpu->arch.pgdir, hva, &shift);
+ ptep = find_linux_pte_or_hugepte(pgdir, hva, &shift);
if (!ptep)
return __pte(0);
if (shift)
@@ -130,15 +172,15 @@ static inline void unlock_hpte(unsigned long *hpte, unsigned long hpte_v)
hpte[0] = hpte_v;
}
-long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
- long pte_index, unsigned long pteh, unsigned long ptel)
+long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
+ long pte_index, unsigned long pteh, unsigned long ptel,
+ pgd_t *pgdir, bool realmode, unsigned long *pte_idx_ret)
{
- struct kvm *kvm = vcpu->kvm;
unsigned long i, pa, gpa, gfn, psize;
unsigned long slot_fn, hva;
unsigned long *hpte;
struct revmap_entry *rev;
- unsigned long g_ptel = ptel;
+ unsigned long g_ptel;
struct kvm_memory_slot *memslot;
unsigned long *physp, pte_size;
unsigned long is_io;
@@ -147,13 +189,14 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
unsigned int writing;
unsigned long mmu_seq;
unsigned long rcbits;
- bool realmode = vcpu->arch.vcore->vcore_state == VCORE_RUNNING;
psize = hpte_page_size(pteh, ptel);
if (!psize)
return H_PARAMETER;
writing = hpte_is_writable(ptel);
pteh &= ~(HPTE_V_HVLOCK | HPTE_V_ABSENT | HPTE_V_VALID);
+ ptel &= ~HPTE_GR_RESERVED;
+ g_ptel = ptel;
/* used later to detect if we might have been invalidated */
mmu_seq = kvm->mmu_notifier_seq;
@@ -183,7 +226,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
rmap = &memslot->arch.rmap[slot_fn];
if (!kvm->arch.using_mmu_notifiers) {
- physp = kvm->arch.slot_phys[memslot->id];
+ physp = memslot->arch.slot_phys;
if (!physp)
return H_PARAMETER;
physp += slot_fn;
@@ -201,7 +244,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
/* Look up the Linux PTE for the backing page */
pte_size = psize;
- pte = lookup_linux_pte(vcpu, hva, writing, &pte_size);
+ pte = lookup_linux_pte(pgdir, hva, writing, &pte_size);
if (pte_present(pte)) {
if (writing && !pte_write(pte))
/* make the actual HPTE be read-only */
@@ -210,6 +253,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
pa = pte_pfn(pte) << PAGE_SHIFT;
}
}
+
if (pte_size < psize)
return H_PARAMETER;
if (pa && pte_size > psize)
@@ -287,8 +331,10 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
rev = &kvm->arch.revmap[pte_index];
if (realmode)
rev = real_vmalloc_addr(rev);
- if (rev)
+ if (rev) {
rev->guest_rpte = g_ptel;
+ note_hpte_modification(kvm, rev);
+ }
/* Link HPTE into reverse-map chain */
if (pteh & HPTE_V_VALID) {
@@ -297,7 +343,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
lock_rmap(rmap);
/* Check for pending invalidations under the rmap chain lock */
if (kvm->arch.using_mmu_notifiers &&
- mmu_notifier_retry(vcpu, mmu_seq)) {
+ mmu_notifier_retry(kvm, mmu_seq)) {
/* inval in progress, write a non-present HPTE */
pteh |= HPTE_V_ABSENT;
pteh &= ~HPTE_V_VALID;
@@ -318,10 +364,17 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
hpte[0] = pteh;
asm volatile("ptesync" : : : "memory");
- vcpu->arch.gpr[4] = pte_index;
+ *pte_idx_ret = pte_index;
return H_SUCCESS;
}
-EXPORT_SYMBOL_GPL(kvmppc_h_enter);
+EXPORT_SYMBOL_GPL(kvmppc_do_h_enter);
+
+long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
+ long pte_index, unsigned long pteh, unsigned long ptel)
+{
+ return kvmppc_do_h_enter(vcpu->kvm, flags, pte_index, pteh, ptel,
+ vcpu->arch.pgdir, true, &vcpu->arch.gpr[4]);
+}
#define LOCK_TOKEN (*(u32 *)(&get_paca()->lock_token))
@@ -343,11 +396,10 @@ static inline int try_lock_tlbie(unsigned int *lock)
return old == 0;
}
-long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
- unsigned long pte_index, unsigned long avpn,
- unsigned long va)
+long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
+ unsigned long pte_index, unsigned long avpn,
+ unsigned long *hpret)
{
- struct kvm *kvm = vcpu->kvm;
unsigned long *hpte;
unsigned long v, r, rb;
struct revmap_entry *rev;
@@ -369,7 +421,7 @@ long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
if (v & HPTE_V_VALID) {
hpte[0] &= ~HPTE_V_VALID;
rb = compute_tlbie_rb(v, hpte[1], pte_index);
- if (!(flags & H_LOCAL) && atomic_read(&kvm->online_vcpus) > 1) {
+ if (global_invalidates(kvm, flags)) {
while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
cpu_relax();
asm volatile("ptesync" : : : "memory");
@@ -385,13 +437,22 @@ long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
/* Read PTE low word after tlbie to get final R/C values */
remove_revmap_chain(kvm, pte_index, rev, v, hpte[1]);
}
- r = rev->guest_rpte;
+ r = rev->guest_rpte & ~HPTE_GR_RESERVED;
+ note_hpte_modification(kvm, rev);
unlock_hpte(hpte, 0);
- vcpu->arch.gpr[4] = v;
- vcpu->arch.gpr[5] = r;
+ hpret[0] = v;
+ hpret[1] = r;
return H_SUCCESS;
}
+EXPORT_SYMBOL_GPL(kvmppc_do_h_remove);
+
+long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
+ unsigned long pte_index, unsigned long avpn)
+{
+ return kvmppc_do_h_remove(vcpu->kvm, flags, pte_index, avpn,
+ &vcpu->arch.gpr[4]);
+}
long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
{
@@ -459,6 +520,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
args[j] = ((0x80 | flags) << 56) + pte_index;
rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
+ note_hpte_modification(kvm, rev);
if (!(hp[0] & HPTE_V_VALID)) {
/* insert R and C bits from PTE */
@@ -534,8 +596,6 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
return H_NOT_FOUND;
}
- if (atomic_read(&kvm->online_vcpus) == 1)
- flags |= H_LOCAL;
v = hpte[0];
bits = (flags << 55) & HPTE_R_PP0;
bits |= (flags << 48) & HPTE_R_KEY_HI;
@@ -548,6 +608,7 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
if (rev) {
r = (rev->guest_rpte & ~mask) | bits;
rev->guest_rpte = r;
+ note_hpte_modification(kvm, rev);
}
r = (hpte[1] & ~mask) | bits;
@@ -555,7 +616,7 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
if (v & HPTE_V_VALID) {
rb = compute_tlbie_rb(v, r, pte_index);
hpte[0] = v & ~HPTE_V_VALID;
- if (!(flags & H_LOCAL)) {
+ if (global_invalidates(kvm, flags)) {
while(!try_lock_tlbie(&kvm->arch.tlbie_lock))
cpu_relax();
asm volatile("ptesync" : : : "memory");
@@ -568,6 +629,28 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
asm volatile("tlbiel %0" : : "r" (rb));
asm volatile("ptesync" : : : "memory");
}
+ /*
+ * If the host has this page as readonly but the guest
+ * wants to make it read/write, reduce the permissions.
+ * Checking the host permissions involves finding the
+ * memslot and then the Linux PTE for the page.
+ */
+ if (hpte_is_writable(r) && kvm->arch.using_mmu_notifiers) {
+ unsigned long psize, gfn, hva;
+ struct kvm_memory_slot *memslot;
+ pgd_t *pgdir = vcpu->arch.pgdir;
+ pte_t pte;
+
+ psize = hpte_page_size(v, r);
+ gfn = ((r & HPTE_R_RPN) & ~(psize - 1)) >> PAGE_SHIFT;
+ memslot = __gfn_to_memslot(kvm_memslots(kvm), gfn);
+ if (memslot) {
+ hva = __gfn_to_hva_memslot(memslot, gfn);
+ pte = lookup_linux_pte(pgdir, hva, 1, &psize);
+ if (pte_present(pte) && !pte_write(pte))
+ r = hpte_make_readonly(r);
+ }
+ }
}
hpte[1] = r;
eieio();
@@ -599,8 +682,10 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
v &= ~HPTE_V_ABSENT;
v |= HPTE_V_VALID;
}
- if (v & HPTE_V_VALID)
+ if (v & HPTE_V_VALID) {
r = rev[i].guest_rpte | (r & (HPTE_R_R | HPTE_R_C));
+ r &= ~HPTE_GR_RESERVED;
+ }
vcpu->arch.gpr[4 + i * 2] = v;
vcpu->arch.gpr[5 + i * 2] = r;
}