From dc83b8bc0256ee682506ed83853a98eaba529c6f Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Thu, 18 Aug 2011 15:25:21 -0500 Subject: KVM: PPC: e500: MMU API This implements a shared-memory API for giving host userspace access to the guest's TLB. Signed-off-by: Scott Wood Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/include/asm/kvm_ppc.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch/powerpc/include/asm/kvm_ppc.h') diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 46efd1a265c..a284f209e2d 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -193,4 +193,9 @@ static inline void kvm_rma_init(void) {} #endif +int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu, + struct kvm_config_tlb *cfg); +int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu, + struct kvm_dirty_tlb *cfg); + #endif /* __POWERPC_KVM_PPC_H__ */ -- cgit v1.2.3-70-g09d2 From 7e28e60ef974d0eeb43112ef264d8c130f7b7bf4 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Tue, 8 Nov 2011 18:23:20 -0600 Subject: KVM: PPC: Rename deliver_interrupts to prepare_to_enter This function also updates paravirt int_pending, so rename it to be more obvious that this is a collection of checks run prior to (re)entering a guest. Signed-off-by: Scott Wood Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/include/asm/kvm_ppc.h | 2 +- arch/powerpc/kvm/book3s.c | 2 +- arch/powerpc/kvm/book3s_hv.c | 6 +++--- arch/powerpc/kvm/book3s_pr.c | 2 +- arch/powerpc/kvm/booke.c | 4 ++-- arch/powerpc/kvm/powerpc.c | 2 +- 6 files changed, 9 insertions(+), 9 deletions(-) (limited to 'arch/powerpc/include/asm/kvm_ppc.h') diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index a284f209e2d..c089927f64c 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -94,7 +94,7 @@ extern int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu, extern void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu); extern void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu); -extern void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu); +extern void kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu); extern int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu); extern void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags); extern void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu); diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index e41ac6f7dcf..73fc9f04610 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -258,7 +258,7 @@ static bool clear_irqprio(struct kvm_vcpu *vcpu, unsigned int priority) return true; } -void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu) +void kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu) { unsigned long *pending = &vcpu->arch.pending_exceptions; unsigned long old_pending = vcpu->arch.pending_exceptions; diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 336983da9e7..536adee59c0 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -482,7 +482,7 @@ static void kvmppc_set_timer(struct kvm_vcpu *vcpu) if (now > vcpu->arch.dec_expires) { /* decrementer has already gone negative */ kvmppc_core_queue_dec(vcpu); - kvmppc_core_deliver_interrupts(vcpu); + kvmppc_core_prepare_to_enter(vcpu); return; } dec_nsec = (vcpu->arch.dec_expires - now) * NSEC_PER_SEC @@ -797,7 +797,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) list_for_each_entry_safe(v, vn, &vc->runnable_threads, arch.run_list) { - kvmppc_core_deliver_interrupts(v); + kvmppc_core_prepare_to_enter(v); if (signal_pending(v->arch.run_task)) { kvmppc_remove_runnable(vc, v); v->stat.signal_exits++; @@ -857,7 +857,7 @@ int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu) if (run->exit_reason == KVM_EXIT_PAPR_HCALL && !(vcpu->arch.shregs.msr & MSR_PR)) { r = kvmppc_pseries_do_hcall(vcpu); - kvmppc_core_deliver_interrupts(vcpu); + kvmppc_core_prepare_to_enter(vcpu); } } while (r == RESUME_GUEST); return r; diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index e2cfb9e1e20..f3628581fb7 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -764,7 +764,7 @@ program_interrupt: /* In case an interrupt came in that was triggered * from userspace (like DEC), we need to check what * to inject now! */ - kvmppc_core_deliver_interrupts(vcpu); + kvmppc_core_prepare_to_enter(vcpu); } } diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 9c785896e86..e082e348c88 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -289,7 +289,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, } /* Check pending exceptions and deliver one, if possible. */ -void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu) +void kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu) { unsigned long *pending = &vcpu->arch.pending_exceptions; unsigned long old_pending = vcpu->arch.pending_exceptions; @@ -611,7 +611,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, local_irq_disable(); - kvmppc_core_deliver_interrupts(vcpu); + kvmppc_core_prepare_to_enter(vcpu); if (!(r & RESUME_HOST)) { /* To avoid clobbering exit_reason, only check for signals if diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 3cf6fba513a..6186ec0d939 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -559,7 +559,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) vcpu->arch.hcall_needed = 0; } - kvmppc_core_deliver_interrupts(vcpu); + kvmppc_core_prepare_to_enter(vcpu); r = kvmppc_vcpu_run(run, vcpu); -- cgit v1.2.3-70-g09d2 From dfd4d47e9a71c5a35eb67a44cd311efbe1846b7e Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Thu, 17 Nov 2011 12:39:59 +0000 Subject: KVM: PPC: booke: Improve timer register emulation Decrementers are now properly driven by TCR/TSR, and the guest has full read/write access to these registers. The decrementer keeps ticking (and setting the TSR bit) regardless of whether the interrupts are enabled with TCR. The decrementer stops at zero, rather than going negative. Decrementers (and FITs, once implemented) are delivered as level-triggered interrupts -- dequeued when the TSR bit is cleared, not on delivery. Signed-off-by: Liu Yu [scottwood@freescale.com: significant changes] Signed-off-by: Scott Wood Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/include/asm/kvm_host.h | 2 +- arch/powerpc/include/asm/kvm_ppc.h | 1 + arch/powerpc/kvm/book3s.c | 8 +++++ arch/powerpc/kvm/booke.c | 67 ++++++++++++++++++++++++++++--------- arch/powerpc/kvm/booke.h | 4 +++ arch/powerpc/kvm/booke_emulate.c | 11 ++++-- arch/powerpc/kvm/emulate.c | 59 ++++++++++++++++---------------- arch/powerpc/kvm/powerpc.c | 33 +++++++----------- 8 files changed, 115 insertions(+), 70 deletions(-) (limited to 'arch/powerpc/include/asm/kvm_ppc.h') diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index bfd0c9912da..66c75cddaec 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -330,7 +330,7 @@ struct kvm_vcpu_arch { u32 tbl; u32 tbu; u32 tcr; - u32 tsr; + ulong tsr; /* we need to perform set/clr_bits() which requires ulong */ u32 ivor[64]; ulong ivpr; u32 pvr; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index c089927f64c..5192c2e7058 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -66,6 +66,7 @@ extern int kvmppc_emulate_instruction(struct kvm_run *run, extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu); extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu); extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb); +extern void kvmppc_decrementer_func(unsigned long data); extern int kvmppc_sanity_check(struct kvm_vcpu *vcpu); /* Core-specific hooks */ diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 5398744cd77..6bf7e0582c5 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -515,3 +515,11 @@ out: mutex_unlock(&kvm->slots_lock); return r; } + +void kvmppc_decrementer_func(unsigned long data) +{ + struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data; + + kvmppc_core_queue_dec(vcpu); + kvm_vcpu_kick(vcpu); +} diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 50803dd0b8f..9e41f45d07e 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -252,9 +252,11 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, allowed = vcpu->arch.shared->msr & MSR_ME; msr_mask = 0; break; - case BOOKE_IRQPRIO_EXTERNAL: case BOOKE_IRQPRIO_DECREMENTER: case BOOKE_IRQPRIO_FIT: + keep_irq = true; + /* fall through */ + case BOOKE_IRQPRIO_EXTERNAL: allowed = vcpu->arch.shared->msr & MSR_EE; allowed = allowed && !crit; msr_mask = MSR_CE|MSR_ME|MSR_DE; @@ -282,11 +284,26 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, return allowed; } +static void update_timer_ints(struct kvm_vcpu *vcpu) +{ + if ((vcpu->arch.tcr & TCR_DIE) && (vcpu->arch.tsr & TSR_DIS)) + kvmppc_core_queue_dec(vcpu); + else + kvmppc_core_dequeue_dec(vcpu); +} + static void kvmppc_core_check_exceptions(struct kvm_vcpu *vcpu) { unsigned long *pending = &vcpu->arch.pending_exceptions; unsigned int priority; + if (vcpu->requests) { + if (kvm_check_request(KVM_REQ_PENDING_TIMER, vcpu)) { + smp_mb(); + update_timer_ints(vcpu); + } + } + priority = __ffs(*pending); while (priority <= BOOKE_IRQPRIO_MAX) { if (kvmppc_booke_irqprio_deliver(vcpu, priority)) @@ -749,25 +766,16 @@ static int set_sregs_base(struct kvm_vcpu *vcpu, vcpu->arch.shared->esr = sregs->u.e.esr; vcpu->arch.shared->dar = sregs->u.e.dear; vcpu->arch.vrsave = sregs->u.e.vrsave; - vcpu->arch.tcr = sregs->u.e.tcr; + kvmppc_set_tcr(vcpu, sregs->u.e.tcr); - if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_DEC) + if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_DEC) { vcpu->arch.dec = sregs->u.e.dec; - - kvmppc_emulate_dec(vcpu); + kvmppc_emulate_dec(vcpu); + } if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR) { - /* - * FIXME: existing KVM timer handling is incomplete. - * TSR cannot be read by the guest, and its value in - * vcpu->arch is always zero. For now, just handle - * the case where the caller is trying to inject a - * decrementer interrupt. - */ - - if ((sregs->u.e.tsr & TSR_DIS) && - (vcpu->arch.tcr & TCR_DIE)) - kvmppc_core_queue_dec(vcpu); + vcpu->arch.tsr = sregs->u.e.tsr; + update_timer_ints(vcpu); } return 0; @@ -923,6 +931,33 @@ void kvmppc_core_destroy_vm(struct kvm *kvm) { } +void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr) +{ + vcpu->arch.tcr = new_tcr; + update_timer_ints(vcpu); +} + +void kvmppc_set_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits) +{ + set_bits(tsr_bits, &vcpu->arch.tsr); + smp_wmb(); + kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu); + kvm_vcpu_kick(vcpu); +} + +void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits) +{ + clear_bits(tsr_bits, &vcpu->arch.tsr); + update_timer_ints(vcpu); +} + +void kvmppc_decrementer_func(unsigned long data) +{ + struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data; + + kvmppc_set_tsr_bits(vcpu, TSR_DIS); +} + int __init kvmppc_booke_init(void) { unsigned long ivor[16]; diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h index 8e1fe33d64e..2fe202705a3 100644 --- a/arch/powerpc/kvm/booke.h +++ b/arch/powerpc/kvm/booke.h @@ -55,6 +55,10 @@ extern unsigned long kvmppc_booke_handlers; void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr); void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr); +void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr); +void kvmppc_set_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits); +void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits); + int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned int inst, int *advance); int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt); diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c index bae9288ac1e..3e652da3653 100644 --- a/arch/powerpc/kvm/booke_emulate.c +++ b/arch/powerpc/kvm/booke_emulate.c @@ -13,6 +13,7 @@ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * * Copyright IBM Corp. 2008 + * Copyright 2011 Freescale Semiconductor, Inc. * * Authors: Hollis Blanchard */ @@ -115,10 +116,10 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) case SPRN_DBSR: vcpu->arch.dbsr &= ~spr_val; break; case SPRN_TSR: - vcpu->arch.tsr &= ~spr_val; break; + kvmppc_clr_tsr_bits(vcpu, spr_val); + break; case SPRN_TCR: - vcpu->arch.tcr = spr_val; - kvmppc_emulate_dec(vcpu); + kvmppc_set_tcr(vcpu, spr_val); break; /* Note: SPRG4-7 are user-readable. These values are @@ -209,6 +210,10 @@ int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) kvmppc_set_gpr(vcpu, rt, vcpu->arch.dbcr1); break; case SPRN_DBSR: kvmppc_set_gpr(vcpu, rt, vcpu->arch.dbsr); break; + case SPRN_TSR: + kvmppc_set_gpr(vcpu, rt, vcpu->arch.tsr); break; + case SPRN_TCR: + kvmppc_set_gpr(vcpu, rt, vcpu->arch.tcr); break; case SPRN_IVOR0: kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL]); diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index bda052e2264..968f4010188 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c @@ -13,6 +13,7 @@ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * * Copyright IBM Corp. 2007 + * Copyright 2011 Freescale Semiconductor, Inc. * * Authors: Hollis Blanchard */ @@ -69,57 +70,55 @@ #define OP_STH 44 #define OP_STHU 45 -#ifdef CONFIG_PPC_BOOK3S -static int kvmppc_dec_enabled(struct kvm_vcpu *vcpu) -{ - return 1; -} -#else -static int kvmppc_dec_enabled(struct kvm_vcpu *vcpu) -{ - /* On BOOKE, DEC = 0 is as good as decrementer not enabled */ - return (vcpu->arch.tcr & TCR_DIE) && vcpu->arch.dec; -} -#endif - void kvmppc_emulate_dec(struct kvm_vcpu *vcpu) { unsigned long dec_nsec; unsigned long long dec_time; pr_debug("mtDEC: %x\n", vcpu->arch.dec); + hrtimer_try_to_cancel(&vcpu->arch.dec_timer); + #ifdef CONFIG_PPC_BOOK3S /* mtdec lowers the interrupt line when positive. */ kvmppc_core_dequeue_dec(vcpu); /* POWER4+ triggers a dec interrupt if the value is < 0 */ if (vcpu->arch.dec & 0x80000000) { - hrtimer_try_to_cancel(&vcpu->arch.dec_timer); kvmppc_core_queue_dec(vcpu); return; } #endif - if (kvmppc_dec_enabled(vcpu)) { - /* The decrementer ticks at the same rate as the timebase, so - * that's how we convert the guest DEC value to the number of - * host ticks. */ - - hrtimer_try_to_cancel(&vcpu->arch.dec_timer); - dec_time = vcpu->arch.dec; - dec_time *= 1000; - do_div(dec_time, tb_ticks_per_usec); - dec_nsec = do_div(dec_time, NSEC_PER_SEC); - hrtimer_start(&vcpu->arch.dec_timer, - ktime_set(dec_time, dec_nsec), HRTIMER_MODE_REL); - vcpu->arch.dec_jiffies = get_tb(); - } else { - hrtimer_try_to_cancel(&vcpu->arch.dec_timer); - } + +#ifdef CONFIG_BOOKE + /* On BOOKE, DEC = 0 is as good as decrementer not enabled */ + if (vcpu->arch.dec == 0) + return; +#endif + + /* + * The decrementer ticks at the same rate as the timebase, so + * that's how we convert the guest DEC value to the number of + * host ticks. + */ + + dec_time = vcpu->arch.dec; + dec_time *= 1000; + do_div(dec_time, tb_ticks_per_usec); + dec_nsec = do_div(dec_time, NSEC_PER_SEC); + hrtimer_start(&vcpu->arch.dec_timer, + ktime_set(dec_time, dec_nsec), HRTIMER_MODE_REL); + vcpu->arch.dec_jiffies = get_tb(); } u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb) { u64 jd = tb - vcpu->arch.dec_jiffies; + +#ifdef CONFIG_BOOKE + if (vcpu->arch.dec < jd) + return 0; +#endif + return vcpu->arch.dec - jd; } diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index d02e4c84e21..fd8d3b16eaf 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -39,7 +39,8 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) { return !(v->arch.shared->msr & MSR_WE) || - !!(v->arch.pending_exceptions); + !!(v->arch.pending_exceptions) || + v->requests; } int kvmppc_kvm_pv(struct kvm_vcpu *vcpu) @@ -311,18 +312,6 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) return kvmppc_core_pending_dec(vcpu); } -static void kvmppc_decrementer_func(unsigned long data) -{ - struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data; - - kvmppc_core_queue_dec(vcpu); - - if (waitqueue_active(vcpu->arch.wqp)) { - wake_up_interruptible(vcpu->arch.wqp); - vcpu->stat.halt_wakeup++; - } -} - /* * low level hrtimer wake routine. Because this runs in hardirq context * we schedule a tasklet to do the real work. @@ -567,6 +556,16 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) return r; } +void kvm_vcpu_kick(struct kvm_vcpu *vcpu) +{ + if (waitqueue_active(&vcpu->wq)) { + wake_up_interruptible(vcpu->arch.wqp); + vcpu->stat.halt_wakeup++; + } else if (vcpu->cpu != -1) { + smp_send_reschedule(vcpu->cpu); + } +} + int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) { if (irq->irq == KVM_INTERRUPT_UNSET) { @@ -575,13 +574,7 @@ int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) } kvmppc_core_queue_external(vcpu, irq); - - if (waitqueue_active(vcpu->arch.wqp)) { - wake_up_interruptible(vcpu->arch.wqp); - vcpu->stat.halt_wakeup++; - } else if (vcpu->cpu != -1) { - smp_send_reschedule(vcpu->cpu); - } + kvm_vcpu_kick(vcpu); return 0; } -- cgit v1.2.3-70-g09d2 From c77162dee7aff6ab5f075da9b60f649cbbeb86cc Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 12 Dec 2011 12:31:00 +0000 Subject: KVM: PPC: Only get pages when actually needed, not in prepare_memory_region() This removes the code from kvmppc_core_prepare_memory_region() that looked up the VMA for the region being added and called hva_to_page to get the pfns for the memory. We have no guarantee that there will be anything mapped there at the time of the KVM_SET_USER_MEMORY_REGION ioctl call; userspace can do that ioctl and then map memory into the region later. Instead we defer looking up the pfn for each memory page until it is needed, which generally means when the guest does an H_ENTER hcall on the page. Since we can't call get_user_pages in real mode, if we don't already have the pfn for the page, kvmppc_h_enter() will return H_TOO_HARD and we then call kvmppc_virtmode_h_enter() once we get back to kernel context. That calls kvmppc_get_guest_page() to get the pfn for the page, and then calls back to kvmppc_h_enter() to redo the HPTE insertion. When the first vcpu starts executing, we need to have the RMO or VRMA region mapped so that the guest's real mode accesses will work. Thus we now have a check in kvmppc_vcpu_run() to see if the RMO/VRMA is set up and if not, call kvmppc_hv_setup_rma(). It checks if the memslot starting at guest physical 0 now has RMO memory mapped there; if so it sets it up for the guest, otherwise on POWER7 it sets up the VRMA. The function that does that, kvmppc_map_vrma, is now a bit simpler, as it calls kvmppc_virtmode_h_enter instead of creating the HPTE itself. Since we are now potentially updating entries in the slot_phys[] arrays from multiple vcpu threads, we now have a spinlock protecting those updates to ensure that we don't lose track of any references to pages. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/include/asm/kvm_book3s.h | 4 + arch/powerpc/include/asm/kvm_book3s_64.h | 12 ++ arch/powerpc/include/asm/kvm_host.h | 2 + arch/powerpc/include/asm/kvm_ppc.h | 4 +- arch/powerpc/kvm/book3s_64_mmu_hv.c | 130 +++++++++++++--- arch/powerpc/kvm/book3s_hv.c | 244 +++++++++++++++++-------------- arch/powerpc/kvm/book3s_hv_rm_mmu.c | 54 +++---- 7 files changed, 290 insertions(+), 160 deletions(-) (limited to 'arch/powerpc/include/asm/kvm_ppc.h') diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index bcf6f4f52a2..c700f43ba17 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -141,6 +141,10 @@ extern pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn); extern void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long addr, unsigned long *nb_ret); extern void kvmppc_unpin_guest_page(struct kvm *kvm, void *addr); +extern long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, + long pte_index, unsigned long pteh, unsigned long ptel); +extern long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, + long pte_index, unsigned long pteh, unsigned long ptel); extern void kvmppc_entry_trampoline(void); extern void kvmppc_hv_entry_trampoline(void); diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index 300ec04a838..7e6f2ede44a 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -101,4 +101,16 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r, return rb; } +static inline unsigned long hpte_page_size(unsigned long h, unsigned long l) +{ + /* only handle 4k, 64k and 16M pages for now */ + if (!(h & HPTE_V_LARGE)) + return 1ul << 12; /* 4k page */ + if ((l & 0xf000) == 0x1000 && cpu_has_feature(CPU_FTR_ARCH_206)) + return 1ul << 16; /* 64k page */ + if ((l & 0xff000) == 0) + return 1ul << 24; /* 16M page */ + return 0; /* error */ +} + #endif /* __ASM_KVM_BOOK3S_64_H__ */ diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 7a17ab5b905..beb22ba71e2 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -194,7 +194,9 @@ struct kvm_arch { unsigned long lpcr; unsigned long rmor; struct kvmppc_rma_info *rma; + int rma_setup_done; struct list_head spapr_tce_tables; + spinlock_t slot_phys_lock; unsigned long *slot_phys[KVM_MEM_SLOTS_NUM]; int slot_npages[KVM_MEM_SLOTS_NUM]; unsigned short last_vcpu[NR_CPUS]; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 5192c2e7058..1458c6740ea 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -121,8 +121,8 @@ extern long kvmppc_alloc_hpt(struct kvm *kvm); extern void kvmppc_free_hpt(struct kvm *kvm); extern long kvmppc_prepare_vrma(struct kvm *kvm, struct kvm_userspace_memory_region *mem); -extern void kvmppc_map_vrma(struct kvm *kvm, - struct kvm_userspace_memory_region *mem); +extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu, + struct kvm_memory_slot *memslot); extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu); extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, struct kvm_create_spapr_tce *args); diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index dcd39dc64f0..87016ccd864 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -95,19 +95,17 @@ void kvmppc_free_hpt(struct kvm *kvm) free_pages(kvm->arch.hpt_virt, HPT_ORDER - PAGE_SHIFT); } -void kvmppc_map_vrma(struct kvm *kvm, struct kvm_userspace_memory_region *mem) +void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot) { + struct kvm *kvm = vcpu->kvm; unsigned long i; unsigned long npages; - unsigned long pa; - unsigned long *hpte; - unsigned long hash; + unsigned long hp_v, hp_r; + unsigned long addr, hash; unsigned long porder = kvm->arch.ram_porder; - struct revmap_entry *rev; - unsigned long *physp; + long ret; - physp = kvm->arch.slot_phys[mem->slot]; - npages = kvm->arch.slot_npages[mem->slot]; + npages = kvm->arch.slot_npages[memslot->id]; /* VRMA can't be > 1TB */ if (npages > 1ul << (40 - porder)) @@ -117,10 +115,7 @@ void kvmppc_map_vrma(struct kvm *kvm, struct kvm_userspace_memory_region *mem) npages = HPT_NPTEG; for (i = 0; i < npages; ++i) { - pa = physp[i]; - if (!pa) - break; - pa &= PAGE_MASK; + addr = i << porder; /* can't use hpt_hash since va > 64 bits */ hash = (i ^ (VRMA_VSID ^ (VRMA_VSID << 25))) & HPT_HASH_MASK; /* @@ -130,18 +125,16 @@ void kvmppc_map_vrma(struct kvm *kvm, struct kvm_userspace_memory_region *mem) * is available and use it. */ hash = (hash << 3) + 7; - hpte = (unsigned long *) (kvm->arch.hpt_virt + (hash << 4)); - /* HPTE low word - RPN, protection, etc. */ - hpte[1] = pa | HPTE_R_R | HPTE_R_C | HPTE_R_M | PP_RWXX; - smp_wmb(); - hpte[0] = HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)) | + hp_v = HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)) | (i << (VRMA_PAGE_ORDER - 16)) | HPTE_V_BOLTED | HPTE_V_LARGE | HPTE_V_VALID; - - /* Reverse map info */ - rev = &kvm->arch.revmap[hash]; - rev->guest_rpte = (i << porder) | HPTE_R_R | HPTE_R_C | - HPTE_R_M | PP_RWXX; + hp_r = addr | HPTE_R_R | HPTE_R_C | HPTE_R_M | PP_RWXX; + ret = kvmppc_virtmode_h_enter(vcpu, H_EXACT, hash, hp_v, hp_r); + if (ret != H_SUCCESS) { + pr_err("KVM: map_vrma at %lx failed, ret=%ld\n", + addr, ret); + break; + } } } @@ -178,6 +171,92 @@ static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu) kvmppc_set_msr(vcpu, MSR_SF | MSR_ME); } +/* + * This is called to get a reference to a guest page if there isn't + * one already in the kvm->arch.slot_phys[][] arrays. + */ +static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn, + struct kvm_memory_slot *memslot) +{ + unsigned long start; + long np; + struct page *page, *pages[1]; + unsigned long *physp; + unsigned long pfn, i; + + physp = kvm->arch.slot_phys[memslot->id]; + if (!physp) + return -EINVAL; + i = (gfn - memslot->base_gfn) >> (kvm->arch.ram_porder - PAGE_SHIFT); + if (physp[i]) + return 0; + + page = NULL; + start = gfn_to_hva_memslot(memslot, gfn); + + /* Instantiate and get the page we want access to */ + np = get_user_pages_fast(start, 1, 1, pages); + if (np != 1) + return -EINVAL; + page = pages[0]; + + /* Check it's a 16MB page */ + if (!PageHead(page) || + compound_order(page) != (kvm->arch.ram_porder - PAGE_SHIFT)) { + pr_err("page at %lx isn't 16MB (o=%d)\n", + start, compound_order(page)); + put_page(page); + return -EINVAL; + } + pfn = page_to_pfn(page); + + spin_lock(&kvm->arch.slot_phys_lock); + if (!physp[i]) + physp[i] = (pfn << PAGE_SHIFT) | KVMPPC_GOT_PAGE; + else + put_page(page); + spin_unlock(&kvm->arch.slot_phys_lock); + + return 0; +} + +/* + * We come here on a H_ENTER call from the guest when + * we don't have the requested page pinned already. + */ +long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, + long pte_index, unsigned long pteh, unsigned long ptel) +{ + struct kvm *kvm = vcpu->kvm; + unsigned long psize, gpa, gfn; + struct kvm_memory_slot *memslot; + long ret; + + psize = hpte_page_size(pteh, ptel); + if (!psize) + return H_PARAMETER; + + /* Find the memslot (if any) for this address */ + gpa = (ptel & HPTE_R_RPN) & ~(psize - 1); + gfn = gpa >> PAGE_SHIFT; + memslot = gfn_to_memslot(kvm, gfn); + if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) + return H_PARAMETER; + if (kvmppc_get_guest_page(kvm, gfn, memslot) < 0) + return H_PARAMETER; + + preempt_disable(); + ret = kvmppc_h_enter(vcpu, flags, pte_index, pteh, ptel); + preempt_enable(); + if (ret == H_TOO_HARD) { + /* this can't happen */ + pr_err("KVM: Oops, kvmppc_h_enter returned too hard!\n"); + ret = H_RESOURCE; /* or something */ + } + return ret; + +} + static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, struct kvmppc_pte *gpte, bool data) { @@ -203,8 +282,11 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa, physp += (gfn - memslot->base_gfn) >> (kvm->arch.ram_porder - PAGE_SHIFT); pa = *physp; - if (!pa) - return NULL; + if (!pa) { + if (kvmppc_get_guest_page(kvm, gfn, memslot) < 0) + return NULL; + pa = *physp; + } pfn = pa >> PAGE_SHIFT; page = pfn_to_page(pfn); get_page(page); diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 82d71388eac..ce5a13fb974 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -49,6 +49,7 @@ #include #include #include +#include #define LARGE_PAGE_ORDER 24 /* 16MB pages */ @@ -57,6 +58,7 @@ /* #define EXIT_DEBUG_INT */ static void kvmppc_end_cede(struct kvm_vcpu *vcpu); +static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu); void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { @@ -231,6 +233,12 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) struct kvm_vcpu *tvcpu; switch (req) { + case H_ENTER: + ret = kvmppc_virtmode_h_enter(vcpu, kvmppc_get_gpr(vcpu, 4), + kvmppc_get_gpr(vcpu, 5), + kvmppc_get_gpr(vcpu, 6), + kvmppc_get_gpr(vcpu, 7)); + break; case H_CEDE: break; case H_PROD: @@ -851,9 +859,12 @@ int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu) return -EINTR; } - /* On PPC970, check that we have an RMA region */ - if (!vcpu->kvm->arch.rma && cpu_has_feature(CPU_FTR_ARCH_201)) - return -EPERM; + /* On the first time here, set up VRMA or RMA */ + if (!vcpu->kvm->arch.rma_setup_done) { + r = kvmppc_hv_setup_rma(vcpu); + if (r) + return r; + } flush_fp_to_thread(current); flush_altivec_to_thread(current); @@ -1063,34 +1074,15 @@ long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret) return fd; } -static struct page *hva_to_page(unsigned long addr) -{ - struct page *page[1]; - int npages; - - might_sleep(); - - npages = get_user_pages_fast(addr, 1, 1, page); - - if (unlikely(npages != 1)) - return 0; - - return page[0]; -} - int kvmppc_core_prepare_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem) { - unsigned long psize, porder; - unsigned long i, npages; - unsigned long hva; - struct kvmppc_rma_info *ri = NULL; - struct page *page; + unsigned long psize; + unsigned long npages; unsigned long *phys; - /* For now, only allow 16MB pages */ - porder = LARGE_PAGE_ORDER; - psize = 1ul << porder; + /* For now, only allow 16MB-aligned slots */ + psize = kvm->arch.ram_psize; if ((mem->memory_size & (psize - 1)) || (mem->guest_phys_addr & (psize - 1))) { pr_err("bad memory_size=%llx @ %llx\n", @@ -1099,7 +1091,7 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm, } /* Allocate a slot_phys array */ - npages = mem->memory_size >> porder; + npages = mem->memory_size >> kvm->arch.ram_porder; phys = kvm->arch.slot_phys[mem->slot]; if (!phys) { phys = vzalloc(npages * sizeof(unsigned long)); @@ -1109,39 +1101,110 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm, kvm->arch.slot_npages[mem->slot] = npages; } - /* Do we already have an RMA registered? */ - if (mem->guest_phys_addr == 0 && kvm->arch.rma) - return -EINVAL; + return 0; +} - /* Is this one of our preallocated RMAs? */ - if (mem->guest_phys_addr == 0) { - struct vm_area_struct *vma; - - down_read(¤t->mm->mmap_sem); - vma = find_vma(current->mm, mem->userspace_addr); - if (vma && vma->vm_file && - vma->vm_file->f_op == &kvm_rma_fops && - mem->userspace_addr == vma->vm_start) - ri = vma->vm_file->private_data; - up_read(¤t->mm->mmap_sem); - if (!ri && cpu_has_feature(CPU_FTR_ARCH_201)) { - pr_err("CPU requires an RMO\n"); - return -EINVAL; +static void unpin_slot(struct kvm *kvm, int slot_id) +{ + unsigned long *physp; + unsigned long j, npages, pfn; + struct page *page; + + physp = kvm->arch.slot_phys[slot_id]; + npages = kvm->arch.slot_npages[slot_id]; + if (physp) { + spin_lock(&kvm->arch.slot_phys_lock); + for (j = 0; j < npages; j++) { + if (!(physp[j] & KVMPPC_GOT_PAGE)) + continue; + pfn = physp[j] >> PAGE_SHIFT; + page = pfn_to_page(pfn); + SetPageDirty(page); + put_page(page); } + kvm->arch.slot_phys[slot_id] = NULL; + spin_unlock(&kvm->arch.slot_phys_lock); + vfree(physp); } +} + +void kvmppc_core_commit_memory_region(struct kvm *kvm, + struct kvm_userspace_memory_region *mem) +{ +} + +static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu) +{ + int err = 0; + struct kvm *kvm = vcpu->kvm; + struct kvmppc_rma_info *ri = NULL; + unsigned long hva; + struct kvm_memory_slot *memslot; + struct vm_area_struct *vma; + unsigned long lpcr; + unsigned long psize, porder; + unsigned long rma_size; + unsigned long rmls; + unsigned long *physp; + unsigned long i, npages, pa; + + mutex_lock(&kvm->lock); + if (kvm->arch.rma_setup_done) + goto out; /* another vcpu beat us to it */ - if (ri) { - unsigned long rma_size; - unsigned long lpcr; - long rmls; + /* Look up the memslot for guest physical address 0 */ + memslot = gfn_to_memslot(kvm, 0); - rma_size = ri->npages << PAGE_SHIFT; - if (rma_size > mem->memory_size) - rma_size = mem->memory_size; + /* We must have some memory at 0 by now */ + err = -EINVAL; + if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) + goto out; + + /* Look up the VMA for the start of this memory slot */ + hva = memslot->userspace_addr; + down_read(¤t->mm->mmap_sem); + vma = find_vma(current->mm, hva); + if (!vma || vma->vm_start > hva || (vma->vm_flags & VM_IO)) + goto up_out; + + psize = vma_kernel_pagesize(vma); + if (psize != kvm->arch.ram_psize) + goto up_out; + + /* Is this one of our preallocated RMAs? */ + if (vma->vm_file && vma->vm_file->f_op == &kvm_rma_fops && + hva == vma->vm_start) + ri = vma->vm_file->private_data; + + up_read(¤t->mm->mmap_sem); + + if (!ri) { + /* On POWER7, use VRMA; on PPC970, give up */ + err = -EPERM; + if (cpu_has_feature(CPU_FTR_ARCH_201)) { + pr_err("KVM: CPU requires an RMO\n"); + goto out; + } + + /* Update VRMASD field in the LPCR */ + lpcr = kvm->arch.lpcr & ~(0x1fUL << LPCR_VRMASD_SH); + lpcr |= LPCR_VRMA_L; + kvm->arch.lpcr = lpcr; + + /* Create HPTEs in the hash page table for the VRMA */ + kvmppc_map_vrma(vcpu, memslot); + + } else { + /* Set up to use an RMO region */ + rma_size = ri->npages; + if (rma_size > memslot->npages) + rma_size = memslot->npages; + rma_size <<= PAGE_SHIFT; rmls = lpcr_rmls(rma_size); + err = -EINVAL; if (rmls < 0) { - pr_err("Can't use RMA of 0x%lx bytes\n", rma_size); - return -EINVAL; + pr_err("KVM: Can't use RMA of 0x%lx bytes\n", rma_size); + goto out; } atomic_inc(&ri->use_count); kvm->arch.rma = ri; @@ -1164,65 +1227,31 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm, kvm->arch.rmor = kvm->arch.rma->base_pfn << PAGE_SHIFT; } kvm->arch.lpcr = lpcr; - pr_info("Using RMO at %lx size %lx (LPCR = %lx)\n", + pr_info("KVM: Using RMO at %lx size %lx (LPCR = %lx)\n", ri->base_pfn << PAGE_SHIFT, rma_size, lpcr); - } - for (i = 0; i < npages; ++i) { - if (ri && i < ri->npages) { - phys[i] = (ri->base_pfn << PAGE_SHIFT) + (i << porder); - continue; - } - hva = mem->userspace_addr + (i << porder); - page = hva_to_page(hva); - if (!page) { - pr_err("oops, no pfn for hva %lx\n", hva); - goto err; - } - /* Check it's a 16MB page */ - if (!PageHead(page) || - compound_order(page) != (LARGE_PAGE_ORDER - PAGE_SHIFT)) { - pr_err("page at %lx isn't 16MB (o=%d)\n", - hva, compound_order(page)); - goto err; - } - phys[i] = (page_to_pfn(page) << PAGE_SHIFT) | KVMPPC_GOT_PAGE; + /* Initialize phys addrs of pages in RMO */ + porder = kvm->arch.ram_porder; + npages = rma_size >> porder; + pa = ri->base_pfn << PAGE_SHIFT; + physp = kvm->arch.slot_phys[memslot->id]; + spin_lock(&kvm->arch.slot_phys_lock); + for (i = 0; i < npages; ++i) + physp[i] = pa + (i << porder); + spin_unlock(&kvm->arch.slot_phys_lock); } - return 0; - - err: - return -EINVAL; -} - -static void unpin_slot(struct kvm *kvm, int slot_id) -{ - unsigned long *physp; - unsigned long j, npages, pfn; - struct page *page; - - physp = kvm->arch.slot_phys[slot_id]; - npages = kvm->arch.slot_npages[slot_id]; - if (physp) { - for (j = 0; j < npages; j++) { - if (!(physp[j] & KVMPPC_GOT_PAGE)) - continue; - pfn = physp[j] >> PAGE_SHIFT; - page = pfn_to_page(pfn); - SetPageDirty(page); - put_page(page); - } - vfree(physp); - kvm->arch.slot_phys[slot_id] = NULL; - } -} + /* Order updates to kvm->arch.lpcr etc. vs. rma_setup_done */ + smp_wmb(); + kvm->arch.rma_setup_done = 1; + err = 0; + out: + mutex_unlock(&kvm->lock); + return err; -void kvmppc_core_commit_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem) -{ - if (mem->guest_phys_addr == 0 && mem->memory_size != 0 && - !kvm->arch.rma) - kvmppc_map_vrma(kvm, mem); + up_out: + up_read(¤t->mm->mmap_sem); + goto out; } int kvmppc_core_init_vm(struct kvm *kvm) @@ -1261,6 +1290,7 @@ int kvmppc_core_init_vm(struct kvm *kvm) } kvm->arch.lpcr = lpcr; + spin_lock_init(&kvm->arch.slot_phys_lock); return 0; } diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index a28a6030ec9..047c5e1fd70 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -56,56 +57,54 @@ static void *real_vmalloc_addr(void *x) long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, long pte_index, unsigned long pteh, unsigned long ptel) { - unsigned long porder; struct kvm *kvm = vcpu->kvm; - unsigned long i, gfn, lpn, pa; + unsigned long i, pa, gpa, gfn, psize; + unsigned long slot_fn; unsigned long *hpte; struct revmap_entry *rev; unsigned long g_ptel = ptel; struct kvm_memory_slot *memslot; - unsigned long *physp; + unsigned long *physp, pte_size; + bool realmode = vcpu->arch.vcore->vcore_state == VCORE_RUNNING; - /* only handle 4k, 64k and 16M pages for now */ - porder = 12; - if (pteh & HPTE_V_LARGE) { - if (cpu_has_feature(CPU_FTR_ARCH_206) && - (ptel & 0xf000) == 0x1000) { - /* 64k page */ - porder = 16; - } else if ((ptel & 0xff000) == 0) { - /* 16M page */ - porder = 24; - /* lowest AVA bit must be 0 for 16M pages */ - if (pteh & 0x80) - return H_PARAMETER; - } else - return H_PARAMETER; - } - if (porder > kvm->arch.ram_porder) + psize = hpte_page_size(pteh, ptel); + if (!psize) return H_PARAMETER; - gfn = ((ptel & HPTE_R_RPN) & ~((1ul << porder) - 1)) >> PAGE_SHIFT; + /* Find the memslot (if any) for this address */ + gpa = (ptel & HPTE_R_RPN) & ~(psize - 1); + gfn = gpa >> PAGE_SHIFT; memslot = builtin_gfn_to_memslot(kvm, gfn); if (!(memslot && !(memslot->flags & KVM_MEMSLOT_INVALID))) return H_PARAMETER; + slot_fn = gfn - memslot->base_gfn; + physp = kvm->arch.slot_phys[memslot->id]; if (!physp) return H_PARAMETER; - - lpn = (gfn - memslot->base_gfn) >> (kvm->arch.ram_porder - PAGE_SHIFT); - physp = real_vmalloc_addr(physp + lpn); + physp += slot_fn; + if (realmode) + physp = real_vmalloc_addr(physp); pa = *physp; if (!pa) - return H_PARAMETER; + return H_TOO_HARD; pa &= PAGE_MASK; + pte_size = kvm->arch.ram_psize; + if (pte_size < psize) + return H_PARAMETER; + if (pa && pte_size > psize) + pa |= gpa & (pte_size - 1); + + ptel &= ~(HPTE_R_PP0 - psize); + ptel |= pa; + /* Check WIMG */ if ((ptel & HPTE_R_WIMG) != HPTE_R_M && (ptel & HPTE_R_WIMG) != (HPTE_R_W | HPTE_R_I | HPTE_R_M)) return H_PARAMETER; pteh &= ~0x60UL; - ptel &= ~(HPTE_R_PP0 - kvm->arch.ram_psize); - ptel |= pa; + pteh |= HPTE_V_VALID; if (pte_index >= HPT_NPTE) return H_PARAMETER; @@ -162,6 +161,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, vcpu->arch.gpr[4] = pte_index; return H_SUCCESS; } +EXPORT_SYMBOL_GPL(kvmppc_h_enter); #define LOCK_TOKEN (*(u32 *)(&get_paca()->lock_token)) -- cgit v1.2.3-70-g09d2 From da9d1d7f2875cc8c1ffbce8f3501d0b33f4e7a4d Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 12 Dec 2011 12:31:41 +0000 Subject: KVM: PPC: Allow use of small pages to back Book3S HV guests This relaxes the requirement that the guest memory be provided as 16MB huge pages, allowing it to be provided as normal memory, i.e. in pages of PAGE_SIZE bytes (4k or 64k). To allow this, we index the kvm->arch.slot_phys[] arrays with a small page index, even if huge pages are being used, and use the low-order 5 bits of each entry to store the order of the enclosing page with respect to normal pages, i.e. log_2(enclosing_page_size / PAGE_SIZE). Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/include/asm/kvm_book3s_64.h | 10 +++ arch/powerpc/include/asm/kvm_host.h | 3 +- arch/powerpc/include/asm/kvm_ppc.h | 2 +- arch/powerpc/include/asm/reg.h | 1 + arch/powerpc/kvm/book3s_64_mmu_hv.c | 122 +++++++++++++++++++++---------- arch/powerpc/kvm/book3s_hv.c | 57 ++++++++------- arch/powerpc/kvm/book3s_hv_rm_mmu.c | 6 +- 7 files changed, 132 insertions(+), 69 deletions(-) (limited to 'arch/powerpc/include/asm/kvm_ppc.h') diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index 7e6f2ede44a..10920f7a2b8 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -113,4 +113,14 @@ static inline unsigned long hpte_page_size(unsigned long h, unsigned long l) return 0; /* error */ } +static inline bool slot_is_aligned(struct kvm_memory_slot *memslot, + unsigned long pagesize) +{ + unsigned long mask = (pagesize >> PAGE_SHIFT) - 1; + + if (pagesize <= PAGE_SIZE) + return 1; + return !(memslot->base_gfn & mask) && !(memslot->npages & mask); +} + #endif /* __ASM_KVM_BOOK3S_64_H__ */ diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index beb22ba71e2..9252d5e3758 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -177,14 +177,13 @@ struct revmap_entry { }; /* Low-order bits in kvm->arch.slot_phys[][] */ +#define KVMPPC_PAGE_ORDER_MASK 0x1f #define KVMPPC_GOT_PAGE 0x80 struct kvm_arch { #ifdef CONFIG_KVM_BOOK3S_64_HV unsigned long hpt_virt; struct revmap_entry *revmap; - unsigned long ram_psize; - unsigned long ram_porder; unsigned int lpid; unsigned int host_lpid; unsigned long host_lpcr; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 1458c6740ea..fb70414db90 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -122,7 +122,7 @@ extern void kvmppc_free_hpt(struct kvm *kvm); extern long kvmppc_prepare_vrma(struct kvm *kvm, struct kvm_userspace_memory_region *mem); extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu, - struct kvm_memory_slot *memslot); + struct kvm_memory_slot *memslot, unsigned long porder); extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu); extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, struct kvm_create_spapr_tce *args); diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 7fdc2c0b7fa..64447f6c049 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -237,6 +237,7 @@ #define LPCR_ISL (1ul << (63-2)) #define LPCR_VC_SH (63-2) #define LPCR_DPFD_SH (63-11) +#define LPCR_VRMASD (0x1ful << (63-16)) #define LPCR_VRMA_L (1ul << (63-12)) #define LPCR_VRMA_LP0 (1ul << (63-15)) #define LPCR_VRMA_LP1 (1ul << (63-16)) diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 87016ccd864..cc18f3d67a5 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -34,8 +34,6 @@ #include #include -/* Pages in the VRMA are 16MB pages */ -#define VRMA_PAGE_ORDER 24 #define VRMA_VSID 0x1ffffffUL /* 1TB VSID reserved for VRMA */ /* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */ @@ -95,17 +93,31 @@ void kvmppc_free_hpt(struct kvm *kvm) free_pages(kvm->arch.hpt_virt, HPT_ORDER - PAGE_SHIFT); } -void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot) +/* Bits in first HPTE dword for pagesize 4k, 64k or 16M */ +static inline unsigned long hpte0_pgsize_encoding(unsigned long pgsize) +{ + return (pgsize > 0x1000) ? HPTE_V_LARGE : 0; +} + +/* Bits in second HPTE dword for pagesize 4k, 64k or 16M */ +static inline unsigned long hpte1_pgsize_encoding(unsigned long pgsize) +{ + return (pgsize == 0x10000) ? 0x1000 : 0; +} + +void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot, + unsigned long porder) { - struct kvm *kvm = vcpu->kvm; unsigned long i; unsigned long npages; unsigned long hp_v, hp_r; unsigned long addr, hash; - unsigned long porder = kvm->arch.ram_porder; + unsigned long psize; + unsigned long hp0, hp1; long ret; - npages = kvm->arch.slot_npages[memslot->id]; + psize = 1ul << porder; + npages = memslot->npages >> (porder - PAGE_SHIFT); /* VRMA can't be > 1TB */ if (npages > 1ul << (40 - porder)) @@ -114,6 +126,11 @@ void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot) if (npages > HPT_NPTEG) npages = HPT_NPTEG; + hp0 = HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)) | + HPTE_V_BOLTED | hpte0_pgsize_encoding(psize); + hp1 = hpte1_pgsize_encoding(psize) | + HPTE_R_R | HPTE_R_C | HPTE_R_M | PP_RWXX; + for (i = 0; i < npages; ++i) { addr = i << porder; /* can't use hpt_hash since va > 64 bits */ @@ -125,10 +142,8 @@ void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot) * is available and use it. */ hash = (hash << 3) + 7; - hp_v = HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)) | - (i << (VRMA_PAGE_ORDER - 16)) | HPTE_V_BOLTED | - HPTE_V_LARGE | HPTE_V_VALID; - hp_r = addr | HPTE_R_R | HPTE_R_C | HPTE_R_M | PP_RWXX; + hp_v = hp0 | ((addr >> 16) & ~0x7fUL); + hp_r = hp1 | addr; ret = kvmppc_virtmode_h_enter(vcpu, H_EXACT, hash, hp_v, hp_r); if (ret != H_SUCCESS) { pr_err("KVM: map_vrma at %lx failed, ret=%ld\n", @@ -176,22 +191,25 @@ static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu) * one already in the kvm->arch.slot_phys[][] arrays. */ static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn, - struct kvm_memory_slot *memslot) + struct kvm_memory_slot *memslot, + unsigned long psize) { unsigned long start; - long np; - struct page *page, *pages[1]; + long np, err; + struct page *page, *hpage, *pages[1]; + unsigned long s, pgsize; unsigned long *physp; - unsigned long pfn, i; + unsigned int got, pgorder; + unsigned long pfn, i, npages; physp = kvm->arch.slot_phys[memslot->id]; if (!physp) return -EINVAL; - i = (gfn - memslot->base_gfn) >> (kvm->arch.ram_porder - PAGE_SHIFT); - if (physp[i]) + if (physp[gfn - memslot->base_gfn]) return 0; page = NULL; + pgsize = psize; start = gfn_to_hva_memslot(memslot, gfn); /* Instantiate and get the page we want access to */ @@ -199,25 +217,46 @@ static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn, if (np != 1) return -EINVAL; page = pages[0]; - - /* Check it's a 16MB page */ - if (!PageHead(page) || - compound_order(page) != (kvm->arch.ram_porder - PAGE_SHIFT)) { - pr_err("page at %lx isn't 16MB (o=%d)\n", - start, compound_order(page)); - put_page(page); - return -EINVAL; + got = KVMPPC_GOT_PAGE; + + /* See if this is a large page */ + s = PAGE_SIZE; + if (PageHuge(page)) { + hpage = compound_head(page); + s <<= compound_order(hpage); + /* Get the whole large page if slot alignment is ok */ + if (s > psize && slot_is_aligned(memslot, s) && + !(memslot->userspace_addr & (s - 1))) { + start &= ~(s - 1); + pgsize = s; + page = hpage; + } } + err = -EINVAL; + if (s < psize) + goto out; pfn = page_to_pfn(page); + npages = pgsize >> PAGE_SHIFT; + pgorder = __ilog2(npages); + physp += (gfn - memslot->base_gfn) & ~(npages - 1); spin_lock(&kvm->arch.slot_phys_lock); - if (!physp[i]) - physp[i] = (pfn << PAGE_SHIFT) | KVMPPC_GOT_PAGE; - else - put_page(page); + for (i = 0; i < npages; ++i) { + if (!physp[i]) { + physp[i] = ((pfn + i) << PAGE_SHIFT) + got + pgorder; + got = 0; + } + } spin_unlock(&kvm->arch.slot_phys_lock); + err = 0; - return 0; + out: + if (got) { + if (PageHuge(page)) + page = compound_head(page); + put_page(page); + } + return err; } /* @@ -242,7 +281,9 @@ long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, memslot = gfn_to_memslot(kvm, gfn); if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) return H_PARAMETER; - if (kvmppc_get_guest_page(kvm, gfn, memslot) < 0) + if (!slot_is_aligned(memslot, psize)) + return H_PARAMETER; + if (kvmppc_get_guest_page(kvm, gfn, memslot, psize) < 0) return H_PARAMETER; preempt_disable(); @@ -269,8 +310,8 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa, struct kvm_memory_slot *memslot; unsigned long gfn = gpa >> PAGE_SHIFT; struct page *page; - unsigned long offset; - unsigned long pfn, pa; + unsigned long psize, offset; + unsigned long pa; unsigned long *physp; memslot = gfn_to_memslot(kvm, gfn); @@ -279,20 +320,23 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa, physp = kvm->arch.slot_phys[memslot->id]; if (!physp) return NULL; - physp += (gfn - memslot->base_gfn) >> - (kvm->arch.ram_porder - PAGE_SHIFT); + physp += gfn - memslot->base_gfn; pa = *physp; if (!pa) { - if (kvmppc_get_guest_page(kvm, gfn, memslot) < 0) + if (kvmppc_get_guest_page(kvm, gfn, memslot, PAGE_SIZE) < 0) return NULL; pa = *physp; } - pfn = pa >> PAGE_SHIFT; - page = pfn_to_page(pfn); + page = pfn_to_page(pa >> PAGE_SHIFT); + psize = PAGE_SIZE; + if (PageHuge(page)) { + page = compound_head(page); + psize <<= compound_order(page); + } get_page(page); - offset = gpa & (kvm->arch.ram_psize - 1); + offset = gpa & (psize - 1); if (nb_ret) - *nb_ret = kvm->arch.ram_psize - offset; + *nb_ret = psize - offset; return page_address(page) + offset; } diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index ce5a13fb974..6ed0a84ef91 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -51,8 +51,6 @@ #include #include -#define LARGE_PAGE_ORDER 24 /* 16MB pages */ - /* #define EXIT_DEBUG */ /* #define EXIT_DEBUG_SIMPLE */ /* #define EXIT_DEBUG_INT */ @@ -1074,24 +1072,26 @@ long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret) return fd; } +static unsigned long slb_pgsize_encoding(unsigned long psize) +{ + unsigned long senc = 0; + + if (psize > 0x1000) { + senc = SLB_VSID_L; + if (psize == 0x10000) + senc |= SLB_VSID_LP_01; + } + return senc; +} + int kvmppc_core_prepare_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem) { - unsigned long psize; unsigned long npages; unsigned long *phys; - /* For now, only allow 16MB-aligned slots */ - psize = kvm->arch.ram_psize; - if ((mem->memory_size & (psize - 1)) || - (mem->guest_phys_addr & (psize - 1))) { - pr_err("bad memory_size=%llx @ %llx\n", - mem->memory_size, mem->guest_phys_addr); - return -EINVAL; - } - /* Allocate a slot_phys array */ - npages = mem->memory_size >> kvm->arch.ram_porder; + npages = mem->memory_size >> PAGE_SHIFT; phys = kvm->arch.slot_phys[mem->slot]; if (!phys) { phys = vzalloc(npages * sizeof(unsigned long)); @@ -1119,6 +1119,8 @@ static void unpin_slot(struct kvm *kvm, int slot_id) continue; pfn = physp[j] >> PAGE_SHIFT; page = pfn_to_page(pfn); + if (PageHuge(page)) + page = compound_head(page); SetPageDirty(page); put_page(page); } @@ -1141,12 +1143,12 @@ static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu) unsigned long hva; struct kvm_memory_slot *memslot; struct vm_area_struct *vma; - unsigned long lpcr; + unsigned long lpcr, senc; unsigned long psize, porder; unsigned long rma_size; unsigned long rmls; unsigned long *physp; - unsigned long i, npages, pa; + unsigned long i, npages; mutex_lock(&kvm->lock); if (kvm->arch.rma_setup_done) @@ -1168,8 +1170,7 @@ static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu) goto up_out; psize = vma_kernel_pagesize(vma); - if (psize != kvm->arch.ram_psize) - goto up_out; + porder = __ilog2(psize); /* Is this one of our preallocated RMAs? */ if (vma->vm_file && vma->vm_file->f_op == &kvm_rma_fops && @@ -1186,13 +1187,20 @@ static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu) goto out; } + /* We can handle 4k, 64k or 16M pages in the VRMA */ + err = -EINVAL; + if (!(psize == 0x1000 || psize == 0x10000 || + psize == 0x1000000)) + goto out; + /* Update VRMASD field in the LPCR */ - lpcr = kvm->arch.lpcr & ~(0x1fUL << LPCR_VRMASD_SH); - lpcr |= LPCR_VRMA_L; + senc = slb_pgsize_encoding(psize); + lpcr = kvm->arch.lpcr & ~LPCR_VRMASD; + lpcr |= senc << (LPCR_VRMASD_SH - 4); kvm->arch.lpcr = lpcr; /* Create HPTEs in the hash page table for the VRMA */ - kvmppc_map_vrma(vcpu, memslot); + kvmppc_map_vrma(vcpu, memslot, porder); } else { /* Set up to use an RMO region */ @@ -1231,13 +1239,12 @@ static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu) ri->base_pfn << PAGE_SHIFT, rma_size, lpcr); /* Initialize phys addrs of pages in RMO */ - porder = kvm->arch.ram_porder; - npages = rma_size >> porder; - pa = ri->base_pfn << PAGE_SHIFT; + npages = ri->npages; + porder = __ilog2(npages); physp = kvm->arch.slot_phys[memslot->id]; spin_lock(&kvm->arch.slot_phys_lock); for (i = 0; i < npages; ++i) - physp[i] = pa + (i << porder); + physp[i] = ((ri->base_pfn + i) << PAGE_SHIFT) + porder; spin_unlock(&kvm->arch.slot_phys_lock); } @@ -1266,8 +1273,6 @@ int kvmppc_core_init_vm(struct kvm *kvm) INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables); - kvm->arch.ram_psize = 1ul << LARGE_PAGE_ORDER; - kvm->arch.ram_porder = LARGE_PAGE_ORDER; kvm->arch.rma = NULL; kvm->arch.host_sdr1 = mfspr(SPRN_SDR1); diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 047c5e1fd70..c086eb0fa99 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -77,6 +77,10 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, memslot = builtin_gfn_to_memslot(kvm, gfn); if (!(memslot && !(memslot->flags & KVM_MEMSLOT_INVALID))) return H_PARAMETER; + + /* Check if the requested page fits entirely in the memslot. */ + if (!slot_is_aligned(memslot, psize)) + return H_PARAMETER; slot_fn = gfn - memslot->base_gfn; physp = kvm->arch.slot_phys[memslot->id]; @@ -88,9 +92,9 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, pa = *physp; if (!pa) return H_TOO_HARD; + pte_size = PAGE_SIZE << (pa & KVMPPC_PAGE_ORDER_MASK); pa &= PAGE_MASK; - pte_size = kvm->arch.ram_psize; if (pte_size < psize) return H_PARAMETER; if (pa && pte_size > psize) -- cgit v1.2.3-70-g09d2 From 31f3438eca2fc90dc892e0e9963ba4b93a2c8383 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 12 Dec 2011 12:26:50 +0000 Subject: KVM: PPC: Move kvm_vcpu_ioctl_[gs]et_one_reg down to platform-specific code This moves the get/set_one_reg implementation down from powerpc.c into booke.c, book3s_pr.c and book3s_hv.c. This avoids #ifdefs in C code, but more importantly, it fixes a bug on Book3s HV where we were accessing beyond the end of the kvm_vcpu struct (via the to_book3s() macro) and corrupting memory, causing random crashes and file corruption. On Book3s HV we only accept setting the HIOR to zero, since the guest runs in supervisor mode and its vectors are never offset from zero. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf [agraf update to apply on top of changed ONE_REG patches] Signed-off-by: Avi Kivity --- arch/powerpc/include/asm/kvm_ppc.h | 3 +++ arch/powerpc/kvm/book3s_hv.c | 36 ++++++++++++++++++++++++++++++++++++ arch/powerpc/kvm/book3s_pr.c | 32 ++++++++++++++++++++++++++++++++ arch/powerpc/kvm/booke.c | 10 ++++++++++ arch/powerpc/kvm/powerpc.c | 38 -------------------------------------- 5 files changed, 81 insertions(+), 38 deletions(-) (limited to 'arch/powerpc/include/asm/kvm_ppc.h') diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index fb70414db90..a61b5b5047d 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -176,6 +176,9 @@ int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); void kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); +int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg); +int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg); + void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid); #ifdef CONFIG_KVM_BOOK3S_64_HV diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index fdc804c8393..3580db8a232 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -398,6 +398,42 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, return 0; } +int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) +{ + int r = -EINVAL; + + switch (reg->id) { + case KVM_REG_PPC_HIOR: + r = put_user(0, (u64 __user *)reg->addr); + break; + default: + break; + } + + return r; +} + +int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) +{ + int r = -EINVAL; + + switch (reg->id) { + case KVM_REG_PPC_HIOR: + { + u64 hior; + /* Only allow this to be set to zero */ + r = get_user(hior, (u64 __user *)reg->addr); + if (!r && (hior != 0)) + r = -EINVAL; + break; + } + default: + break; + } + + return r; +} + int kvmppc_core_check_processor_compat(void) { if (cpu_has_feature(CPU_FTR_HVMODE)) diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 00efda6dc0e..ee222ec7c95 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -874,6 +874,38 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, return 0; } +int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) +{ + int r = -EINVAL; + + switch (reg->id) { + case KVM_REG_PPC_HIOR: + r = put_user(to_book3s(vcpu)->hior, (u64 __user *)reg->addr); + break; + default: + break; + } + + return r; +} + +int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) +{ + int r = -EINVAL; + + switch (reg->id) { + case KVM_REG_PPC_HIOR: + r = get_user(to_book3s(vcpu)->hior, (u64 __user *)reg->addr); + if (!r) + to_book3s(vcpu)->hior_explicit = true; + break; + default: + break; + } + + return r; +} + int kvmppc_core_check_processor_compat(void) { return 0; diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 9e41f45d07e..ee9e1ee9c85 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -887,6 +887,16 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, return kvmppc_core_set_sregs(vcpu, sregs); } +int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) +{ + return -EINVAL; +} + +int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) +{ + return -EINVAL; +} + int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) { return -ENOTSUPP; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 59852091b38..e23270779ff 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -647,44 +647,6 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, return r; } -static int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, - struct kvm_one_reg *reg) -{ - int r = -EINVAL; - - switch (reg->id) { -#ifdef CONFIG_PPC_BOOK3S - case KVM_REG_PPC_HIOR: - r = put_user(to_book3s(vcpu)->hior, (u64 __user *)reg->addr); - break; -#endif - default: - break; - } - - return r; -} - -static int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, - struct kvm_one_reg *reg) -{ - int r = -EINVAL; - - switch (reg->id) { -#ifdef CONFIG_PPC_BOOK3S - case KVM_ONE_REG_PPC_HIOR: - r = get_user(to_book3s(vcpu)->hior, (u64 __user *)reg->addr); - if (!r) - to_book3s(vcpu)->hior_explicit = true; - break; -#endif - default: - break; - } - - return r; -} - int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { -- cgit v1.2.3-70-g09d2 From b4e706111d501991c59d2af23a299ab52a06b03d Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 16 Jan 2012 16:50:10 +0100 Subject: KVM: PPC: Convert RMA allocation into generic code We have code to allocate big chunks of linear memory on bootup for later use. This code is currently used for RMA allocation, but can be useful beyond that extent. Make it generic so we can reuse it for other stuff later. Signed-off-by: Alexander Graf Acked-by: Paul Mackerras Signed-off-by: Avi Kivity --- arch/powerpc/include/asm/kvm_host.h | 7 +- arch/powerpc/include/asm/kvm_ppc.h | 8 +- arch/powerpc/kernel/setup_64.c | 2 +- arch/powerpc/kvm/book3s_hv.c | 8 +- arch/powerpc/kvm/book3s_hv_builtin.c | 175 +++++++++++++++++++++-------------- 5 files changed, 118 insertions(+), 82 deletions(-) (limited to 'arch/powerpc/include/asm/kvm_ppc.h') diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index af438b1e8a3..8221e717bbc 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -173,12 +173,13 @@ struct kvmppc_spapr_tce_table { struct page *pages[0]; }; -struct kvmppc_rma_info { +struct kvmppc_linear_info { void *base_virt; unsigned long base_pfn; unsigned long npages; struct list_head list; - atomic_t use_count; + atomic_t use_count; + int type; }; /* @@ -224,7 +225,7 @@ struct kvm_arch { int tlbie_lock; unsigned long lpcr; unsigned long rmor; - struct kvmppc_rma_info *rma; + struct kvmppc_linear_info *rma; unsigned long vrma_slb_v; int rma_setup_done; int using_mmu_notifiers; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index a61b5b5047d..1c37a2f8d0f 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -128,8 +128,8 @@ extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, struct kvm_create_spapr_tce *args); extern long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *rma); -extern struct kvmppc_rma_info *kvm_alloc_rma(void); -extern void kvm_release_rma(struct kvmppc_rma_info *ri); +extern struct kvmppc_linear_info *kvm_alloc_rma(void); +extern void kvm_release_rma(struct kvmppc_linear_info *ri); extern int kvmppc_core_init_vm(struct kvm *kvm); extern void kvmppc_core_destroy_vm(struct kvm *kvm); extern int kvmppc_core_prepare_memory_region(struct kvm *kvm, @@ -187,13 +187,13 @@ static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr) paca[cpu].kvm_hstate.xics_phys = addr; } -extern void kvm_rma_init(void); +extern void kvm_linear_init(void); #else static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr) {} -static inline void kvm_rma_init(void) +static inline void kvm_linear_init(void) {} #endif diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 4cb8f1e9d04..4721b0c8d7b 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -598,7 +598,7 @@ void __init setup_arch(char **cmdline_p) /* Initialize the MMU context management stuff */ mmu_context_init(); - kvm_rma_init(); + kvm_linear_init(); ppc64_boot_msg(0x15, "Setup Done"); } diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 3580db8a232..ce1cac76519 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1055,7 +1055,7 @@ static inline int lpcr_rmls(unsigned long rma_size) static int kvm_rma_fault(struct vm_area_struct *vma, struct vm_fault *vmf) { - struct kvmppc_rma_info *ri = vma->vm_file->private_data; + struct kvmppc_linear_info *ri = vma->vm_file->private_data; struct page *page; if (vmf->pgoff >= ri->npages) @@ -1080,7 +1080,7 @@ static int kvm_rma_mmap(struct file *file, struct vm_area_struct *vma) static int kvm_rma_release(struct inode *inode, struct file *filp) { - struct kvmppc_rma_info *ri = filp->private_data; + struct kvmppc_linear_info *ri = filp->private_data; kvm_release_rma(ri); return 0; @@ -1093,7 +1093,7 @@ static struct file_operations kvm_rma_fops = { long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret) { - struct kvmppc_rma_info *ri; + struct kvmppc_linear_info *ri; long fd; ri = kvm_alloc_rma(); @@ -1212,7 +1212,7 @@ static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu) { int err = 0; struct kvm *kvm = vcpu->kvm; - struct kvmppc_rma_info *ri = NULL; + struct kvmppc_linear_info *ri = NULL; unsigned long hva; struct kvm_memory_slot *memslot; struct vm_area_struct *vma; diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index a795a13f4a7..1c7e6ab5f9d 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -18,6 +18,14 @@ #include #include +#define KVM_LINEAR_RMA 0 + +static void __init kvm_linear_init_one(ulong size, int count, int type); +static struct kvmppc_linear_info *kvm_alloc_linear(int type); +static void kvm_release_linear(struct kvmppc_linear_info *ri); + +/*************** RMA *************/ + /* * This maintains a list of RMAs (real mode areas) for KVM guests to use. * Each RMA has to be physically contiguous and of a size that the @@ -29,32 +37,6 @@ static unsigned long kvm_rma_size = 64 << 20; /* 64MB */ static unsigned long kvm_rma_count; -static int __init early_parse_rma_size(char *p) -{ - if (!p) - return 1; - - kvm_rma_size = memparse(p, &p); - - return 0; -} -early_param("kvm_rma_size", early_parse_rma_size); - -static int __init early_parse_rma_count(char *p) -{ - if (!p) - return 1; - - kvm_rma_count = simple_strtoul(p, NULL, 0); - - return 0; -} -early_param("kvm_rma_count", early_parse_rma_count); - -static struct kvmppc_rma_info *rma_info; -static LIST_HEAD(free_rmas); -static DEFINE_SPINLOCK(rma_lock); - /* Work out RMLS (real mode limit selector) field value for a given RMA size. Assumes POWER7 or PPC970. */ static inline int lpcr_rmls(unsigned long rma_size) @@ -81,45 +63,73 @@ static inline int lpcr_rmls(unsigned long rma_size) } } -/* - * Called at boot time while the bootmem allocator is active, - * to allocate contiguous physical memory for the real memory - * areas for guests. - */ -void __init kvm_rma_init(void) +static int __init early_parse_rma_size(char *p) +{ + if (!p) + return 1; + + kvm_rma_size = memparse(p, &p); + + return 0; +} +early_param("kvm_rma_size", early_parse_rma_size); + +static int __init early_parse_rma_count(char *p) +{ + if (!p) + return 1; + + kvm_rma_count = simple_strtoul(p, NULL, 0); + + return 0; +} +early_param("kvm_rma_count", early_parse_rma_count); + +struct kvmppc_linear_info *kvm_alloc_rma(void) +{ + return kvm_alloc_linear(KVM_LINEAR_RMA); +} +EXPORT_SYMBOL_GPL(kvm_alloc_rma); + +void kvm_release_rma(struct kvmppc_linear_info *ri) +{ + kvm_release_linear(ri); +} +EXPORT_SYMBOL_GPL(kvm_release_rma); + +/*************** generic *************/ + +static LIST_HEAD(free_linears); +static DEFINE_SPINLOCK(linear_lock); + +static void __init kvm_linear_init_one(ulong size, int count, int type) { unsigned long i; unsigned long j, npages; - void *rma; + void *linear; struct page *pg; + const char *typestr; + struct kvmppc_linear_info *linear_info; - /* Only do this on PPC970 in HV mode */ - if (!cpu_has_feature(CPU_FTR_HVMODE) || - !cpu_has_feature(CPU_FTR_ARCH_201)) - return; - - if (!kvm_rma_size || !kvm_rma_count) + if (!count) return; - /* Check that the requested size is one supported in hardware */ - if (lpcr_rmls(kvm_rma_size) < 0) { - pr_err("RMA size of 0x%lx not supported\n", kvm_rma_size); - return; - } - - npages = kvm_rma_size >> PAGE_SHIFT; - rma_info = alloc_bootmem(kvm_rma_count * sizeof(struct kvmppc_rma_info)); - for (i = 0; i < kvm_rma_count; ++i) { - rma = alloc_bootmem_align(kvm_rma_size, kvm_rma_size); - pr_info("Allocated KVM RMA at %p (%ld MB)\n", rma, - kvm_rma_size >> 20); - rma_info[i].base_virt = rma; - rma_info[i].base_pfn = __pa(rma) >> PAGE_SHIFT; - rma_info[i].npages = npages; - list_add_tail(&rma_info[i].list, &free_rmas); - atomic_set(&rma_info[i].use_count, 0); - - pg = pfn_to_page(rma_info[i].base_pfn); + typestr = (type == KVM_LINEAR_RMA) ? "RMA" : ""; + + npages = size >> PAGE_SHIFT; + linear_info = alloc_bootmem(count * sizeof(struct kvmppc_linear_info)); + for (i = 0; i < count; ++i) { + linear = alloc_bootmem_align(size, size); + pr_info("Allocated KVM %s at %p (%ld MB)\n", typestr, linear, + size >> 20); + linear_info[i].base_virt = linear; + linear_info[i].base_pfn = __pa(linear) >> PAGE_SHIFT; + linear_info[i].npages = npages; + linear_info[i].type = type; + list_add_tail(&linear_info[i].list, &free_linears); + atomic_set(&linear_info[i].use_count, 0); + + pg = pfn_to_page(linear_info[i].base_pfn); for (j = 0; j < npages; ++j) { atomic_inc(&pg->_count); ++pg; @@ -127,30 +137,55 @@ void __init kvm_rma_init(void) } } -struct kvmppc_rma_info *kvm_alloc_rma(void) +static struct kvmppc_linear_info *kvm_alloc_linear(int type) { - struct kvmppc_rma_info *ri; + struct kvmppc_linear_info *ri; ri = NULL; - spin_lock(&rma_lock); - if (!list_empty(&free_rmas)) { - ri = list_first_entry(&free_rmas, struct kvmppc_rma_info, list); + spin_lock(&linear_lock); + list_for_each_entry(ri, &free_linears, list) { + if (ri->type != type) + continue; + list_del(&ri->list); atomic_inc(&ri->use_count); + break; } - spin_unlock(&rma_lock); + spin_unlock(&linear_lock); return ri; } -EXPORT_SYMBOL_GPL(kvm_alloc_rma); -void kvm_release_rma(struct kvmppc_rma_info *ri) +static void kvm_release_linear(struct kvmppc_linear_info *ri) { if (atomic_dec_and_test(&ri->use_count)) { - spin_lock(&rma_lock); - list_add_tail(&ri->list, &free_rmas); - spin_unlock(&rma_lock); + spin_lock(&linear_lock); + list_add_tail(&ri->list, &free_linears); + spin_unlock(&linear_lock); } } -EXPORT_SYMBOL_GPL(kvm_release_rma); +/* + * Called at boot time while the bootmem allocator is active, + * to allocate contiguous physical memory for the hash page + * tables for guests. + */ +void __init kvm_linear_init(void) +{ + /* RMA */ + /* Only do this on PPC970 in HV mode */ + if (!cpu_has_feature(CPU_FTR_HVMODE) || + !cpu_has_feature(CPU_FTR_ARCH_201)) + return; + + if (!kvm_rma_size || !kvm_rma_count) + return; + + /* Check that the requested size is one supported in hardware */ + if (lpcr_rmls(kvm_rma_size) < 0) { + pr_err("RMA size of 0x%lx not supported\n", kvm_rma_size); + return; + } + + kvm_linear_init_one(kvm_rma_size, kvm_rma_count, KVM_LINEAR_RMA); +} -- cgit v1.2.3-70-g09d2 From d2a1b483a4a3f4bbb5fec1877f716c15ac7fa405 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 16 Jan 2012 19:12:11 +0100 Subject: KVM: PPC: Add HPT preallocator We're currently allocating 16MB of linear memory on demand when creating a guest. That does work some times, but finding 16MB of linear memory available in the system at runtime is definitely not a given. So let's add another command line option similar to the RMA preallocator, that we can use to keep a pool of page tables around. Now, when a guest gets created it has a pretty low chance of receiving an OOM. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/include/asm/kvm_host.h | 1 + arch/powerpc/include/asm/kvm_ppc.h | 2 ++ arch/powerpc/kvm/book3s_64_mmu_hv.c | 19 +++++++++++++++--- arch/powerpc/kvm/book3s_hv_builtin.c | 39 +++++++++++++++++++++++++++++++++++- 4 files changed, 57 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/include/asm/kvm_ppc.h') diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 8221e717bbc..1843d5d2a3b 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -235,6 +235,7 @@ struct kvm_arch { int slot_npages[KVM_MEM_SLOTS_NUM]; unsigned short last_vcpu[NR_CPUS]; struct kvmppc_vcore *vcores[KVM_MAX_VCORES]; + struct kvmppc_linear_info *hpt_li; #endif /* CONFIG_KVM_BOOK3S_64_HV */ }; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 1c37a2f8d0f..9d6dee0f7d4 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -130,6 +130,8 @@ extern long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *rma); extern struct kvmppc_linear_info *kvm_alloc_rma(void); extern void kvm_release_rma(struct kvmppc_linear_info *ri); +extern struct kvmppc_linear_info *kvm_alloc_hpt(void); +extern void kvm_release_hpt(struct kvmppc_linear_info *li); extern int kvmppc_core_init_vm(struct kvm *kvm); extern void kvmppc_core_destroy_vm(struct kvm *kvm); extern int kvmppc_core_prepare_memory_region(struct kvm *kvm, diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 783cd3510c9..ddc485a529f 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -44,10 +44,20 @@ long kvmppc_alloc_hpt(struct kvm *kvm) unsigned long hpt; unsigned long lpid; struct revmap_entry *rev; + struct kvmppc_linear_info *li; /* Allocate guest's hashed page table */ - hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT|__GFP_NOWARN, - HPT_ORDER - PAGE_SHIFT); + li = kvm_alloc_hpt(); + if (li) { + /* using preallocated memory */ + hpt = (ulong)li->base_virt; + kvm->arch.hpt_li = li; + } else { + /* using dynamic memory */ + hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT| + __GFP_NOWARN, HPT_ORDER - PAGE_SHIFT); + } + if (!hpt) { pr_err("kvm_alloc_hpt: Couldn't alloc HPT\n"); return -ENOMEM; @@ -88,7 +98,10 @@ void kvmppc_free_hpt(struct kvm *kvm) { clear_bit(kvm->arch.lpid, lpid_inuse); vfree(kvm->arch.revmap); - free_pages(kvm->arch.hpt_virt, HPT_ORDER - PAGE_SHIFT); + if (kvm->arch.hpt_li) + kvm_release_hpt(kvm->arch.hpt_li); + else + free_pages(kvm->arch.hpt_virt, HPT_ORDER - PAGE_SHIFT); } /* Bits in first HPTE dword for pagesize 4k, 64k or 16M */ diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 7caed1dfd7a..bed1279aa6a 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -19,6 +19,7 @@ #include #define KVM_LINEAR_RMA 0 +#define KVM_LINEAR_HPT 1 static void __init kvm_linear_init_one(ulong size, int count, int type); static struct kvmppc_linear_info *kvm_alloc_linear(int type); @@ -97,6 +98,39 @@ void kvm_release_rma(struct kvmppc_linear_info *ri) } EXPORT_SYMBOL_GPL(kvm_release_rma); +/*************** HPT *************/ + +/* + * This maintains a list of big linear HPT tables that contain the GVA->HPA + * memory mappings. If we don't reserve those early on, we might not be able + * to get a big (usually 16MB) linear memory region from the kernel anymore. + */ + +static unsigned long kvm_hpt_count; + +static int __init early_parse_hpt_count(char *p) +{ + if (!p) + return 1; + + kvm_hpt_count = simple_strtoul(p, NULL, 0); + + return 0; +} +early_param("kvm_hpt_count", early_parse_hpt_count); + +struct kvmppc_linear_info *kvm_alloc_hpt(void) +{ + return kvm_alloc_linear(KVM_LINEAR_HPT); +} +EXPORT_SYMBOL_GPL(kvm_alloc_hpt); + +void kvm_release_hpt(struct kvmppc_linear_info *li) +{ + kvm_release_linear(li); +} +EXPORT_SYMBOL_GPL(kvm_release_hpt); + /*************** generic *************/ static LIST_HEAD(free_linears); @@ -114,7 +148,7 @@ static void __init kvm_linear_init_one(ulong size, int count, int type) if (!count) return; - typestr = (type == KVM_LINEAR_RMA) ? "RMA" : ""; + typestr = (type == KVM_LINEAR_RMA) ? "RMA" : "HPT"; npages = size >> PAGE_SHIFT; linear_info = alloc_bootmem(count * sizeof(struct kvmppc_linear_info)); @@ -173,6 +207,9 @@ static void kvm_release_linear(struct kvmppc_linear_info *ri) */ void __init kvm_linear_init(void) { + /* HPT */ + kvm_linear_init_one(1 << HPT_ORDER, kvm_hpt_count, KVM_LINEAR_HPT); + /* RMA */ /* Only do this on PPC970 in HV mode */ if (!cpu_has_feature(CPU_FTR_HVMODE) || -- cgit v1.2.3-70-g09d2