diff options
author | Gleb Natapov <gleb@redhat.com> | 2013-08-30 15:33:11 +0300 |
---|---|---|
committer | Gleb Natapov <gleb@redhat.com> | 2013-08-30 15:33:11 +0300 |
commit | a9f6cf965e00dd3370229417675eb0127d580f96 (patch) | |
tree | 0fe5a9c57fdf6e8e614cdc02412876f153550be4 /arch/powerpc/kvm | |
parent | e5552fd252763c74ce6a6c27c7873939062b5038 (diff) | |
parent | bf550fc93d9855872a95e69e4002256110d89858 (diff) |
Merge branch 'kvm-ppc-next' of git://github.com/agraf/linux-2.6 into queue
* 'kvm-ppc-next' of git://github.com/agraf/linux-2.6:
KVM: PPC: Book3S PR: Rework kvmppc_mmu_book3s_64_xlate()
KVM: PPC: Book3S PR: Make instruction fetch fallback work for system calls
KVM: PPC: Book3S PR: Don't corrupt guest state when kernel uses VMX
KVM: PPC: Book3S: Fix compile error in XICS emulation
KVM: PPC: Book3S PR: return appropriate error when allocation fails
arch: powerpc: kvm: add signed type cast for comparation
powerpc/kvm: Copy the pvr value after memset
KVM: PPC: Book3S PR: Load up SPRG3 register with guest value on guest entry
kvm/ppc/booke: Don't call kvm_guest_enter twice
kvm/ppc: Call trace_hardirqs_on before entry
KVM: PPC: Book3S HV: Allow negative offsets to real-mode hcall handlers
KVM: PPC: Book3S HV: Correct tlbie usage
powerpc/kvm: Use 256K chunk to track both RMA and hash page table allocation.
powerpc/kvm: Contiguous memory allocator based RMA allocation
powerpc/kvm: Contiguous memory allocator based hash page table allocation
KVM: PPC: Book3S: Ignore DABR register
mm/cma: Move dma contiguous changes into a seperate config
Diffstat (limited to 'arch/powerpc/kvm')
-rw-r--r-- | arch/powerpc/kvm/Kconfig | 1 | ||||
-rw-r--r-- | arch/powerpc/kvm/Makefile | 1 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_64_mmu.c | 150 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_64_mmu_hv.c | 40 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_emulate.c | 2 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_hv.c | 38 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_hv_builtin.c | 246 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_hv_cma.c | 240 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_hv_cma.h | 27 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_hv_rm_mmu.c | 139 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_hv_rmhandlers.S | 2 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_interrupts.S | 14 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_pr.c | 40 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_xics.c | 1 | ||||
-rw-r--r-- | arch/powerpc/kvm/booke.c | 6 | ||||
-rw-r--r-- | arch/powerpc/kvm/powerpc.c | 2 |
16 files changed, 614 insertions, 335 deletions
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index eb643f86257..ffaef2cb101 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -72,6 +72,7 @@ config KVM_BOOK3S_64_HV bool "KVM support for POWER7 and PPC970 using hypervisor mode in host" depends on KVM_BOOK3S_64 select MMU_NOTIFIER + select CMA ---help--- Support running unmodified book3s_64 guest kernels in virtual machines on POWER7 and PPC970 processors that have diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index 008cd856c5b..6646c952c5e 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -81,6 +81,7 @@ kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \ book3s_64_vio_hv.o \ book3s_hv_ras.o \ book3s_hv_builtin.o \ + book3s_hv_cma.o \ $(kvm-book3s_64-builtin-xics-objs-y) kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \ diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c index 739bfbadb85..7e345e00661 100644 --- a/arch/powerpc/kvm/book3s_64_mmu.c +++ b/arch/powerpc/kvm/book3s_64_mmu.c @@ -182,10 +182,13 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, hva_t ptegp; u64 pteg[16]; u64 avpn = 0; + u64 v, r; + u64 v_val, v_mask; + u64 eaddr_mask; int i; - u8 key = 0; + u8 pp, key = 0; bool found = false; - int second = 0; + bool second = false; ulong mp_ea = vcpu->arch.magic_page_ea; /* Magic page override */ @@ -208,8 +211,16 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, goto no_seg_found; avpn = kvmppc_mmu_book3s_64_get_avpn(slbe, eaddr); + v_val = avpn & HPTE_V_AVPN; + if (slbe->tb) - avpn |= SLB_VSID_B_1T; + v_val |= SLB_VSID_B_1T; + if (slbe->large) + v_val |= HPTE_V_LARGE; + v_val |= HPTE_V_VALID; + + v_mask = SLB_VSID_B | HPTE_V_AVPN | HPTE_V_LARGE | HPTE_V_VALID | + HPTE_V_SECONDARY; do_second: ptegp = kvmppc_mmu_book3s_64_get_pteg(vcpu_book3s, slbe, eaddr, second); @@ -227,91 +238,74 @@ do_second: key = 4; for (i=0; i<16; i+=2) { - u64 v = pteg[i]; - u64 r = pteg[i+1]; - - /* Valid check */ - if (!(v & HPTE_V_VALID)) - continue; - /* Hash check */ - if ((v & HPTE_V_SECONDARY) != second) - continue; - - /* AVPN compare */ - if (HPTE_V_COMPARE(avpn, v)) { - u8 pp = (r & HPTE_R_PP) | key; - int eaddr_mask = 0xFFF; - - gpte->eaddr = eaddr; - gpte->vpage = kvmppc_mmu_book3s_64_ea_to_vp(vcpu, - eaddr, - data); - if (slbe->large) - eaddr_mask = 0xFFFFFF; - gpte->raddr = (r & HPTE_R_RPN) | (eaddr & eaddr_mask); - gpte->may_execute = ((r & HPTE_R_N) ? false : true); - gpte->may_read = false; - gpte->may_write = false; - - switch (pp) { - case 0: - case 1: - case 2: - case 6: - gpte->may_write = true; - /* fall through */ - case 3: - case 5: - case 7: - gpte->may_read = true; - break; - } - - dprintk("KVM MMU: Translated 0x%lx [0x%llx] -> 0x%llx " - "-> 0x%lx\n", - eaddr, avpn, gpte->vpage, gpte->raddr); + /* Check all relevant fields of 1st dword */ + if ((pteg[i] & v_mask) == v_val) { found = true; break; } } - /* Update PTE R and C bits, so the guest's swapper knows we used the - * page */ - if (found) { - u32 oldr = pteg[i+1]; + if (!found) { + if (second) + goto no_page_found; + v_val |= HPTE_V_SECONDARY; + second = true; + goto do_second; + } - if (gpte->may_read) { - /* Set the accessed flag */ - pteg[i+1] |= HPTE_R_R; - } - if (gpte->may_write) { - /* Set the dirty flag */ - pteg[i+1] |= HPTE_R_C; - } else { - dprintk("KVM: Mapping read-only page!\n"); - } + v = pteg[i]; + r = pteg[i+1]; + pp = (r & HPTE_R_PP) | key; + eaddr_mask = 0xFFF; + + gpte->eaddr = eaddr; + gpte->vpage = kvmppc_mmu_book3s_64_ea_to_vp(vcpu, eaddr, data); + if (slbe->large) + eaddr_mask = 0xFFFFFF; + gpte->raddr = (r & HPTE_R_RPN & ~eaddr_mask) | (eaddr & eaddr_mask); + gpte->may_execute = ((r & HPTE_R_N) ? false : true); + gpte->may_read = false; + gpte->may_write = false; + + switch (pp) { + case 0: + case 1: + case 2: + case 6: + gpte->may_write = true; + /* fall through */ + case 3: + case 5: + case 7: + gpte->may_read = true; + break; + } - /* Write back into the PTEG */ - if (pteg[i+1] != oldr) - copy_to_user((void __user *)ptegp, pteg, sizeof(pteg)); + dprintk("KVM MMU: Translated 0x%lx [0x%llx] -> 0x%llx " + "-> 0x%lx\n", + eaddr, avpn, gpte->vpage, gpte->raddr); - if (!gpte->may_read) - return -EPERM; - return 0; - } else { - dprintk("KVM MMU: No PTE found (ea=0x%lx sdr1=0x%llx " - "ptegp=0x%lx)\n", - eaddr, to_book3s(vcpu)->sdr1, ptegp); - for (i = 0; i < 16; i += 2) - dprintk(" %02d: 0x%llx - 0x%llx (0x%llx)\n", - i, pteg[i], pteg[i+1], avpn); - - if (!second) { - second = HPTE_V_SECONDARY; - goto do_second; - } + /* Update PTE R and C bits, so the guest's swapper knows we used the + * page */ + if (gpte->may_read) { + /* Set the accessed flag */ + r |= HPTE_R_R; + } + if (data && gpte->may_write) { + /* Set the dirty flag -- XXX even if not writing */ + r |= HPTE_R_C; + } + + /* Write back into the PTEG */ + if (pteg[i+1] != r) { + pteg[i+1] = r; + copy_to_user((void __user *)ptegp, pteg, sizeof(pteg)); } + if (!gpte->may_read) + return -EPERM; + return 0; + no_page_found: return -ENOENT; diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index f7c9e8ae06e..043eec8461e 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -37,6 +37,8 @@ #include <asm/ppc-opcode.h> #include <asm/cputable.h> +#include "book3s_hv_cma.h" + /* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */ #define MAX_LPID_970 63 @@ -52,8 +54,8 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp) { unsigned long hpt; struct revmap_entry *rev; - struct kvmppc_linear_info *li; - long order = kvm_hpt_order; + struct page *page = NULL; + long order = KVM_DEFAULT_HPT_ORDER; if (htab_orderp) { order = *htab_orderp; @@ -61,26 +63,23 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp) order = PPC_MIN_HPT_ORDER; } + kvm->arch.hpt_cma_alloc = 0; /* - * If the user wants a different size from default, * try first to allocate it from the kernel page allocator. + * We keep the CMA reserved for failed allocation. */ - hpt = 0; - if (order != kvm_hpt_order) { - hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT| - __GFP_NOWARN, order - PAGE_SHIFT); - if (!hpt) - --order; - } + hpt = __get_free_pages(GFP_KERNEL | __GFP_ZERO | __GFP_REPEAT | + __GFP_NOWARN, order - PAGE_SHIFT); /* Next try to allocate from the preallocated pool */ if (!hpt) { - li = kvm_alloc_hpt(); - if (li) { - hpt = (ulong)li->base_virt; - kvm->arch.hpt_li = li; - order = kvm_hpt_order; - } + VM_BUG_ON(order < KVM_CMA_CHUNK_ORDER); + page = kvm_alloc_hpt(1 << (order - PAGE_SHIFT)); + if (page) { + hpt = (unsigned long)pfn_to_kaddr(page_to_pfn(page)); + kvm->arch.hpt_cma_alloc = 1; + } else + --order; } /* Lastly try successively smaller sizes from the page allocator */ @@ -118,8 +117,8 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp) return 0; out_freehpt: - if (kvm->arch.hpt_li) - kvm_release_hpt(kvm->arch.hpt_li); + if (kvm->arch.hpt_cma_alloc) + kvm_release_hpt(page, 1 << (order - PAGE_SHIFT)); else free_pages(hpt, order - PAGE_SHIFT); return -ENOMEM; @@ -165,8 +164,9 @@ void kvmppc_free_hpt(struct kvm *kvm) { kvmppc_free_lpid(kvm->arch.lpid); vfree(kvm->arch.revmap); - if (kvm->arch.hpt_li) - kvm_release_hpt(kvm->arch.hpt_li); + if (kvm->arch.hpt_cma_alloc) + kvm_release_hpt(virt_to_page(kvm->arch.hpt_virt), + 1 << (kvm->arch.hpt_order - PAGE_SHIFT)); else free_pages(kvm->arch.hpt_virt, kvm->arch.hpt_order - PAGE_SHIFT); diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c index 1f6344c4408..360ce68c980 100644 --- a/arch/powerpc/kvm/book3s_emulate.c +++ b/arch/powerpc/kvm/book3s_emulate.c @@ -458,6 +458,7 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) case SPRN_PMC4_GEKKO: case SPRN_WPAR_GEKKO: case SPRN_MSSSR0: + case SPRN_DABR: break; unprivileged: default: @@ -555,6 +556,7 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) case SPRN_PMC4_GEKKO: case SPRN_WPAR_GEKKO: case SPRN_MSSSR0: + case SPRN_DABR: *spr_val = 0; break; default: diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 89eb4c7c527..b0ee3bc9ca7 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -680,13 +680,12 @@ static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, } int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, - struct kvm_sregs *sregs) + struct kvm_sregs *sregs) { int i; - sregs->pvr = vcpu->arch.pvr; - memset(sregs, 0, sizeof(struct kvm_sregs)); + sregs->pvr = vcpu->arch.pvr; for (i = 0; i < vcpu->arch.slb_max; i++) { sregs->u.s.ppc64.slb[i].slbe = vcpu->arch.slb[i].orige; sregs->u.s.ppc64.slb[i].slbv = vcpu->arch.slb[i].origv; @@ -696,7 +695,7 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, } int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, - struct kvm_sregs *sregs) + struct kvm_sregs *sregs) { int i, j; @@ -1511,10 +1510,10 @@ static inline int lpcr_rmls(unsigned long rma_size) static int kvm_rma_fault(struct vm_area_struct *vma, struct vm_fault *vmf) { - struct kvmppc_linear_info *ri = vma->vm_file->private_data; struct page *page; + struct kvm_rma_info *ri = vma->vm_file->private_data; - if (vmf->pgoff >= ri->npages) + if (vmf->pgoff >= kvm_rma_pages) return VM_FAULT_SIGBUS; page = pfn_to_page(ri->base_pfn + vmf->pgoff); @@ -1536,7 +1535,7 @@ static int kvm_rma_mmap(struct file *file, struct vm_area_struct *vma) static int kvm_rma_release(struct inode *inode, struct file *filp) { - struct kvmppc_linear_info *ri = filp->private_data; + struct kvm_rma_info *ri = filp->private_data; kvm_release_rma(ri); return 0; @@ -1549,8 +1548,17 @@ static const struct file_operations kvm_rma_fops = { long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret) { - struct kvmppc_linear_info *ri; long fd; + struct kvm_rma_info *ri; + /* + * Only do this on PPC970 in HV mode + */ + if (!cpu_has_feature(CPU_FTR_HVMODE) || + !cpu_has_feature(CPU_FTR_ARCH_201)) + return -EINVAL; + + if (!kvm_rma_pages) + return -EINVAL; ri = kvm_alloc_rma(); if (!ri) @@ -1560,7 +1568,7 @@ long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret) if (fd < 0) kvm_release_rma(ri); - ret->rma_size = ri->npages << PAGE_SHIFT; + ret->rma_size = kvm_rma_pages << PAGE_SHIFT; return fd; } @@ -1725,7 +1733,7 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) { int err = 0; struct kvm *kvm = vcpu->kvm; - struct kvmppc_linear_info *ri = NULL; + struct kvm_rma_info *ri = NULL; unsigned long hva; struct kvm_memory_slot *memslot; struct vm_area_struct *vma; @@ -1803,13 +1811,13 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) } else { /* Set up to use an RMO region */ - rma_size = ri->npages; + rma_size = kvm_rma_pages; if (rma_size > memslot->npages) rma_size = memslot->npages; rma_size <<= PAGE_SHIFT; rmls = lpcr_rmls(rma_size); err = -EINVAL; - if (rmls < 0) { + if ((long)rmls < 0) { pr_err("KVM: Can't use RMA of 0x%lx bytes\n", rma_size); goto out_srcu; } @@ -1831,14 +1839,14 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) /* POWER7 */ lpcr &= ~(LPCR_VPM0 | LPCR_VRMA_L); lpcr |= rmls << LPCR_RMLS_SH; - kvm->arch.rmor = kvm->arch.rma->base_pfn << PAGE_SHIFT; + kvm->arch.rmor = ri->base_pfn << PAGE_SHIFT; } kvm->arch.lpcr = lpcr; pr_info("KVM: Using RMO at %lx size %lx (LPCR = %lx)\n", ri->base_pfn << PAGE_SHIFT, rma_size, lpcr); /* Initialize phys addrs of pages in RMO */ - npages = ri->npages; + npages = kvm_rma_pages; porder = __ilog2(npages); physp = memslot->arch.slot_phys; if (physp) { @@ -1874,7 +1882,7 @@ int kvmppc_core_init_vm(struct kvm *kvm) /* Allocate the guest's logical partition ID */ lpid = kvmppc_alloc_lpid(); - if (lpid < 0) + if ((long)lpid < 0) return -ENOMEM; kvm->arch.lpid = lpid; diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index ec0a9e5de10..8cd0daebb82 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -13,33 +13,34 @@ #include <linux/spinlock.h> #include <linux/bootmem.h> #include <linux/init.h> +#include <linux/memblock.h> +#include <linux/sizes.h> #include <asm/cputable.h> #include <asm/kvm_ppc.h> #include <asm/kvm_book3s.h> -#define KVM_LINEAR_RMA 0 -#define KVM_LINEAR_HPT 1 - -static void __init kvm_linear_init_one(ulong size, int count, int type); -static struct kvmppc_linear_info *kvm_alloc_linear(int type); -static void kvm_release_linear(struct kvmppc_linear_info *ri); - -int kvm_hpt_order = KVM_DEFAULT_HPT_ORDER; -EXPORT_SYMBOL_GPL(kvm_hpt_order); - -/*************** RMA *************/ - +#include "book3s_hv_cma.h" +/* + * Hash page table alignment on newer cpus(CPU_FTR_ARCH_206) + * should be power of 2. + */ +#define HPT_ALIGN_PAGES ((1 << 18) >> PAGE_SHIFT) /* 256k */ +/* + * By default we reserve 5% of memory for hash pagetable allocation. + */ +static unsigned long kvm_cma_resv_ratio = 5; /* - * This maintains a list of RMAs (real mode areas) for KVM guests to use. + * We allocate RMAs (real mode areas) for KVM guests from the KVM CMA area. * Each RMA has to be physically contiguous and of a size that the * hardware supports. PPC970 and POWER7 support 64MB, 128MB and 256MB, * and other larger sizes. Since we are unlikely to be allocate that * much physically contiguous memory after the system is up and running, - * we preallocate a set of RMAs in early boot for KVM to use. + * we preallocate a set of RMAs in early boot using CMA. + * should be power of 2. */ -static unsigned long kvm_rma_size = 64 << 20; /* 64MB */ -static unsigned long kvm_rma_count; +unsigned long kvm_rma_pages = (1 << 27) >> PAGE_SHIFT; /* 128MB */ +EXPORT_SYMBOL_GPL(kvm_rma_pages); /* Work out RMLS (real mode limit selector) field value for a given RMA size. Assumes POWER7 or PPC970. */ @@ -69,165 +70,114 @@ static inline int lpcr_rmls(unsigned long rma_size) static int __init early_parse_rma_size(char *p) { - if (!p) - return 1; + unsigned long kvm_rma_size; + pr_debug("%s(%s)\n", __func__, p); + if (!p) + return -EINVAL; kvm_rma_size = memparse(p, &p); - + /* + * Check that the requested size is one supported in hardware + */ + if (lpcr_rmls(kvm_rma_size) < 0) { + pr_err("RMA size of 0x%lx not supported\n", kvm_rma_size); + return -EINVAL; + } + kvm_rma_pages = kvm_rma_size >> PAGE_SHIFT; return 0; } early_param("kvm_rma_size", early_parse_rma_size); -static int __init early_parse_rma_count(char *p) +struct kvm_rma_info *kvm_alloc_rma() { - if (!p) - return 1; - - kvm_rma_count = simple_strtoul(p, NULL, 0); - - return 0; -} -early_param("kvm_rma_count", early_parse_rma_count); - -struct kvmppc_linear_info *kvm_alloc_rma(void) -{ - return kvm_alloc_linear(KVM_LINEAR_RMA); + struct page *page; + struct kvm_rma_info *ri; + + ri = kmalloc(sizeof(struct kvm_rma_info), GFP_KERNEL); + if (!ri) + return NULL; + page = kvm_alloc_cma(kvm_rma_pages, kvm_rma_pages); + if (!page) + goto err_out; + atomic_set(&ri->use_count, 1); + ri->base_pfn = page_to_pfn(page); + return ri; +err_out: + kfree(ri); + return NULL; } EXPORT_SYMBOL_GPL(kvm_alloc_rma); -void kvm_release_rma(struct kvmppc_linear_info *ri) +void kvm_release_rma(struct kvm_rma_info *ri) { - kvm_release_linear(ri); + if (atomic_dec_and_test(&ri->use_count)) { + kvm_release_cma(pfn_to_page(ri->base_pfn), kvm_rma_pages); + kfree(ri); + } } EXPORT_SYMBOL_GPL(kvm_release_rma); -/*************** HPT *************/ - -/* - * This maintains a list of big linear HPT tables that contain the GVA->HPA - * memory mappings. If we don't reserve those early on, we might not be able - * to get a big (usually 16MB) linear memory region from the kernel anymore. - */ - -static unsigned long kvm_hpt_count; - -static int __init early_parse_hpt_count(char *p) +static int __init early_parse_kvm_cma_resv(char *p) { + pr_debug("%s(%s)\n", __func__, p); if (!p) - return 1; - - kvm_hpt_count = simple_strtoul(p, NULL, 0); - - return 0; + return -EINVAL; + return kstrtoul(p, 0, &kvm_cma_resv_ratio); } -early_param("kvm_hpt_count", early_parse_hpt_count); +early_param("kvm_cma_resv_ratio", early_parse_kvm_cma_resv); -struct kvmppc_linear_info *kvm_alloc_hpt(void) +struct page *kvm_alloc_hpt(unsigned long nr_pages) { - return kvm_alloc_linear(KVM_LINEAR_HPT); + unsigned long align_pages = HPT_ALIGN_PAGES; + + /* Old CPUs require HPT aligned on a multiple of its size */ + if (!cpu_has_feature(CPU_FTR_ARCH_206)) + align_pages = nr_pages; + return kvm_alloc_cma(nr_pages, align_pages); } EXPORT_SYMBOL_GPL(kvm_alloc_hpt); -void kvm_release_hpt(struct kvmppc_linear_info *li) +void kvm_release_hpt(struct page *page, unsigned long nr_pages) { - kvm_release_linear(li); + kvm_release_cma(page, nr_pages); } EXPORT_SYMBOL_GPL(kvm_release_hpt); -/*************** generic *************/ - -static LIST_HEAD(free_linears); -static DEFINE_SPINLOCK(linear_lock); - -static void __init kvm_linear_init_one(ulong size, int count, int type) -{ - unsigned long i; - unsigned long j, npages; - void *linear; - struct page *pg; - const char *typestr; - struct kvmppc_linear_info *linear_info; - - if (!count) - return; - - typestr = (type == KVM_LINEAR_RMA) ? "RMA" : "HPT"; - - npages = size >> PAGE_SHIFT; - linear_info = alloc_bootmem(count * sizeof(struct kvmppc_linear_info)); - for (i = 0; i < count; ++i) { - linear = alloc_bootmem_align(size, size); - pr_debug("Allocated KVM %s at %p (%ld MB)\n", typestr, linear, - size >> 20); - linear_info[i].base_virt = linear; - linear_info[i].base_pfn = __pa(linear) >> PAGE_SHIFT; - linear_info[i].npages = npages; - linear_info[i].type = type; - list_add_tail(&linear_info[i].list, &free_linears); - atomic_set(&linear_info[i].use_count, 0); - - pg = pfn_to_page(linear_info[i].base_pfn); - for (j = 0; j < npages; ++j) { - atomic_inc(&pg->_count); - ++pg; - } - } -} - -static struct kvmppc_linear_info *kvm_alloc_linear(int type) -{ - struct kvmppc_linear_info *ri, *ret; - - ret = NULL; - spin_lock(&linear_lock); - list_for_each_entry(ri, &free_linears, list) { - if (ri->type != type) - continue; - - list_del(&ri->list); - atomic_inc(&ri->use_count); - memset(ri->base_virt, 0, ri->npages << PAGE_SHIFT); - ret = ri; - break; - } - spin_unlock(&linear_lock); - return ret; -} - -static void kvm_release_linear(struct kvmppc_linear_info *ri) -{ - if (atomic_dec_and_test(&ri->use_count)) { - spin_lock(&linear_lock); - list_add_tail(&ri->list, &free_linears); - spin_unlock(&linear_lock); - - } -} - -/* - * Called at boot time while the bootmem allocator is active, - * to allocate contiguous physical memory for the hash page - * tables for guests. +/** + * kvm_cma_reserve() - reserve area for kvm hash pagetable + * + * This function reserves memory from early allocator. It should be + * called by arch specific code once the early allocator (memblock or bootmem) + * has been activated and all other subsystems have already allocated/reserved + * memory. */ -void __init kvm_linear_init(void) +void __init kvm_cma_reserve(void) { - /* HPT */ - kvm_linear_init_one(1 << kvm_hpt_order, kvm_hpt_count, KVM_LINEAR_HPT); - - /* RMA */ - /* Only do this on PPC970 in HV mode */ - if (!cpu_has_feature(CPU_FTR_HVMODE) || - !cpu_has_feature(CPU_FTR_ARCH_201)) - return; - - if (!kvm_rma_size || !kvm_rma_count) - return; - - /* Check that the requested size is one supported in hardware */ - if (lpcr_rmls(kvm_rma_size) < 0) { - pr_err("RMA size of 0x%lx not supported\n", kvm_rma_size); - return; + unsigned long align_size; + struct memblock_region *reg; + phys_addr_t selected_size = 0; + /* + * We cannot use memblock_phys_mem_size() here, because + * memblock_analyze() has not been called yet. + */ + for_each_memblock(memory, reg) + selected_size += memblock_region_memory_end_pfn(reg) - + memblock_region_memory_base_pfn(reg); + + selected_size = (selected_size * kvm_cma_resv_ratio / 100) << PAGE_SHIFT; + if (selected_size) { + pr_debug("%s: reserving %ld MiB for global area\n", __func__, + (unsigned long)selected_size / SZ_1M); + /* + * Old CPUs require HPT aligned on a multiple of its size. So for them + * make the alignment as max size we could request. + */ + if (!cpu_has_feature(CPU_FTR_ARCH_206)) + align_size = __rounddown_pow_of_two(selected_size); + else + align_size = HPT_ALIGN_PAGES << PAGE_SHIFT; + + align_size = max(kvm_rma_pages << PAGE_SHIFT, align_size); + kvm_cma_declare_contiguous(selected_size, align_size); } - - kvm_linear_init_one(kvm_rma_size, kvm_rma_count, KVM_LINEAR_RMA); } diff --git a/arch/powerpc/kvm/book3s_hv_cma.c b/arch/powerpc/kvm/book3s_hv_cma.c new file mode 100644 index 00000000000..d9d3d8553d5 --- /dev/null +++ b/arch/powerpc/kvm/book3s_hv_cma.c @@ -0,0 +1,240 @@ +/* + * Contiguous Memory Allocator for ppc KVM hash pagetable based on CMA + * for DMA mapping framework + * + * Copyright IBM Corporation, 2013 + * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License or (at your optional) any later version of the license. + * + */ +#define pr_fmt(fmt) "kvm_cma: " fmt + +#ifdef CONFIG_CMA_DEBUG +#ifndef DEBUG +# define DEBUG +#endif +#endif + +#include <linux/memblock.h> +#include <linux/mutex.h> +#include <linux/sizes.h> +#include <linux/slab.h> + +#include "book3s_hv_cma.h" + +struct kvm_cma { + unsigned long base_pfn; + unsigned long count; + unsigned long *bitmap; +}; + +static DEFINE_MUTEX(kvm_cma_mutex); +static struct kvm_cma kvm_cma_area; + +/** + * kvm_cma_declare_contiguous() - reserve area for contiguous memory handling + * for kvm hash pagetable + * @size: Size of the reserved memory. + * @alignment: Alignment for the contiguous memory area + * + * This function reserves memory for kvm cma area. It should be + * called by arch code when early allocator (memblock or bootmem) + * is still activate. + */ +long __init kvm_cma_declare_contiguous(phys_addr_t size, phys_addr_t alignment) +{ + long base_pfn; + phys_addr_t addr; + struct kvm_cma *cma = &kvm_cma_area; + + pr_debug("%s(size %lx)\n", __func__, (unsigned long)size); + + if (!size) + return -EINVAL; + /* + * Sanitise input arguments. + * We should be pageblock aligned for CMA. + */ + alignment = max(alignment, (phys_addr_t)(PAGE_SIZE << pageblock_order)); + size = ALIGN(size, alignment); + /* + * Reserve memory + * Use __memblock_alloc_base() since + * memblock_alloc_base() panic()s. + */ + addr = __memblock_alloc_base(size, alignment, 0); + if (!addr) { + base_pfn = -ENOMEM; + goto err; + } else + base_pfn = PFN_DOWN(addr); + + /* + * Each reserved area must be initialised later, when more kernel + * subsystems (like slab allocator) are available. + */ + cma->base_pfn = base_pfn; + cma->count = size >> PAGE_SHIFT; + pr_info("CMA: reserved %ld MiB\n", (unsigned long)size / SZ_1M); + return 0; +err: + pr_err("CMA: failed to reserve %ld MiB\n", (unsigned long)size / SZ_1M); + return base_pfn; +} + +/** + * kvm_alloc_cma() - allocate pages from contiguous area + * @nr_pages: Requested number of pages. + * @align_pages: Requested alignment in number of pages + * + * This function allocates memory buffer for hash pagetable. + */ +struct page *kvm_alloc_cma(unsigned long nr_pages, unsigned long align_pages) +{ + int ret; + struct page *page = NULL; + struct kvm_cma *cma = &kvm_cma_area; + unsigned long chunk_count, nr_chunk; + unsigned long mask, pfn, pageno, start = 0; + + + if (!cma || !cma->count) + return NULL; + + pr_debug("%s(cma %p, count %lu, align pages %lu)\n", __func__, + (void *)cma, nr_pages, align_pages); + + if (!nr_pages) + return NULL; + /* + * align mask with chunk size. The bit tracks pages in chunk size + */ + VM_BUG_ON(!is_power_of_2(align_pages)); + mask = (align_pages >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT)) - 1; + BUILD_BUG_ON(PAGE_SHIFT > KVM_CMA_CHUNK_ORDER); + + chunk_count = cma->count >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT); + nr_chunk = nr_pages >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT); + + mutex_lock(&kvm_cma_mutex); + for (;;) { + pageno = bitmap_find_next_zero_area(cma->bitmap, chunk_count, + start, nr_chunk, mask); + if (pageno >= chunk_count) + break; + + pfn = cma->base_pfn + (pageno << (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT)); + ret = alloc_contig_range(pfn, pfn + nr_pages, MIGRATE_CMA); + if (ret == 0) { + bitmap_set(cma->bitmap, pageno, nr_chunk); + page = pfn_to_page(pfn); + memset(pfn_to_kaddr(pfn), 0, nr_pages << PAGE_SHIFT); + break; + } else if (ret != -EBUSY) { + break; + } + pr_debug("%s(): memory range at %p is busy, retrying\n", + __func__, pfn_to_page(pfn)); + /* try again with a bit different memory target */ + start = pageno + mask + 1; + } + mutex_unlock(&kvm_cma_mutex); + pr_debug("%s(): returned %p\n", __func__, page); + return page; +} + +/** + * kvm_release_cma() - release allocated pages for hash pagetable + * @pages: Allocated pages. + * @nr_pages: Number of allocated pages. + * + * This function releases memory allocated by kvm_alloc_cma(). + * It returns false when provided pages do not belong to contiguous area and + * true otherwise. + */ +bool kvm_release_cma(struct page *pages, unsigned long nr_pages) +{ + unsigned long pfn; + unsigned long nr_chunk; + struct kvm_cma *cma = &kvm_cma_area; + + if (!cma || !pages) + return false; + + pr_debug("%s(page %p count %lu)\n", __func__, (void *)pages, nr_pages); + + pfn = page_to_pfn(pages); + + if (pfn < cma->base_pfn || pfn >= cma->base_pfn + cma->count) + return false; + + VM_BUG_ON(pfn + nr_pages > cma->base_pfn + cma->count); + nr_chunk = nr_pages >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT); + + mutex_lock(&kvm_cma_mutex); + bitmap_clear(cma->bitmap, + (pfn - cma->base_pfn) >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT), + nr_chunk); + free_contig_range(pfn, nr_pages); + mutex_unlock(&kvm_cma_mutex); + + return true; +} + +static int __init kvm_cma_activate_area(unsigned long base_pfn, + unsigned long count) +{ + unsigned long pfn = base_pfn; + unsigned i = count >> pageblock_order; + struct zone *zone; + + WARN_ON_ONCE(!pfn_valid(pfn)); + zone = page_zone(pfn_to_page(pfn)); + do { + unsigned j; + base_pfn = pfn; + for (j = pageblock_nr_pages; j; --j, pfn++) { + WARN_ON_ONCE(!pfn_valid(pfn)); + /* + * alloc_contig_range requires the pfn range + * specified to be in the same zone. Make this + * simple by forcing the entire CMA resv range + * to be in the same zone. + */ + if (page_zone(pfn_to_page(pfn)) != zone) + return -EINVAL; + } + init_cma_reserved_pageblock(pfn_to_page(base_pfn)); + } while (--i); + return 0; +} + +static int __init kvm_cma_init_reserved_areas(void) +{ + int bitmap_size, ret; + unsigned long chunk_count; + struct kvm_cma *cma = &kvm_cma_area; + + pr_debug("%s()\n", __func__); + if (!cma->count) + return 0; + chunk_count = cma->count >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT); + bitmap_size = BITS_TO_LONGS(chunk_count) * sizeof(long); + cma->bitmap = kzalloc(bitmap_size, GFP_KERNEL); + if (!cma->bitmap) + return -ENOMEM; + + ret = kvm_cma_activate_area(cma->base_pfn, cma->count); + if (ret) + goto error; + return 0; + +error: + kfree(cma->bitmap); + return ret; +} +core_initcall(kvm_cma_init_reserved_areas); diff --git a/arch/powerpc/kvm/book3s_hv_cma.h b/arch/powerpc/kvm/book3s_hv_cma.h new file mode 100644 index 00000000000..655144f75fa --- /dev/null +++ b/arch/powerpc/kvm/book3s_hv_cma.h @@ -0,0 +1,27 @@ +/* + * Contiguous Memory Allocator for ppc KVM hash pagetable based on CMA + * for DMA mapping framework + * + * Copyright IBM Corporation, 2013 + * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License or (at your optional) any later version of the license. + * + */ + +#ifndef __POWERPC_KVM_CMA_ALLOC_H__ +#define __POWERPC_KVM_CMA_ALLOC_H__ +/* + * Both RMA and Hash page allocation will be multiple of 256K. + */ +#define KVM_CMA_CHUNK_ORDER 18 + +extern struct page *kvm_alloc_cma(unsigned long nr_pages, + unsigned long align_pages); +extern bool kvm_release_cma(struct page *pages, unsigned long nr_pages); +extern long kvm_cma_declare_contiguous(phys_addr_t size, + phys_addr_t alignment) __init; +#endif diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index fc25689a9f3..45e30d6e462 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -383,6 +383,80 @@ static inline int try_lock_tlbie(unsigned int *lock) return old == 0; } +/* + * tlbie/tlbiel is a bit different on the PPC970 compared to later + * processors such as POWER7; the large page bit is in the instruction + * not RB, and the top 16 bits and the bottom 12 bits of the VA + * in RB must be 0. + */ +static void do_tlbies_970(struct kvm *kvm, unsigned long *rbvalues, + long npages, int global, bool need_sync) +{ + long i; + + if (global) { + while (!try_lock_tlbie(&kvm->arch.tlbie_lock)) + cpu_relax(); + if (need_sync) + asm volatile("ptesync" : : : "memory"); + for (i = 0; i < npages; ++i) { + unsigned long rb = rbvalues[i]; + + if (rb & 1) /* large page */ + asm volatile("tlbie %0,1" : : + "r" (rb & 0x0000fffffffff000ul)); + else + asm volatile("tlbie %0,0" : : + "r" (rb & 0x0000fffffffff000ul)); + } + asm volatile("eieio; tlbsync; ptesync" : : : "memory"); + kvm->arch.tlbie_lock = 0; + } else { + if (need_sync) + asm volatile("ptesync" : : : "memory"); + for (i = 0; i < npages; ++i) { + unsigned long rb = rbvalues[i]; + + if (rb & 1) /* large page */ + asm volatile("tlbiel %0,1" : : + "r" (rb & 0x0000fffffffff000ul)); + else + asm volatile("tlbiel %0,0" : : + "r" (rb & 0x0000fffffffff000ul)); + } + asm volatile("ptesync" : : : "memory"); + } +} + +static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues, + long npages, int global, bool need_sync) +{ + long i; + + if (cpu_has_feature(CPU_FTR_ARCH_201)) { + /* PPC970 tlbie instruction is a bit different */ + do_tlbies_970(kvm, rbvalues, npages, global, need_sync); + return; + } + if (global) { + while (!try_lock_tlbie(&kvm->arch.tlbie_lock)) + cpu_relax(); + if (need_sync) + asm volatile("ptesync" : : : "memory"); + for (i = 0; i < npages; ++i) + asm volatile(PPC_TLBIE(%1,%0) : : + "r" (rbvalues[i]), "r" (kvm->arch.lpid)); + asm volatile("eieio; tlbsync; ptesync" : : : "memory"); + kvm->arch.tlbie_lock = 0; + } else { + if (need_sync) + asm volatile("ptesync" : : : "memory"); + for (i = 0; i < npages; ++i) + asm volatile("tlbiel %0" : : "r" (rbvalues[i])); + asm volatile("ptesync" : : : "memory"); + } +} + long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags, unsigned long pte_index, unsigned long avpn, unsigned long *hpret) @@ -408,19 +482,7 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags, if (v & HPTE_V_VALID) { hpte[0] &= ~HPTE_V_VALID; rb = compute_tlbie_rb(v, hpte[1], pte_index); - if (global_invalidates(kvm, flags)) { - while (!try_lock_tlbie(&kvm->arch.tlbie_lock)) - cpu_relax(); - asm volatile("ptesync" : : : "memory"); - asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync" - : : "r" (rb), "r" (kvm->arch.lpid)); - asm volatile("ptesync" : : : "memory"); - kvm->arch.tlbie_lock = 0; - } else { - asm volatile("ptesync" : : : "memory"); - asm volatile("tlbiel %0" : : "r" (rb)); - asm volatile("ptesync" : : : "memory"); - } + do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true); /* Read PTE low word after tlbie to get final R/C values */ remove_revmap_chain(kvm, pte_index, rev, v, hpte[1]); } @@ -448,12 +510,11 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) unsigned long *hp, *hptes[4], tlbrb[4]; long int i, j, k, n, found, indexes[4]; unsigned long flags, req, pte_index, rcbits; - long int local = 0; + int global; long int ret = H_SUCCESS; struct revmap_entry *rev, *revs[4]; - if (atomic_read(&kvm->online_vcpus) == 1) - local = 1; + global = global_invalidates(kvm, 0); for (i = 0; i < 4 && ret == H_SUCCESS; ) { n = 0; for (; i < 4; ++i) { @@ -529,22 +590,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) break; /* Now that we've collected a batch, do the tlbies */ - if (!local) { - while(!try_lock_tlbie(&kvm->arch.tlbie_lock)) - cpu_relax(); - asm volatile("ptesync" : : : "memory"); - for (k = 0; k < n; ++k) - asm volatile(PPC_TLBIE(%1,%0) : : - "r" (tlbrb[k]), - "r" (kvm->arch.lpid)); - asm volatile("eieio; tlbsync; ptesync" : : : "memory"); - kvm->arch.tlbie_lock = 0; - } else { - asm volatile("ptesync" : : : "memory"); - for (k = 0; k < n; ++k) - asm volatile("tlbiel %0" : : "r" (tlbrb[k])); - asm volatile("ptesync" : : : "memory"); - } + do_tlbies(kvm, tlbrb, n, global, true); /* Read PTE low words after tlbie to get final R/C values */ for (k = 0; k < n; ++k) { @@ -603,19 +649,7 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, if (v & HPTE_V_VALID) { rb = compute_tlbie_rb(v, r, pte_index); hpte[0] = v & ~HPTE_V_VALID; - if (global_invalidates(kvm, flags)) { - while(!try_lock_tlbie(&kvm->arch.tlbie_lock)) - cpu_relax(); - asm volatile("ptesync" : : : "memory"); - asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync" - : : "r" (rb), "r" (kvm->arch.lpid)); - asm volatile("ptesync" : : : "memory"); - kvm->arch.tlbie_lock = 0; - } else { - asm volatile("ptesync" : : : "memory"); - asm volatile("tlbiel %0" : : "r" (rb)); - asm volatile("ptesync" : : : "memory"); - } + do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true); /* * If the host has this page as readonly but the guest * wants to make it read/write, reduce the permissions. @@ -686,13 +720,7 @@ void kvmppc_invalidate_hpte(struct kvm *kvm, unsigned long *hptep, hptep[0] &= ~HPTE_V_VALID; rb = compute_tlbie_rb(hptep[0], hptep[1], pte_index); - while (!try_lock_tlbie(&kvm->arch.tlbie_lock)) - cpu_relax(); - asm volatile("ptesync" : : : "memory"); - asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync" - : : "r" (rb), "r" (kvm->arch.lpid)); - asm volatile("ptesync" : : : "memory"); - kvm->arch.tlbie_lock = 0; + do_tlbies(kvm, &rb, 1, 1, true); } EXPORT_SYMBOL_GPL(kvmppc_invalidate_hpte); @@ -706,12 +734,7 @@ void kvmppc_clear_ref_hpte(struct kvm *kvm, unsigned long *hptep, rbyte = (hptep[1] & ~HPTE_R_R) >> 8; /* modify only the second-last byte, which contains the ref bit */ *((char *)hptep + 14) = rbyte; - while (!try_lock_tlbie(&kvm->arch.tlbie_lock)) - cpu_relax(); - asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync" - : : "r" (rb), "r" (kvm->arch.lpid)); - asm volatile("ptesync" : : : "memory"); - kvm->arch.tlbie_lock = 0; + do_tlbies(kvm, &rb, 1, 1, false); } EXPORT_SYMBOL_GPL(kvmppc_clear_ref_hpte); diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index b02f91e4c70..60dce5bfab3 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -1381,7 +1381,7 @@ hcall_try_real_mode: cmpldi r3,hcall_real_table_end - hcall_real_table bge guest_exit_cont LOAD_REG_ADDR(r4, hcall_real_table) - lwzx r3,r3,r4 + lwax r3,r3,r4 cmpwi r3,0 beq guest_exit_cont add r3,r3,r4 diff --git a/arch/powerpc/kvm/book3s_interrupts.S b/arch/powerpc/kvm/book3s_interrupts.S index 48cbbf86295..17cfae5497a 100644 --- a/arch/powerpc/kvm/book3s_interrupts.S +++ b/arch/powerpc/kvm/book3s_interrupts.S @@ -92,6 +92,11 @@ kvm_start_lightweight: PPC_LL r3, VCPU_HFLAGS(r4) rldicl r3, r3, 0, 63 /* r3 &= 1 */ stb r3, HSTATE_RESTORE_HID5(r13) + + /* Load up guest SPRG3 value, since it's user readable */ + ld r3, VCPU_SHARED(r4) + ld r3, VCPU_SHARED_SPRG3(r3) + mtspr SPRN_SPRG3, r3 #endif /* CONFIG_PPC_BOOK3S_64 */ PPC_LL r4, VCPU_SHADOW_MSR(r4) /* get shadow_msr */ @@ -123,6 +128,15 @@ kvmppc_handler_highmem: /* R7 = vcpu */ PPC_LL r7, GPR4(r1) +#ifdef CONFIG_PPC_BOOK3S_64 + /* + * Reload kernel SPRG3 value. + * No need to save guest value as usermode can't modify SPRG3. + */ + ld r3, PACA_SPRG3(r13) + mtspr SPRN_SPRG3, r3 +#endif /* CONFIG_PPC_BOOK3S_64 */ + PPC_STL r14, VCPU_GPR(R14)(r7) PPC_STL r15, VCPU_GPR(R15)(r7) PPC_STL r16, VCPU_GPR(R16)(r7) diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 19498a567a8..27db1e66595 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -468,7 +468,8 @@ void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr) * both the traditional FP registers and the added VSX * registers into thread.fpr[]. */ - giveup_fpu(current); + if (current->thread.regs->msr & MSR_FP) + giveup_fpu(current); for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) vcpu_fpr[i] = thread_fpr[get_fpr_index(i)]; @@ -483,7 +484,8 @@ void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr) #ifdef CONFIG_ALTIVEC if (msr & MSR_VEC) { - giveup_altivec(current); + if (current->thread.regs->msr & MSR_VEC) + giveup_altivec(current); memcpy(vcpu->arch.vr, t->vr, sizeof(vcpu->arch.vr)); vcpu->arch.vscr = t->vscr; } @@ -575,8 +577,6 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr, printk(KERN_INFO "Loading up ext 0x%lx\n", msr); #endif - current->thread.regs->msr |= msr; - if (msr & MSR_FP) { for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) thread_fpr[get_fpr_index(i)] = vcpu_fpr[i]; @@ -598,12 +598,32 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr, #endif } + current->thread.regs->msr |= msr; vcpu->arch.guest_owned_ext |= msr; kvmppc_recalc_shadow_msr(vcpu); return RESUME_GUEST; } +/* + * Kernel code using FP or VMX could have flushed guest state to + * the thread_struct; if so, get it back now. + */ +static void kvmppc_handle_lost_ext(struct kvm_vcpu *vcpu) +{ + unsigned long lost_ext; + + lost_ext = vcpu->arch.guest_owned_ext & ~current->thread.regs->msr; + if (!lost_ext) + return; + + if (lost_ext & MSR_FP) + kvmppc_load_up_fpu(); + if (lost_ext & MSR_VEC) + kvmppc_load_up_altivec(); + current->thread.regs->msr |= lost_ext; +} + int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned int exit_nr) { @@ -772,7 +792,7 @@ program_interrupt: } case BOOK3S_INTERRUPT_SYSCALL: if (vcpu->arch.papr_enabled && - (kvmppc_get_last_inst(vcpu) == 0x44000022) && + (kvmppc_get_last_sc(vcpu) == 0x44000022) && !(vcpu->arch.shared->msr & MSR_PR)) { /* SC 1 papr hypercalls */ ulong cmd = kvmppc_get_gpr(vcpu, 3); @@ -890,8 +910,9 @@ program_interrupt: local_irq_enable(); r = s; } else { - kvmppc_lazy_ee_enable(); + kvmppc_fix_ee_before_entry(); } + kvmppc_handle_lost_ext(vcpu); } trace_kvm_book3s_reenter(r, vcpu); @@ -1047,11 +1068,12 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) if (err) goto free_shadow_vcpu; + err = -ENOMEM; p = __get_free_page(GFP_KERNEL|__GFP_ZERO); - /* the real shared page fills the last 4k of our page */ - vcpu->arch.shared = (void*)(p + PAGE_SIZE - 4096); if (!p) goto uninit_vcpu; + /* the real shared page fills the last 4k of our page */ + vcpu->arch.shared = (void *)(p + PAGE_SIZE - 4096); #ifdef CONFIG_PPC_BOOK3S_64 /* default to book3s_64 (970fx) */ @@ -1161,7 +1183,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) if (vcpu->arch.shared->msr & MSR_FP) kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP); - kvmppc_lazy_ee_enable(); + kvmppc_fix_ee_before_entry(); ret = __kvmppc_vcpu_run(kvm_run, vcpu); diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c index 94c1dd46b83..a3a5cb8ee7e 100644 --- a/arch/powerpc/kvm/book3s_xics.c +++ b/arch/powerpc/kvm/book3s_xics.c @@ -19,6 +19,7 @@ #include <asm/hvcall.h> #include <asm/xics.h> #include <asm/debug.h> +#include <asm/time.h> #include <linux/debugfs.h> #include <linux/seq_file.h> diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index dcc94f01600..17722d82f1d 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -674,8 +674,6 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) goto out; } - kvm_guest_enter(); - #ifdef CONFIG_PPC_FPU /* Save userspace FPU state in stack */ enable_kernel_fp(); @@ -698,7 +696,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) kvmppc_load_guest_fp(vcpu); #endif - kvmppc_lazy_ee_enable(); + kvmppc_fix_ee_before_entry(); ret = __kvmppc_vcpu_run(kvm_run, vcpu); @@ -1168,7 +1166,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, local_irq_enable(); r = (s << 2) | RESUME_HOST | (r & RESUME_FLAG_NV); } else { - kvmppc_lazy_ee_enable(); + kvmppc_fix_ee_before_entry(); } } diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index ae63ae4a1a5..f55e14cd176 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -117,8 +117,6 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu) kvm_guest_exit(); continue; } - - trace_hardirqs_on(); #endif kvm_guest_enter(); |