From 1f6a93e4c35e75d547b51f56ba8139ab1a91628c Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Sat, 30 Aug 2008 11:40:24 +1000 Subject: powerpc: Make it possible to move the interrupt handlers away from the kernel This changes the way that the exception prologs transfer control to the handlers in 64-bit kernels with the aim of making it possible to have the prologs separate from the main body of the kernel. Now, instead of computing the address of the handler by taking the top 32 bits of the paca address (to get the 0xc0000000........ part) and ORing in something in the bottom 16 bits, we get the base address of the kernel by doing a load from the paca and add an offset. This also replaces an mfmsr and an ori to compute the MSR value for the handler with a load from the paca. That makes it unnecessary to have a separate version of EXCEPTION_PROLOG_PSERIES that forces 64-bit mode. We can no longer use a direct branches in the exception prolog code, which means that the SLB miss handlers can't branch directly to .slb_miss_realmode any more. Instead we have to compute the address and do an indirect branch. This is conditional on CONFIG_RELOCATABLE; for non-relocatable kernels we use a direct branch as before. (A later change will allow CONFIG_RELOCATABLE to be set on 64-bit powerpc.) Since the secondary CPUs on pSeries start execution in the first 0x100 bytes of real memory and then have to get to wherever the kernel is, we can't use a direct branch to get there. Instead this changes __secondary_hold_spinloop from a flag to a function pointer. When it is set to a non-NULL value, the secondary CPUs jump to the function pointed to by that value. Finally this eliminates one code difference between 32-bit and 64-bit by making __secondary_hold be the text address of the secondary CPU spinloop rather than a function descriptor for it. Signed-off-by: Paul Mackerras --- arch/powerpc/kernel/asm-offsets.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/powerpc/kernel/asm-offsets.c') diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 92768d3006f..e9c4044012b 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -122,6 +122,8 @@ int main(void) DEFINE(PACASTABRR, offsetof(struct paca_struct, stab_rr)); DEFINE(PACAR1, offsetof(struct paca_struct, saved_r1)); DEFINE(PACATOC, offsetof(struct paca_struct, kernel_toc)); + DEFINE(PACAKBASE, offsetof(struct paca_struct, kernelbase)); + DEFINE(PACAKMSR, offsetof(struct paca_struct, kernel_msr)); DEFINE(PACASOFTIRQEN, offsetof(struct paca_struct, soft_enabled)); DEFINE(PACAHARDIRQEN, offsetof(struct paca_struct, hard_enabled)); DEFINE(PACASLBCACHE, offsetof(struct paca_struct, slb_cache)); -- cgit v1.2.3-70-g09d2 From 4ee7084eb11e00eb02dc8435fd18273a61ffa9bf Mon Sep 17 00:00:00 2001 From: Becky Bruce Date: Wed, 24 Sep 2008 11:01:24 -0500 Subject: POWERPC: Allow 32-bit hashed pgtable code to support 36-bit physical This rearranges a bit of code, and adds support for 36-bit physical addressing for configs that use a hashed page table. The 36b physical support is not enabled by default on any config - it must be explicitly enabled via the config system. This patch *only* expands the page table code to accomodate large physical addresses on 32-bit systems and enables the PHYS_64BIT config option for 86xx. It does *not* allow you to boot a board with more than about 3.5GB of RAM - for that, SWIOTLB support is also required (and coming soon). Signed-off-by: Becky Bruce Signed-off-by: Kumar Gala --- arch/powerpc/include/asm/io.h | 2 +- arch/powerpc/include/asm/page_32.h | 8 ++- arch/powerpc/include/asm/pgtable-ppc32.h | 17 ++++++- arch/powerpc/kernel/asm-offsets.c | 1 + arch/powerpc/kernel/head_32.S | 4 +- arch/powerpc/kernel/head_fsl_booke.S | 2 - arch/powerpc/mm/hash_low_32.S | 86 ++++++++++++++++++++++++++------ arch/powerpc/mm/pgtable_32.c | 4 +- arch/powerpc/mm/tlb_32.c | 1 + arch/powerpc/platforms/Kconfig.cputype | 17 ++++--- 10 files changed, 109 insertions(+), 33 deletions(-) (limited to 'arch/powerpc/kernel/asm-offsets.c') diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h index 77c7fa025e6..08266d2728b 100644 --- a/arch/powerpc/include/asm/io.h +++ b/arch/powerpc/include/asm/io.h @@ -711,7 +711,7 @@ static inline void * phys_to_virt(unsigned long address) /* * Change "struct page" to physical address. */ -#define page_to_phys(page) (page_to_pfn(page) << PAGE_SHIFT) +#define page_to_phys(page) ((phys_addr_t)page_to_pfn(page) << PAGE_SHIFT) /* We do NOT want virtual merging, it would put too much pressure on * our iommu allocator. Instead, we want drivers to be smart enough diff --git a/arch/powerpc/include/asm/page_32.h b/arch/powerpc/include/asm/page_32.h index ebfae530a37..d77072a32cc 100644 --- a/arch/powerpc/include/asm/page_32.h +++ b/arch/powerpc/include/asm/page_32.h @@ -13,10 +13,16 @@ #define ARCH_KMALLOC_MINALIGN L1_CACHE_BYTES #endif +#ifdef CONFIG_PTE_64BIT +#define PTE_FLAGS_OFFSET 4 /* offset of PTE flags, in bytes */ +#else +#define PTE_FLAGS_OFFSET 0 +#endif + #ifndef __ASSEMBLY__ /* * The basic type of a PTE - 64 bits for those CPUs with > 32 bit - * physical addressing. For now this just the IBM PPC440. + * physical addressing. */ #ifdef CONFIG_PTE_64BIT typedef unsigned long long pte_basic_t; diff --git a/arch/powerpc/include/asm/pgtable-ppc32.h b/arch/powerpc/include/asm/pgtable-ppc32.h index c2e58b41bc7..29c83d85b04 100644 --- a/arch/powerpc/include/asm/pgtable-ppc32.h +++ b/arch/powerpc/include/asm/pgtable-ppc32.h @@ -369,7 +369,12 @@ extern int icache_44x_need_flush; #define _PAGE_RW 0x400 /* software: user write access allowed */ #define _PAGE_SPECIAL 0x800 /* software: Special page */ +#ifdef CONFIG_PTE_64BIT +/* We never clear the high word of the pte */ +#define _PTE_NONE_MASK (0xffffffff00000000ULL | _PAGE_HASHPTE) +#else #define _PTE_NONE_MASK _PAGE_HASHPTE +#endif #define _PMD_PRESENT 0 #define _PMD_PRESENT_MASK (PAGE_MASK) @@ -587,6 +592,10 @@ extern int flush_hash_pages(unsigned context, unsigned long va, extern void add_hash_page(unsigned context, unsigned long va, unsigned long pmdval); +/* Flush an entry from the TLB/hash table */ +extern void flush_hash_entry(struct mm_struct *mm, pte_t *ptep, + unsigned long address); + /* * Atomic PTE updates. * @@ -665,9 +674,13 @@ static inline unsigned long long pte_update(pte_t *p, static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) { -#if _PAGE_HASHPTE != 0 +#if (_PAGE_HASHPTE != 0) && defined(CONFIG_SMP) && !defined(CONFIG_PTE_64BIT) pte_update(ptep, ~_PAGE_HASHPTE, pte_val(pte) & ~_PAGE_HASHPTE); #elif defined(CONFIG_PTE_64BIT) && defined(CONFIG_SMP) +#if _PAGE_HASHPTE != 0 + if (pte_val(*ptep) & _PAGE_HASHPTE) + flush_hash_entry(mm, ptep, addr); +#endif __asm__ __volatile__("\ stw%U0%X0 %2,%0\n\ eieio\n\ @@ -675,7 +688,7 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr, : "=m" (*ptep), "=m" (*((unsigned char *)ptep+4)) : "r" (pte) : "memory"); #else - *ptep = pte; + *ptep = (*ptep & _PAGE_HASHPTE) | (pte & ~_PAGE_HASHPTE); #endif } diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index e9c4044012b..09febc58258 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -352,6 +352,7 @@ int main(void) #endif DEFINE(PGD_TABLE_SIZE, PGD_TABLE_SIZE); + DEFINE(PTE_SIZE, sizeof(pte_t)); #ifdef CONFIG_KVM DEFINE(TLBE_BYTES, sizeof(struct tlbe)); diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index 8bb65751929..a6de6dbc5ed 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -369,13 +369,13 @@ i##n: \ DataAccess: EXCEPTION_PROLOG mfspr r10,SPRN_DSISR + stw r10,_DSISR(r11) andis. r0,r10,0xa470 /* weird error? */ bne 1f /* if not, try to put a PTE */ mfspr r4,SPRN_DAR /* into the hash table */ rlwinm r3,r10,32-15,21,21 /* DSISR_STORE -> _PAGE_RW */ bl hash_page -1: stw r10,_DSISR(r11) - mr r5,r10 +1: lwz r5,_DSISR(r11) /* get DSISR value */ mfspr r4,SPRN_DAR EXC_XFER_EE_LITE(0x300, handle_page_fault) diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 377e0c155c9..18c0093f932 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -422,7 +422,6 @@ skpinv: addi r6,r6,1 /* Increment */ * r12 is pointer to the pte */ #ifdef CONFIG_PTE_64BIT -#define PTE_FLAGS_OFFSET 4 #define FIND_PTE \ rlwinm r12, r10, 13, 19, 29; /* Compute pgdir/pmd offset */ \ lwzx r11, r12, r11; /* Get pgd/pmd entry */ \ @@ -431,7 +430,6 @@ skpinv: addi r6,r6,1 /* Increment */ rlwimi r12, r10, 23, 20, 28; /* Compute pte address */ \ lwz r11, 4(r12); /* Get pte entry */ #else -#define PTE_FLAGS_OFFSET 0 #define FIND_PTE \ rlwimi r11, r10, 12, 20, 29; /* Create L1 (pgdir/pmd) address */ \ lwz r11, 0(r11); /* Get L1 entry */ \ diff --git a/arch/powerpc/mm/hash_low_32.S b/arch/powerpc/mm/hash_low_32.S index c41d658176a..7bffb70b9fe 100644 --- a/arch/powerpc/mm/hash_low_32.S +++ b/arch/powerpc/mm/hash_low_32.S @@ -75,7 +75,7 @@ _GLOBAL(hash_page_sync) * Returns to the caller if the access is illegal or there is no * mapping for the address. Otherwise it places an appropriate PTE * in the hash table and returns from the exception. - * Uses r0, r3 - r8, ctr, lr. + * Uses r0, r3 - r8, r10, ctr, lr. */ .text _GLOBAL(hash_page) @@ -106,9 +106,15 @@ _GLOBAL(hash_page) addi r5,r5,swapper_pg_dir@l /* kernel page table */ rlwimi r3,r9,32-12,29,29 /* MSR_PR -> _PAGE_USER */ 112: add r5,r5,r7 /* convert to phys addr */ +#ifndef CONFIG_PTE_64BIT rlwimi r5,r4,12,20,29 /* insert top 10 bits of address */ lwz r8,0(r5) /* get pmd entry */ rlwinm. r8,r8,0,0,19 /* extract address of pte page */ +#else + rlwinm r8,r4,13,19,29 /* Compute pgdir/pmd offset */ + lwzx r8,r8,r5 /* Get L1 entry */ + rlwinm. r8,r8,0,0,20 /* extract pt base address */ +#endif #ifdef CONFIG_SMP beq- hash_page_out /* return if no mapping */ #else @@ -118,7 +124,11 @@ _GLOBAL(hash_page) to the address following the rfi. */ beqlr- #endif +#ifndef CONFIG_PTE_64BIT rlwimi r8,r4,22,20,29 /* insert next 10 bits of address */ +#else + rlwimi r8,r4,23,20,28 /* compute pte address */ +#endif rlwinm r0,r3,32-3,24,24 /* _PAGE_RW access -> _PAGE_DIRTY */ ori r0,r0,_PAGE_ACCESSED|_PAGE_HASHPTE @@ -127,9 +137,15 @@ _GLOBAL(hash_page) * because almost always, there won't be a permission violation * and there won't already be an HPTE, and thus we will have * to update the PTE to set _PAGE_HASHPTE. -- paulus. + * + * If PTE_64BIT is set, the low word is the flags word; use that + * word for locking since it contains all the interesting bits. */ +#if (PTE_FLAGS_OFFSET != 0) + addi r8,r8,PTE_FLAGS_OFFSET +#endif retry: - lwarx r6,0,r8 /* get linux-style pte */ + lwarx r6,0,r8 /* get linux-style pte, flag word */ andc. r5,r3,r6 /* check access & ~permission */ #ifdef CONFIG_SMP bne- hash_page_out /* return if access not permitted */ @@ -137,6 +153,15 @@ retry: bnelr- #endif or r5,r0,r6 /* set accessed/dirty bits */ +#ifdef CONFIG_PTE_64BIT +#ifdef CONFIG_SMP + subf r10,r6,r8 /* create false data dependency */ + subi r10,r10,PTE_FLAGS_OFFSET + lwzx r10,r6,r10 /* Get upper PTE word */ +#else + lwz r10,-PTE_FLAGS_OFFSET(r8) +#endif /* CONFIG_SMP */ +#endif /* CONFIG_PTE_64BIT */ stwcx. r5,0,r8 /* attempt to update PTE */ bne- retry /* retry if someone got there first */ @@ -203,9 +228,9 @@ _GLOBAL(add_hash_page) * we can't take a hash table miss (assuming the code is * covered by a BAT). -- paulus */ - mfmsr r10 + mfmsr r9 SYNC - rlwinm r0,r10,0,17,15 /* clear bit 16 (MSR_EE) */ + rlwinm r0,r9,0,17,15 /* clear bit 16 (MSR_EE) */ rlwinm r0,r0,0,28,26 /* clear MSR_DR */ mtmsr r0 SYNC_601 @@ -214,14 +239,14 @@ _GLOBAL(add_hash_page) tophys(r7,0) #ifdef CONFIG_SMP - addis r9,r7,mmu_hash_lock@ha - addi r9,r9,mmu_hash_lock@l -10: lwarx r0,0,r9 /* take the mmu_hash_lock */ + addis r6,r7,mmu_hash_lock@ha + addi r6,r6,mmu_hash_lock@l +10: lwarx r0,0,r6 /* take the mmu_hash_lock */ cmpi 0,r0,0 bne- 11f - stwcx. r8,0,r9 + stwcx. r8,0,r6 beq+ 12f -11: lwz r0,0(r9) +11: lwz r0,0(r6) cmpi 0,r0,0 beq 10b b 11b @@ -234,10 +259,24 @@ _GLOBAL(add_hash_page) * HPTE, so we just unlock and return. */ mr r8,r5 +#ifndef CONFIG_PTE_64BIT rlwimi r8,r4,22,20,29 +#else + rlwimi r8,r4,23,20,28 + addi r8,r8,PTE_FLAGS_OFFSET +#endif 1: lwarx r6,0,r8 andi. r0,r6,_PAGE_HASHPTE bne 9f /* if HASHPTE already set, done */ +#ifdef CONFIG_PTE_64BIT +#ifdef CONFIG_SMP + subf r10,r6,r8 /* create false data dependency */ + subi r10,r10,PTE_FLAGS_OFFSET + lwzx r10,r6,r10 /* Get upper PTE word */ +#else + lwz r10,-PTE_FLAGS_OFFSET(r8) +#endif /* CONFIG_SMP */ +#endif /* CONFIG_PTE_64BIT */ ori r5,r6,_PAGE_HASHPTE stwcx. r5,0,r8 bne- 1b @@ -246,13 +285,15 @@ _GLOBAL(add_hash_page) 9: #ifdef CONFIG_SMP + addis r6,r7,mmu_hash_lock@ha + addi r6,r6,mmu_hash_lock@l eieio li r0,0 - stw r0,0(r9) /* clear mmu_hash_lock */ + stw r0,0(r6) /* clear mmu_hash_lock */ #endif /* reenable interrupts and DR */ - mtmsr r10 + mtmsr r9 SYNC_601 isync @@ -267,7 +308,8 @@ _GLOBAL(add_hash_page) * r5 contains the linux PTE, r6 contains the old value of the * linux PTE (before setting _PAGE_HASHPTE) and r7 contains the * offset to be added to addresses (0 if the MMU is on, - * -KERNELBASE if it is off). + * -KERNELBASE if it is off). r10 contains the upper half of + * the PTE if CONFIG_PTE_64BIT. * On SMP, the caller should have the mmu_hash_lock held. * We assume that the caller has (or will) set the _PAGE_HASHPTE * bit in the linux PTE in memory. The value passed in r6 should @@ -313,6 +355,11 @@ _GLOBAL(create_hpte) BEGIN_FTR_SECTION ori r8,r8,_PAGE_COHERENT /* set M (coherence required) */ END_FTR_SECTION_IFSET(CPU_FTR_NEED_COHERENT) +#ifdef CONFIG_PTE_64BIT + /* Put the XPN bits into the PTE */ + rlwimi r8,r10,8,20,22 + rlwimi r8,r10,2,29,29 +#endif /* Construct the high word of the PPC-style PTE (r5) */ rlwinm r5,r3,7,1,24 /* put VSID in 0x7fffff80 bits */ @@ -499,14 +546,18 @@ _GLOBAL(flush_hash_pages) isync /* First find a PTE in the range that has _PAGE_HASHPTE set */ +#ifndef CONFIG_PTE_64BIT rlwimi r5,r4,22,20,29 -1: lwz r0,0(r5) +#else + rlwimi r5,r4,23,20,28 +#endif +1: lwz r0,PTE_FLAGS_OFFSET(r5) cmpwi cr1,r6,1 andi. r0,r0,_PAGE_HASHPTE bne 2f ble cr1,19f addi r4,r4,0x1000 - addi r5,r5,4 + addi r5,r5,PTE_SIZE addi r6,r6,-1 b 1b @@ -545,7 +596,10 @@ _GLOBAL(flush_hash_pages) * already clear, we're done (for this pte). If not, * clear it (atomically) and proceed. -- paulus. */ -33: lwarx r8,0,r5 /* fetch the pte */ +#if (PTE_FLAGS_OFFSET != 0) + addi r5,r5,PTE_FLAGS_OFFSET +#endif +33: lwarx r8,0,r5 /* fetch the pte flags word */ andi. r0,r8,_PAGE_HASHPTE beq 8f /* done if HASHPTE is already clear */ rlwinm r8,r8,0,31,29 /* clear HASHPTE bit */ @@ -590,7 +644,7 @@ _GLOBAL(flush_hash_patch_B) 8: ble cr1,9f /* if all ptes checked */ 81: addi r6,r6,-1 - addi r5,r5,4 /* advance to next pte */ + addi r5,r5,PTE_SIZE addi r4,r4,0x1000 lwz r0,0(r5) /* check next pte */ cmpwi cr1,r6,1 diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index 2001abdb191..c31d6d26f0b 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -73,7 +73,7 @@ extern unsigned long p_mapped_by_tlbcam(unsigned long pa); #endif /* HAVE_TLBCAM */ #ifdef CONFIG_PTE_64BIT -/* 44x uses an 8kB pgdir because it has 8-byte Linux PTEs. */ +/* Some processors use an 8kB pgdir because they have 8-byte Linux PTEs. */ #define PGDIR_ORDER 1 #else #define PGDIR_ORDER 0 @@ -288,7 +288,7 @@ int map_page(unsigned long va, phys_addr_t pa, int flags) } /* - * Map in all of physical memory starting at KERNELBASE. + * Map in a big chunk of physical memory starting at KERNELBASE. */ void __init mapin_ram(void) { diff --git a/arch/powerpc/mm/tlb_32.c b/arch/powerpc/mm/tlb_32.c index eb4b512d65f..f9a47fee392 100644 --- a/arch/powerpc/mm/tlb_32.c +++ b/arch/powerpc/mm/tlb_32.c @@ -45,6 +45,7 @@ void flush_hash_entry(struct mm_struct *mm, pte_t *ptep, unsigned long addr) flush_hash_pages(mm->context.id, addr, ptephys, 1); } } +EXPORT_SYMBOL(flush_hash_entry); /* * Called by ptep_set_access_flags, must flush on CPUs for which the diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 7f651273386..439c5ba34ec 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -50,6 +50,7 @@ config 44x select PPC_UDBG_16550 select 4xx_SOC select PPC_PCI_CHOICE + select PHYS_64BIT config E200 bool "Freescale e200" @@ -128,18 +129,20 @@ config FSL_EMB_PERFMON config PTE_64BIT bool - depends on 44x || E500 - default y if 44x - default y if E500 && PHYS_64BIT + depends on 44x || E500 || PPC_86xx + default y if PHYS_64BIT config PHYS_64BIT - bool 'Large physical address support' if E500 - depends on 44x || E500 + bool 'Large physical address support' if E500 || PPC_86xx + depends on (44x || E500 || PPC_86xx) && !PPC_83xx && !PPC_82xx select RESOURCES_64BIT - default y if 44x ---help--- This option enables kernel support for larger than 32-bit physical - addresses. This features is not be available on all e500 cores. + addresses. This feature may not be available on all cores. + + If you have more than 3.5GB of RAM or so, you also need to enable + SWIOTLB under Kernel Options for this to work. The actual number + is platform-dependent. If in doubt, say N here. -- cgit v1.2.3-70-g09d2 From 20754c2495a791b5b429c0da63394c86ade978e7 Mon Sep 17 00:00:00 2001 From: Hollis Blanchard Date: Fri, 25 Jul 2008 13:54:51 -0500 Subject: KVM: ppc: Stop saving host TLB state We're saving the host TLB state to memory on every exit, but never using it. Originally I had thought that we'd want to restore host TLB for heavyweight exits, but that could actually hurt when context switching to an unrelated host process (i.e. not qemu). Since this decreases the performance penalty of all exits, this patch improves guest boot time by about 15%. Signed-off-by: Hollis Blanchard Signed-off-by: Avi Kivity --- arch/powerpc/include/asm/kvm_host.h | 2 -- arch/powerpc/kernel/asm-offsets.c | 1 - arch/powerpc/kvm/booke_interrupts.S | 17 +++-------------- 3 files changed, 3 insertions(+), 17 deletions(-) (limited to 'arch/powerpc/kernel/asm-offsets.c') diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 23bad40b0ea..dc3a7562bae 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -81,8 +81,6 @@ struct kvm_vcpu_arch { struct tlbe shadow_tlb[PPC44x_TLB_SIZE]; /* Pages which are referenced in the shadow TLB. */ struct page *shadow_pages[PPC44x_TLB_SIZE]; - /* Copy of the host's TLB. */ - struct tlbe host_tlb[PPC44x_TLB_SIZE]; u32 host_stack; u32 host_pid; diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 92768d3006f..59406495395 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -356,7 +356,6 @@ int main(void) DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack)); DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid)); - DEFINE(VCPU_HOST_TLB, offsetof(struct kvm_vcpu, arch.host_tlb)); DEFINE(VCPU_SHADOW_TLB, offsetof(struct kvm_vcpu, arch.shadow_tlb)); DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr)); DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr)); diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S index 8eaba2613ff..3e88dfa1dbe 100644 --- a/arch/powerpc/kvm/booke_interrupts.S +++ b/arch/powerpc/kvm/booke_interrupts.S @@ -342,26 +342,15 @@ lightweight_exit: andc r6, r5, r6 mtmsr r6 - /* Save the host's non-pinned TLB mappings, and load the guest mappings - * over them. Leave the host's "pinned" kernel mappings in place. */ - /* XXX optimization: use generation count to avoid swapping unmodified - * entries. */ + /* Load the guest mappings, leaving the host's "pinned" kernel mappings + * in place. */ + /* XXX optimization: load only modified guest entries. */ mfspr r10, SPRN_MMUCR /* Save host MMUCR. */ lis r8, tlb_44x_hwater@ha lwz r8, tlb_44x_hwater@l(r8) - addi r3, r4, VCPU_HOST_TLB - 4 addi r9, r4, VCPU_SHADOW_TLB - 4 li r6, 0 1: - /* Save host entry. */ - tlbre r7, r6, PPC44x_TLB_PAGEID - mfspr r5, SPRN_MMUCR - stwu r5, 4(r3) - stwu r7, 4(r3) - tlbre r7, r6, PPC44x_TLB_XLAT - stwu r7, 4(r3) - tlbre r7, r6, PPC44x_TLB_ATTRIB - stwu r7, 4(r3) /* Load guest entry. */ lwzu r7, 4(r9) mtspr SPRN_MMUCR, r7 -- cgit v1.2.3-70-g09d2 From 83aae4a8098eb8a40a2e9dab3714354182143b4f Mon Sep 17 00:00:00 2001 From: Hollis Blanchard Date: Fri, 25 Jul 2008 13:54:52 -0500 Subject: KVM: ppc: Write only modified shadow entries into the TLB on exit Track which TLB entries need to be written, instead of overwriting everything below the high water mark. Typically only a single guest TLB entry will be modified in a single exit. Guest boot time performance improvement: about 15%. Signed-off-by: Hollis Blanchard Signed-off-by: Avi Kivity --- arch/powerpc/include/asm/kvm_host.h | 3 +++ arch/powerpc/include/asm/kvm_ppc.h | 3 +++ arch/powerpc/kernel/asm-offsets.c | 1 + arch/powerpc/kvm/44x_tlb.c | 9 ++++++- arch/powerpc/kvm/booke_interrupts.S | 51 ++++++++++++++++++++++++------------- arch/powerpc/kvm/powerpc.c | 15 +++++++++++ 6 files changed, 64 insertions(+), 18 deletions(-) (limited to 'arch/powerpc/kernel/asm-offsets.c') diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index dc3a7562bae..4338b03da8f 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -82,6 +82,9 @@ struct kvm_vcpu_arch { /* Pages which are referenced in the shadow TLB. */ struct page *shadow_pages[PPC44x_TLB_SIZE]; + /* Track which TLB entries we've modified in the current exit. */ + u8 shadow_tlb_mod[PPC44x_TLB_SIZE]; + u32 host_stack; u32 host_pid; u32 host_dbcr0; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index a8b06879226..8e7e4295990 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -65,6 +65,9 @@ extern void kvmppc_mmu_invalidate(struct kvm_vcpu *vcpu, gva_t eaddr, gva_t eend, u32 asid); extern void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode); +/* XXX Book E specific */ +extern void kvmppc_tlbe_set_modified(struct kvm_vcpu *vcpu, unsigned int i); + extern void kvmppc_check_and_deliver_interrupts(struct kvm_vcpu *vcpu); static inline void kvmppc_queue_exception(struct kvm_vcpu *vcpu, int exception) diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 59406495395..1631d670b9e 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -357,6 +357,7 @@ int main(void) DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack)); DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid)); DEFINE(VCPU_SHADOW_TLB, offsetof(struct kvm_vcpu, arch.shadow_tlb)); + DEFINE(VCPU_SHADOW_MOD, offsetof(struct kvm_vcpu, arch.shadow_tlb_mod)); DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr)); DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr)); DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr)); diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c index a207d16b9db..06a5fcfc4d3 100644 --- a/arch/powerpc/kvm/44x_tlb.c +++ b/arch/powerpc/kvm/44x_tlb.c @@ -125,6 +125,11 @@ static void kvmppc_44x_shadow_release(struct kvm_vcpu *vcpu, } } +void kvmppc_tlbe_set_modified(struct kvm_vcpu *vcpu, unsigned int i) +{ + vcpu->arch.shadow_tlb_mod[i] = 1; +} + /* Caller must ensure that the specified guest TLB entry is safe to insert into * the shadow TLB. */ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, u64 asid, @@ -172,10 +177,10 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, u64 asid, * use host large pages in the future. */ stlbe->word0 = (gvaddr & PAGE_MASK) | PPC44x_TLB_VALID | PPC44x_TLB_TS | PPC44x_TLB_4K; - stlbe->word1 = (hpaddr & 0xfffffc00) | ((hpaddr >> 32) & 0xf); stlbe->word2 = kvmppc_44x_tlb_shadow_attrib(flags, vcpu->arch.msr & MSR_PR); + kvmppc_tlbe_set_modified(vcpu, victim); KVMTRACE_5D(STLB_WRITE, vcpu, victim, stlbe->tid, stlbe->word0, stlbe->word1, stlbe->word2, @@ -209,6 +214,7 @@ void kvmppc_mmu_invalidate(struct kvm_vcpu *vcpu, gva_t eaddr, kvmppc_44x_shadow_release(vcpu, i); stlbe->word0 = 0; + kvmppc_tlbe_set_modified(vcpu, i); KVMTRACE_5D(STLB_INVAL, vcpu, i, stlbe->tid, stlbe->word0, stlbe->word1, stlbe->word2, handler); @@ -229,6 +235,7 @@ void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode) kvmppc_44x_shadow_release(vcpu, i); stlbe->word0 = 0; + kvmppc_tlbe_set_modified(vcpu, i); KVMTRACE_5D(STLB_INVAL, vcpu, i, stlbe->tid, stlbe->word0, stlbe->word1, stlbe->word2, handler); diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S index 3e88dfa1dbe..564ea32ecba 100644 --- a/arch/powerpc/kvm/booke_interrupts.S +++ b/arch/powerpc/kvm/booke_interrupts.S @@ -335,7 +335,7 @@ lightweight_exit: lwz r3, VCPU_PID(r4) mtspr SPRN_PID, r3 - /* Prevent all TLB updates. */ + /* Prevent all asynchronous TLB updates. */ mfmsr r5 lis r6, (MSR_EE|MSR_CE|MSR_ME|MSR_DE)@h ori r6, r6, (MSR_EE|MSR_CE|MSR_ME|MSR_DE)@l @@ -344,28 +344,45 @@ lightweight_exit: /* Load the guest mappings, leaving the host's "pinned" kernel mappings * in place. */ - /* XXX optimization: load only modified guest entries. */ mfspr r10, SPRN_MMUCR /* Save host MMUCR. */ - lis r8, tlb_44x_hwater@ha - lwz r8, tlb_44x_hwater@l(r8) - addi r9, r4, VCPU_SHADOW_TLB - 4 - li r6, 0 + li r5, PPC44x_TLB_SIZE + lis r5, tlb_44x_hwater@ha + lwz r5, tlb_44x_hwater@l(r5) + mtctr r5 + addi r9, r4, VCPU_SHADOW_TLB + addi r5, r4, VCPU_SHADOW_MOD + li r3, 0 1: + lbzx r7, r3, r5 + cmpwi r7, 0 + beq 3f + /* Load guest entry. */ - lwzu r7, 4(r9) + mulli r11, r3, TLBE_BYTES + add r11, r11, r9 + lwz r7, 0(r11) mtspr SPRN_MMUCR, r7 - lwzu r7, 4(r9) - tlbwe r7, r6, PPC44x_TLB_PAGEID - lwzu r7, 4(r9) - tlbwe r7, r6, PPC44x_TLB_XLAT - lwzu r7, 4(r9) - tlbwe r7, r6, PPC44x_TLB_ATTRIB - /* Increment index. */ - addi r6, r6, 1 - cmpw r6, r8 - blt 1b + lwz r7, 4(r11) + tlbwe r7, r3, PPC44x_TLB_PAGEID + lwz r7, 8(r11) + tlbwe r7, r3, PPC44x_TLB_XLAT + lwz r7, 12(r11) + tlbwe r7, r3, PPC44x_TLB_ATTRIB +3: + addi r3, r3, 1 /* Increment index. */ + bdnz 1b + mtspr SPRN_MMUCR, r10 /* Restore host MMUCR. */ + /* Clear bitmap of modified TLB entries */ + li r5, PPC44x_TLB_SIZE>>2 + mtctr r5 + addi r5, r4, VCPU_SHADOW_MOD - 4 + li r6, 0 +1: + stwu r6, 4(r5) + bdnz 1b + iccci 0, 0 /* XXX hack */ /* Load some guest volatiles. */ diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index b75607180dd..90a6fc422b2 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -27,6 +27,7 @@ #include #include #include +#include gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) @@ -307,14 +308,28 @@ static void kvmppc_load_guest_debug_registers(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { + int i; + if (vcpu->guest_debug.enabled) kvmppc_load_guest_debug_registers(vcpu); + + /* Mark every guest entry in the shadow TLB entry modified, so that they + * will all be reloaded on the next vcpu run (instead of being + * demand-faulted). */ + for (i = 0; i <= tlb_44x_hwater; i++) + kvmppc_tlbe_set_modified(vcpu, i); } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { if (vcpu->guest_debug.enabled) kvmppc_restore_host_debug_state(vcpu); + + /* Don't leave guest TLB entries resident when being de-scheduled. */ + /* XXX It would be nice to differentiate between heavyweight exit and + * sched_out here, since we could avoid the TLB flush for heavyweight + * exits. */ + _tlbia(); } int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu, -- cgit v1.2.3-70-g09d2 From 49dd2c492895828a90ecdf889e7fe9cfb40a82a7 Mon Sep 17 00:00:00 2001 From: Hollis Blanchard Date: Fri, 25 Jul 2008 13:54:53 -0500 Subject: KVM: powerpc: Map guest userspace with TID=0 mappings When we use TID=N userspace mappings, we must ensure that kernel mappings have been destroyed when entering userspace. Using TID=1/TID=0 for kernel/user mappings and running userspace with PID=0 means that userspace can't access the kernel mappings, but the kernel can directly access userspace. The net is that we don't need to flush the TLB on privilege switches, but we do on guest context switches (which are far more infrequent). Guest boot time performance improvement: about 30%. Signed-off-by: Hollis Blanchard Signed-off-by: Avi Kivity --- arch/powerpc/include/asm/kvm_host.h | 4 ++++ arch/powerpc/include/asm/kvm_ppc.h | 9 +++++++++ arch/powerpc/kernel/asm-offsets.c | 2 +- arch/powerpc/kvm/44x_tlb.c | 39 ++++++++++++++++++++++--------------- arch/powerpc/kvm/booke_guest.c | 2 ++ arch/powerpc/kvm/booke_interrupts.S | 2 +- arch/powerpc/kvm/emulate.c | 2 +- 7 files changed, 41 insertions(+), 19 deletions(-) (limited to 'arch/powerpc/kernel/asm-offsets.c') diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 4338b03da8f..34b52b7180c 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -129,7 +129,11 @@ struct kvm_vcpu_arch { u32 ivor[16]; u32 ivpr; u32 pir; + + u32 shadow_pid; u32 pid; + u32 swap_pid; + u32 pvr; u32 ccr0; u32 ccr1; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 8e7e4295990..8931ba729d2 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -64,6 +64,7 @@ extern void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, extern void kvmppc_mmu_invalidate(struct kvm_vcpu *vcpu, gva_t eaddr, gva_t eend, u32 asid); extern void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode); +extern void kvmppc_mmu_switch_pid(struct kvm_vcpu *vcpu, u32 pid); /* XXX Book E specific */ extern void kvmppc_tlbe_set_modified(struct kvm_vcpu *vcpu, unsigned int i); @@ -95,4 +96,12 @@ static inline void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr) kvm_vcpu_block(vcpu); } +static inline void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 new_pid) +{ + if (vcpu->arch.pid != new_pid) { + vcpu->arch.pid = new_pid; + vcpu->arch.swap_pid = 1; + } +} + #endif /* __POWERPC_KVM_PPC_H__ */ diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 1631d670b9e..52649da344f 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -369,7 +369,7 @@ int main(void) DEFINE(VCPU_SPRG5, offsetof(struct kvm_vcpu, arch.sprg5)); DEFINE(VCPU_SPRG6, offsetof(struct kvm_vcpu, arch.sprg6)); DEFINE(VCPU_SPRG7, offsetof(struct kvm_vcpu, arch.sprg7)); - DEFINE(VCPU_PID, offsetof(struct kvm_vcpu, arch.pid)); + DEFINE(VCPU_SHADOW_PID, offsetof(struct kvm_vcpu, arch.shadow_pid)); DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst)); DEFINE(VCPU_FAULT_DEAR, offsetof(struct kvm_vcpu, arch.fault_dear)); diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c index 06a5fcfc4d3..3594bbd1f61 100644 --- a/arch/powerpc/kvm/44x_tlb.c +++ b/arch/powerpc/kvm/44x_tlb.c @@ -170,7 +170,7 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, u64 asid, /* XXX what about AS? */ - stlbe->tid = asid & 0xff; + stlbe->tid = !(asid & 0xff); /* Force TS=1 for all guest mappings. */ /* For now we hardcode 4KB mappings, but it will be important to @@ -190,7 +190,7 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, u64 asid, void kvmppc_mmu_invalidate(struct kvm_vcpu *vcpu, gva_t eaddr, gva_t eend, u32 asid) { - unsigned int pid = asid & 0xff; + unsigned int pid = !(asid & 0xff); int i; /* XXX Replace loop with fancy data structures. */ @@ -222,23 +222,30 @@ void kvmppc_mmu_invalidate(struct kvm_vcpu *vcpu, gva_t eaddr, up_write(¤t->mm->mmap_sem); } -/* Invalidate all mappings, so that when they fault back in they will get the - * proper permission bits. */ +/* Invalidate all mappings on the privilege switch after PID has been changed. + * The guest always runs with PID=1, so we must clear the entire TLB when + * switching address spaces. */ void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode) { int i; - /* XXX Replace loop with fancy data structures. */ - down_write(¤t->mm->mmap_sem); - for (i = 0; i <= tlb_44x_hwater; i++) { - struct tlbe *stlbe = &vcpu->arch.shadow_tlb[i]; - - kvmppc_44x_shadow_release(vcpu, i); - stlbe->word0 = 0; - kvmppc_tlbe_set_modified(vcpu, i); - KVMTRACE_5D(STLB_INVAL, vcpu, i, - stlbe->tid, stlbe->word0, stlbe->word1, - stlbe->word2, handler); + if (vcpu->arch.swap_pid) { + /* XXX Replace loop with fancy data structures. */ + down_write(¤t->mm->mmap_sem); + for (i = 0; i <= tlb_44x_hwater; i++) { + struct tlbe *stlbe = &vcpu->arch.shadow_tlb[i]; + + /* Future optimization: clear only userspace mappings. */ + kvmppc_44x_shadow_release(vcpu, i); + stlbe->word0 = 0; + kvmppc_tlbe_set_modified(vcpu, i); + KVMTRACE_5D(STLB_INVAL, vcpu, i, + stlbe->tid, stlbe->word0, stlbe->word1, + stlbe->word2, handler); + } + up_write(¤t->mm->mmap_sem); + vcpu->arch.swap_pid = 0; } - up_write(¤t->mm->mmap_sem); + + vcpu->arch.shadow_pid = !usermode; } diff --git a/arch/powerpc/kvm/booke_guest.c b/arch/powerpc/kvm/booke_guest.c index 3cca079975e..7b2591e26ba 100644 --- a/arch/powerpc/kvm/booke_guest.c +++ b/arch/powerpc/kvm/booke_guest.c @@ -486,6 +486,8 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) vcpu->arch.msr = 0; vcpu->arch.gpr[1] = (16<<20) - 8; /* -8 for the callee-save LR slot */ + vcpu->arch.shadow_pid = 1; + /* Eye-catching number so we know if the guest takes an interrupt * before it's programmed its own IVPR. */ vcpu->arch.ivpr = 0x55550000; diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S index 564ea32ecba..95e165baf85 100644 --- a/arch/powerpc/kvm/booke_interrupts.S +++ b/arch/powerpc/kvm/booke_interrupts.S @@ -332,7 +332,7 @@ lightweight_exit: mfspr r3, SPRN_PID stw r3, VCPU_HOST_PID(r4) - lwz r3, VCPU_PID(r4) + lwz r3, VCPU_SHADOW_PID(r4) mtspr SPRN_PID, r3 /* Prevent all asynchronous TLB updates. */ diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index c3ed63b2221..0fce4fbdc20 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c @@ -508,7 +508,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) case SPRN_MMUCR: vcpu->arch.mmucr = vcpu->arch.gpr[rs]; break; case SPRN_PID: - vcpu->arch.pid = vcpu->arch.gpr[rs]; break; + kvmppc_set_pid(vcpu, vcpu->arch.gpr[rs]); break; case SPRN_CCR0: vcpu->arch.ccr0 = vcpu->arch.gpr[rs]; break; case SPRN_CCR1: -- cgit v1.2.3-70-g09d2