From 74bf4312fff083ab25c3f357cc653ada7995e5f6 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 31 Jan 2006 18:29:18 -0800 Subject: [SPARC64]: Move away from virtual page tables, part 1. We now use the TSB hardware assist features of the UltraSPARC MMUs. SMP is currently knowingly broken, we need to find another place to store the per-cpu base pointers. We hid them away in the TSB base register, and that obviously will not work any more :-) Another known broken case is non-8KB base page size. Also noticed that flush_tlb_all() is not referenced anywhere, only the internal __flush_tlb_all() (local cpu only) is used by the sparc64 port, so we can get rid of flush_tlb_all(). The kernel gets it's own 8KB TSB (swapper_tsb) and each address space gets it's own private 8K TSB. Later we can add code to dynamically increase the size of per-process TSB as the RSS grows. An 8KB TSB is good enough for up to about a 4MB RSS, after which the TSB starts to incur many capacity and conflict misses. We even accumulate OBP translations into the kernel TSB. Another area for refinement is large page size support. We could use a secondary address space TSB to handle those. Signed-off-by: David S. Miller --- arch/sparc64/kernel/itlb_miss.S | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 arch/sparc64/kernel/itlb_miss.S (limited to 'arch/sparc64/kernel/itlb_miss.S') diff --git a/arch/sparc64/kernel/itlb_miss.S b/arch/sparc64/kernel/itlb_miss.S new file mode 100644 index 00000000000..6b6c8fee04b --- /dev/null +++ b/arch/sparc64/kernel/itlb_miss.S @@ -0,0 +1,39 @@ +/* ITLB ** ICACHE line 1: Context 0 check and TSB load */ + ldxa [%g0] ASI_IMMU_TSB_8KB_PTR, %g1 ! Get TSB 8K pointer + ldxa [%g0] ASI_IMMU, %g6 ! Get TAG TARGET + srlx %g6, 48, %g5 ! Get context + brz,pn %g5, kvmap_itlb ! Context 0 processing + nop ! Delay slot (fill me) + ldda [%g1] ASI_NUCLEUS_QUAD_LDD, %g4 ! Load TSB entry + cmp %g4, %g6 ! Compare TAG + sethi %hi(_PAGE_EXEC), %g4 ! Setup exec check + +/* ITLB ** ICACHE line 2: TSB compare and TLB load */ + bne,pn %xcc, tsb_miss_itlb ! Miss + mov FAULT_CODE_ITLB, %g3 + andcc %g5, %g4, %g0 ! Executable? + be,pn %xcc, tsb_do_fault + nop ! Delay slot, fill me + stxa %g5, [%g0] ASI_ITLB_DATA_IN ! Load TLB + retry ! Trap done + nop + +/* ITLB ** ICACHE line 3: */ + nop + nop + nop + nop + nop + nop + nop + nop + +/* ITLB ** ICACHE line 4: */ + nop + nop + nop + nop + nop + nop + nop + nop -- cgit v1.2.3-70-g09d2 From 517af33237ecfc3c8a93b335365fa61e741ceca4 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 1 Feb 2006 15:55:21 -0800 Subject: [SPARC64]: Access TSB with physical addresses when possible. This way we don't need to lock the TSB into the TLB. The trick is that every TSB load/store is registered into a special instruction patch section. The default uses virtual addresses, and the patch instructions use physical address load/stores. We can't do this on all chips because only cheetah+ and later have the physical variant of the atomic quad load. Signed-off-by: David S. Miller --- arch/sparc64/kernel/dtlb_miss.S | 2 +- arch/sparc64/kernel/itlb_miss.S | 2 +- arch/sparc64/kernel/ktlb.S | 20 ++++----- arch/sparc64/kernel/tsb.S | 35 ++++++++++++--- arch/sparc64/kernel/vmlinux.lds.S | 4 ++ arch/sparc64/mm/init.c | 32 +++++++++++++ arch/sparc64/mm/tsb.c | 95 ++++++++++++++++++++++++++------------- include/asm-sparc64/mmu.h | 3 +- include/asm-sparc64/tsb.h | 94 +++++++++++++++++++++++++++++++++++--- 9 files changed, 234 insertions(+), 53 deletions(-) (limited to 'arch/sparc64/kernel/itlb_miss.S') diff --git a/arch/sparc64/kernel/dtlb_miss.S b/arch/sparc64/kernel/dtlb_miss.S index d0f1565cb56..2ef6f6e6e72 100644 --- a/arch/sparc64/kernel/dtlb_miss.S +++ b/arch/sparc64/kernel/dtlb_miss.S @@ -4,7 +4,7 @@ srlx %g6, 48, %g5 ! Get context brz,pn %g5, kvmap_dtlb ! Context 0 processing nop ! Delay slot (fill me) - ldda [%g1] ASI_NUCLEUS_QUAD_LDD, %g4 ! Load TSB entry + TSB_LOAD_QUAD(%g1, %g4) ! Load TSB entry nop ! Push branch to next I$ line cmp %g4, %g6 ! Compare TAG diff --git a/arch/sparc64/kernel/itlb_miss.S b/arch/sparc64/kernel/itlb_miss.S index 6b6c8fee04b..97facce27aa 100644 --- a/arch/sparc64/kernel/itlb_miss.S +++ b/arch/sparc64/kernel/itlb_miss.S @@ -4,7 +4,7 @@ srlx %g6, 48, %g5 ! Get context brz,pn %g5, kvmap_itlb ! Context 0 processing nop ! Delay slot (fill me) - ldda [%g1] ASI_NUCLEUS_QUAD_LDD, %g4 ! Load TSB entry + TSB_LOAD_QUAD(%g1, %g4) ! Load TSB entry cmp %g4, %g6 ! Compare TAG sethi %hi(_PAGE_EXEC), %g4 ! Setup exec check diff --git a/arch/sparc64/kernel/ktlb.S b/arch/sparc64/kernel/ktlb.S index 2b5e71b6888..9b415ab6db6 100644 --- a/arch/sparc64/kernel/ktlb.S +++ b/arch/sparc64/kernel/ktlb.S @@ -44,14 +44,14 @@ kvmap_itlb_tsb_miss: kvmap_itlb_vmalloc_addr: KERN_PGTABLE_WALK(%g4, %g5, %g2, kvmap_itlb_longpath) - TSB_LOCK_TAG(%g1, %g2, %g4) + KTSB_LOCK_TAG(%g1, %g2, %g4) /* Load and check PTE. */ ldxa [%g5] ASI_PHYS_USE_EC, %g5 brgez,a,pn %g5, kvmap_itlb_longpath - stx %g0, [%g1] + KTSB_STORE(%g1, %g0) - TSB_WRITE(%g1, %g5, %g6) + KTSB_WRITE(%g1, %g5, %g6) /* fallthrough to TLB load */ @@ -69,9 +69,9 @@ kvmap_itlb_longpath: kvmap_itlb_obp: OBP_TRANS_LOOKUP(%g4, %g5, %g2, %g3, kvmap_itlb_longpath) - TSB_LOCK_TAG(%g1, %g2, %g4) + KTSB_LOCK_TAG(%g1, %g2, %g4) - TSB_WRITE(%g1, %g5, %g6) + KTSB_WRITE(%g1, %g5, %g6) ba,pt %xcc, kvmap_itlb_load nop @@ -79,9 +79,9 @@ kvmap_itlb_obp: kvmap_dtlb_obp: OBP_TRANS_LOOKUP(%g4, %g5, %g2, %g3, kvmap_dtlb_longpath) - TSB_LOCK_TAG(%g1, %g2, %g4) + KTSB_LOCK_TAG(%g1, %g2, %g4) - TSB_WRITE(%g1, %g5, %g6) + KTSB_WRITE(%g1, %g5, %g6) ba,pt %xcc, kvmap_dtlb_load nop @@ -114,14 +114,14 @@ kvmap_linear_patch: kvmap_dtlb_vmalloc_addr: KERN_PGTABLE_WALK(%g4, %g5, %g2, kvmap_dtlb_longpath) - TSB_LOCK_TAG(%g1, %g2, %g4) + KTSB_LOCK_TAG(%g1, %g2, %g4) /* Load and check PTE. */ ldxa [%g5] ASI_PHYS_USE_EC, %g5 brgez,a,pn %g5, kvmap_dtlb_longpath - stx %g0, [%g1] + KTSB_STORE(%g1, %g0) - TSB_WRITE(%g1, %g5, %g6) + KTSB_WRITE(%g1, %g5, %g6) /* fallthrough to TLB load */ diff --git a/arch/sparc64/kernel/tsb.S b/arch/sparc64/kernel/tsb.S index e1dd37f5e53..ff6a79beb98 100644 --- a/arch/sparc64/kernel/tsb.S +++ b/arch/sparc64/kernel/tsb.S @@ -53,7 +53,7 @@ tsb_reload: /* Load and check PTE. */ ldxa [%g5] ASI_PHYS_USE_EC, %g5 brgez,a,pn %g5, tsb_do_fault - stx %g0, [%g1] + TSB_STORE(%g1, %g0) /* If it is larger than the base page size, don't * bother putting it into the TSB. @@ -64,7 +64,7 @@ tsb_reload: and %g2, %g4, %g2 cmp %g2, %g7 bne,a,pn %xcc, tsb_tlb_reload - stx %g0, [%g1] + TSB_STORE(%g1, %g0) TSB_WRITE(%g1, %g5, %g6) @@ -131,13 +131,13 @@ winfix_trampoline: /* Insert an entry into the TSB. * - * %o0: TSB entry pointer + * %o0: TSB entry pointer (virt or phys address) * %o1: tag * %o2: pte */ .align 32 - .globl tsb_insert -tsb_insert: + .globl __tsb_insert +__tsb_insert: rdpr %pstate, %o5 wrpr %o5, PSTATE_IE, %pstate TSB_LOCK_TAG(%o0, %g2, %g3) @@ -146,6 +146,31 @@ tsb_insert: retl nop + /* Flush the given TSB entry if it has the matching + * tag. + * + * %o0: TSB entry pointer (virt or phys address) + * %o1: tag + */ + .align 32 + .globl tsb_flush +tsb_flush: + sethi %hi(TSB_TAG_LOCK_HIGH), %g2 +1: TSB_LOAD_TAG(%o0, %g1) + srlx %g1, 32, %o3 + andcc %o3, %g2, %g0 + bne,pn %icc, 1b + membar #LoadLoad + cmp %g1, %o1 + bne,pt %xcc, 2f + clr %o3 + TSB_CAS_TAG(%o0, %g1, %o3) + cmp %g1, %o3 + bne,pn %xcc, 1b + nop +2: retl + TSB_MEMBAR + /* Reload MMU related context switch state at * schedule() time. * diff --git a/arch/sparc64/kernel/vmlinux.lds.S b/arch/sparc64/kernel/vmlinux.lds.S index 467d13a0d5c..71b943f1c9b 100644 --- a/arch/sparc64/kernel/vmlinux.lds.S +++ b/arch/sparc64/kernel/vmlinux.lds.S @@ -70,6 +70,10 @@ SECTIONS .con_initcall.init : { *(.con_initcall.init) } __con_initcall_end = .; SECURITY_INIT + . = ALIGN(4); + __tsb_phys_patch = .; + .tsb_phys_patch : { *(.tsb_phys_patch) } + __tsb_phys_patch_end = .; . = ALIGN(8192); __initramfs_start = .; .init.ramfs : { *(.init.ramfs) } diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c index 2c21d85de78..4893f3e2c33 100644 --- a/arch/sparc64/mm/init.c +++ b/arch/sparc64/mm/init.c @@ -39,6 +39,7 @@ #include #include #include +#include extern void device_scan(void); @@ -244,6 +245,16 @@ static __inline__ void clear_dcache_dirty_cpu(struct page *page, unsigned long c : "g1", "g7"); } +static inline void tsb_insert(struct tsb *ent, unsigned long tag, unsigned long pte) +{ + unsigned long tsb_addr = (unsigned long) ent; + + if (tlb_type == cheetah_plus) + tsb_addr = __pa(tsb_addr); + + __tsb_insert(tsb_addr, tag, pte); +} + void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t pte) { struct mm_struct *mm; @@ -1040,6 +1051,24 @@ unsigned long __init find_ecache_flush_span(unsigned long size) return ~0UL; } +static void __init tsb_phys_patch(void) +{ + struct tsb_phys_patch_entry *p; + + p = &__tsb_phys_patch; + while (p < &__tsb_phys_patch_end) { + unsigned long addr = p->addr; + + *(unsigned int *) addr = p->insn; + wmb(); + __asm__ __volatile__("flush %0" + : /* no outputs */ + : "r" (addr)); + + p++; + } +} + /* paging_init() sets up the page tables */ extern void cheetah_ecache_flush_init(void); @@ -1052,6 +1081,9 @@ void __init paging_init(void) unsigned long end_pfn, pages_avail, shift; unsigned long real_end, i; + if (tlb_type == cheetah_plus) + tsb_phys_patch(); + /* Find available physical memory... */ read_obp_memory("available", &pavail[0], &pavail_ents); diff --git a/arch/sparc64/mm/tsb.c b/arch/sparc64/mm/tsb.c index 1c4e5c2dfc5..787533f0104 100644 --- a/arch/sparc64/mm/tsb.c +++ b/arch/sparc64/mm/tsb.c @@ -20,12 +20,9 @@ static inline unsigned long tsb_hash(unsigned long vaddr, unsigned long nentries return vaddr & (nentries - 1); } -static inline int tag_compare(struct tsb *entry, unsigned long vaddr, unsigned long context) +static inline int tag_compare(unsigned long tag, unsigned long vaddr, unsigned long context) { - if (context == ~0UL) - return 1; - - return (entry->tag == ((vaddr >> 22) | (context << 48))); + return (tag == ((vaddr >> 22) | (context << 48))); } /* TSB flushes need only occur on the processor initiating the address @@ -41,7 +38,7 @@ void flush_tsb_kernel_range(unsigned long start, unsigned long end) unsigned long hash = tsb_hash(v, KERNEL_TSB_NENTRIES); struct tsb *ent = &swapper_tsb[hash]; - if (tag_compare(ent, v, 0)) { + if (tag_compare(ent->tag, v, 0)) { ent->tag = 0UL; membar_storeload_storestore(); } @@ -52,24 +49,31 @@ void flush_tsb_user(struct mmu_gather *mp) { struct mm_struct *mm = mp->mm; struct tsb *tsb = mm->context.tsb; - unsigned long ctx = ~0UL; unsigned long nentries = mm->context.tsb_nentries; + unsigned long ctx, base; int i; - if (CTX_VALID(mm->context)) - ctx = CTX_HWBITS(mm->context); + if (unlikely(!CTX_VALID(mm->context))) + return; + + ctx = CTX_HWBITS(mm->context); + if (tlb_type == cheetah_plus) + base = __pa(tsb); + else + base = (unsigned long) tsb; + for (i = 0; i < mp->tlb_nr; i++) { unsigned long v = mp->vaddrs[i]; - struct tsb *ent; + unsigned long tag, ent, hash; v &= ~0x1UL; - ent = &tsb[tsb_hash(v, nentries)]; - if (tag_compare(ent, v, ctx)) { - ent->tag = 0UL; - membar_storeload_storestore(); - } + hash = tsb_hash(v, nentries); + ent = base + (hash * sizeof(struct tsb)); + tag = (v >> 22UL) | (ctx << 48UL); + + tsb_flush(ent, tag); } } @@ -84,6 +88,7 @@ static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_bytes) tte = (_PAGE_VALID | _PAGE_L | _PAGE_CP | _PAGE_CV | _PAGE_P | _PAGE_W); tsb_paddr = __pa(mm->context.tsb); + BUG_ON(tsb_paddr & (tsb_bytes - 1UL)); /* Use the smallest page size that can map the whole TSB * in one TLB entry. @@ -144,13 +149,23 @@ static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_bytes) BUG(); }; - tsb_reg |= base; - tsb_reg |= (tsb_paddr & (page_sz - 1UL)); - tte |= (tsb_paddr & ~(page_sz - 1UL)); + if (tlb_type == cheetah_plus) { + /* Physical mapping, no locked TLB entry for TSB. */ + tsb_reg |= tsb_paddr; + + mm->context.tsb_reg_val = tsb_reg; + mm->context.tsb_map_vaddr = 0; + mm->context.tsb_map_pte = 0; + } else { + tsb_reg |= base; + tsb_reg |= (tsb_paddr & (page_sz - 1UL)); + tte |= (tsb_paddr & ~(page_sz - 1UL)); + + mm->context.tsb_reg_val = tsb_reg; + mm->context.tsb_map_vaddr = base; + mm->context.tsb_map_pte = tte; + } - mm->context.tsb_reg_val = tsb_reg; - mm->context.tsb_map_vaddr = base; - mm->context.tsb_map_pte = tte; } /* The page tables are locked against modifications while this @@ -168,13 +183,21 @@ static void copy_tsb(struct tsb *old_tsb, unsigned long old_size, for (i = 0; i < old_nentries; i++) { register unsigned long tag asm("o4"); register unsigned long pte asm("o5"); - unsigned long v; - unsigned int hash; - - __asm__ __volatile__( - "ldda [%2] %3, %0" - : "=r" (tag), "=r" (pte) - : "r" (&old_tsb[i]), "i" (ASI_NUCLEUS_QUAD_LDD)); + unsigned long v, hash; + + if (tlb_type == cheetah_plus) { + __asm__ __volatile__( + "ldda [%2] %3, %0" + : "=r" (tag), "=r" (pte) + : "r" (__pa(&old_tsb[i])), + "i" (ASI_QUAD_LDD_PHYS)); + } else { + __asm__ __volatile__( + "ldda [%2] %3, %0" + : "=r" (tag), "=r" (pte) + : "r" (&old_tsb[i]), + "i" (ASI_NUCLEUS_QUAD_LDD)); + } if (!tag || (tag & (1UL << TSB_TAG_LOCK_BIT))) continue; @@ -198,8 +221,20 @@ static void copy_tsb(struct tsb *old_tsb, unsigned long old_size, v |= (i & (512UL - 1UL)) << 13UL; hash = tsb_hash(v, new_nentries); - new_tsb[hash].tag = tag; - new_tsb[hash].pte = pte; + if (tlb_type == cheetah_plus) { + __asm__ __volatile__( + "stxa %0, [%1] %2\n\t" + "stxa %3, [%4] %2" + : /* no outputs */ + : "r" (tag), + "r" (__pa(&new_tsb[hash].tag)), + "i" (ASI_PHYS_USE_EC), + "r" (pte), + "r" (__pa(&new_tsb[hash].pte))); + } else { + new_tsb[hash].tag = tag; + new_tsb[hash].pte = pte; + } } } diff --git a/include/asm-sparc64/mmu.h b/include/asm-sparc64/mmu.h index 18f98edfbcd..55e622711b9 100644 --- a/include/asm-sparc64/mmu.h +++ b/include/asm-sparc64/mmu.h @@ -97,7 +97,8 @@ struct tsb { unsigned long pte; } __attribute__((aligned(TSB_ENTRY_ALIGNMENT))); -extern void tsb_insert(struct tsb *ent, unsigned long tag, unsigned long pte); +extern void __tsb_insert(unsigned long ent, unsigned long tag, unsigned long pte); +extern void tsb_flush(unsigned long ent, unsigned long tag); typedef struct { unsigned long sparc64_ctx_val; diff --git a/include/asm-sparc64/tsb.h b/include/asm-sparc64/tsb.h index f384565212f..44709cde561 100644 --- a/include/asm-sparc64/tsb.h +++ b/include/asm-sparc64/tsb.h @@ -44,7 +44,89 @@ #define TSB_MEMBAR membar #StoreStore +/* Some cpus support physical address quad loads. We want to use + * those if possible so we don't need to hard-lock the TSB mapping + * into the TLB. We encode some instruction patching in order to + * support this. + * + * The kernel TSB is locked into the TLB by virtue of being in the + * kernel image, so we don't play these games for swapper_tsb access. + */ +#ifndef __ASSEMBLY__ +struct tsb_phys_patch_entry { + unsigned int addr; + unsigned int insn; +}; +extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end; +#endif +#define TSB_LOAD_QUAD(TSB, REG) \ +661: ldda [TSB] ASI_NUCLEUS_QUAD_LDD, REG; \ + .section .tsb_phys_patch, "ax"; \ + .word 661b; \ + ldda [TSB] ASI_QUAD_LDD_PHYS, REG; \ + .previous + +#define TSB_LOAD_TAG_HIGH(TSB, REG) \ +661: lduwa [TSB] ASI_N, REG; \ + .section .tsb_phys_patch, "ax"; \ + .word 661b; \ + lduwa [TSB] ASI_PHYS_USE_EC, REG; \ + .previous + +#define TSB_LOAD_TAG(TSB, REG) \ +661: ldxa [TSB] ASI_N, REG; \ + .section .tsb_phys_patch, "ax"; \ + .word 661b; \ + ldxa [TSB] ASI_PHYS_USE_EC, REG; \ + .previous + +#define TSB_CAS_TAG_HIGH(TSB, REG1, REG2) \ +661: casa [TSB] ASI_N, REG1, REG2; \ + .section .tsb_phys_patch, "ax"; \ + .word 661b; \ + casa [TSB] ASI_PHYS_USE_EC, REG1, REG2; \ + .previous + +#define TSB_CAS_TAG(TSB, REG1, REG2) \ +661: casxa [TSB] ASI_N, REG1, REG2; \ + .section .tsb_phys_patch, "ax"; \ + .word 661b; \ + casxa [TSB] ASI_PHYS_USE_EC, REG1, REG2; \ + .previous + +#define TSB_STORE(ADDR, VAL) \ +661: stxa VAL, [ADDR] ASI_N; \ + .section .tsb_phys_patch, "ax"; \ + .word 661b; \ + stxa VAL, [ADDR] ASI_PHYS_USE_EC; \ + .previous + #define TSB_LOCK_TAG(TSB, REG1, REG2) \ +99: TSB_LOAD_TAG_HIGH(TSB, REG1); \ + sethi %hi(TSB_TAG_LOCK_HIGH), REG2;\ + andcc REG1, REG2, %g0; \ + bne,pn %icc, 99b; \ + nop; \ + TSB_CAS_TAG_HIGH(TSB, REG1, REG2); \ + cmp REG1, REG2; \ + bne,pn %icc, 99b; \ + nop; \ + TSB_MEMBAR + +#define TSB_WRITE(TSB, TTE, TAG) \ + add TSB, 0x8, TSB; \ + TSB_STORE(TSB, TTE); \ + sub TSB, 0x8, TSB; \ + TSB_MEMBAR; \ + TSB_STORE(TSB, TAG); + +#define KTSB_LOAD_QUAD(TSB, REG) \ + ldda [TSB] ASI_NUCLEUS_QUAD_LDD, REG; + +#define KTSB_STORE(ADDR, VAL) \ + stxa VAL, [ADDR] ASI_N; + +#define KTSB_LOCK_TAG(TSB, REG1, REG2) \ 99: lduwa [TSB] ASI_N, REG1; \ sethi %hi(TSB_TAG_LOCK_HIGH), REG2;\ andcc REG1, REG2, %g0; \ @@ -56,10 +138,12 @@ nop; \ TSB_MEMBAR -#define TSB_WRITE(TSB, TTE, TAG) \ - stx TTE, [TSB + 0x08]; \ - TSB_MEMBAR; \ - stx TAG, [TSB + 0x00]; +#define KTSB_WRITE(TSB, TTE, TAG) \ + add TSB, 0x8, TSB; \ + stxa TTE, [TSB] ASI_N; \ + sub TSB, 0x8, TSB; \ + TSB_MEMBAR; \ + stxa TAG, [TSB] ASI_N; /* Do a kernel page table walk. Leaves physical PTE pointer in * REG1. Jumps to FAIL_LABEL on early page table walk termination. @@ -157,7 +241,7 @@ and REG2, (KERNEL_TSB_NENTRIES - 1), REG2; \ sllx REG2, 4, REG2; \ add REG1, REG2, REG2; \ - ldda [REG2] ASI_NUCLEUS_QUAD_LDD, REG3; \ + KTSB_LOAD_QUAD(REG2, REG3); \ cmp REG3, TAG; \ be,a,pt %xcc, OK_LABEL; \ mov REG4, REG1; -- cgit v1.2.3-70-g09d2 From c4bce90ea2069e5a87beac806de3090ab32128d5 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 11 Feb 2006 21:57:54 -0800 Subject: [SPARC64]: Deal with PTE layout differences in SUN4V. Yes, you heard it right, they changed the PTE layout for SUN4V. Ho hum... This is the simple and inefficient way to support this. It'll get optimized, don't worry. Signed-off-by: David S. Miller --- arch/sparc64/kernel/itlb_miss.S | 4 +- arch/sparc64/kernel/ktlb.S | 12 +- arch/sparc64/kernel/setup.c | 274 -------------- arch/sparc64/kernel/sun4v_tlb_miss.S | 3 +- arch/sparc64/kernel/tsb.S | 9 +- arch/sparc64/lib/clear_page.S | 8 +- arch/sparc64/lib/copy_page.S | 7 +- arch/sparc64/mm/fault.c | 2 +- arch/sparc64/mm/generic.c | 40 +- arch/sparc64/mm/init.c | 703 ++++++++++++++++++++++++++++++----- arch/sparc64/mm/tsb.c | 12 +- include/asm-sparc64/pgtable.h | 266 ++++--------- 12 files changed, 717 insertions(+), 623 deletions(-) (limited to 'arch/sparc64/kernel/itlb_miss.S') diff --git a/arch/sparc64/kernel/itlb_miss.S b/arch/sparc64/kernel/itlb_miss.S index 97facce27aa..730caa4a150 100644 --- a/arch/sparc64/kernel/itlb_miss.S +++ b/arch/sparc64/kernel/itlb_miss.S @@ -6,9 +6,10 @@ nop ! Delay slot (fill me) TSB_LOAD_QUAD(%g1, %g4) ! Load TSB entry cmp %g4, %g6 ! Compare TAG - sethi %hi(_PAGE_EXEC), %g4 ! Setup exec check + sethi %hi(PAGE_EXEC), %g4 ! Setup exec check /* ITLB ** ICACHE line 2: TSB compare and TLB load */ + ldx [%g4 + %lo(PAGE_EXEC)], %g4 bne,pn %xcc, tsb_miss_itlb ! Miss mov FAULT_CODE_ITLB, %g3 andcc %g5, %g4, %g0 ! Executable? @@ -16,7 +17,6 @@ nop ! Delay slot, fill me stxa %g5, [%g0] ASI_ITLB_DATA_IN ! Load TLB retry ! Trap done - nop /* ITLB ** ICACHE line 3: */ nop diff --git a/arch/sparc64/kernel/ktlb.S b/arch/sparc64/kernel/ktlb.S index 2d333ab4b91..47dfd45971e 100644 --- a/arch/sparc64/kernel/ktlb.S +++ b/arch/sparc64/kernel/ktlb.S @@ -131,16 +131,8 @@ kvmap_dtlb_4v: brgez,pn %g4, kvmap_dtlb_nonlinear nop -#define KERN_HIGHBITS ((_PAGE_VALID|_PAGE_SZ4MB)^0xfffff80000000000) -#define KERN_LOWBITS (_PAGE_CP | _PAGE_CV | _PAGE_P | _PAGE_W) - - sethi %uhi(KERN_HIGHBITS), %g2 - or %g2, %ulo(KERN_HIGHBITS), %g2 - sllx %g2, 32, %g2 - or %g2, KERN_LOWBITS, %g2 - -#undef KERN_HIGHBITS -#undef KERN_LOWBITS + sethi %hi(kern_linear_pte_xor), %g2 + ldx [%g2 + %lo(kern_linear_pte_xor)], %g2 .globl kvmap_linear_patch kvmap_linear_patch: diff --git a/arch/sparc64/kernel/setup.c b/arch/sparc64/kernel/setup.c index f36b257b2e4..ca75f3b26a3 100644 --- a/arch/sparc64/kernel/setup.c +++ b/arch/sparc64/kernel/setup.c @@ -64,12 +64,6 @@ struct screen_info screen_info = { 16 /* orig-video-points */ }; -/* Typing sync at the prom prompt calls the function pointed to by - * the sync callback which I set to the following function. - * This should sync all filesystems and return, for now it just - * prints out pretty messages and returns. - */ - void (*prom_palette)(int); void (*prom_keyboard)(void); @@ -79,263 +73,6 @@ prom_console_write(struct console *con, const char *s, unsigned n) prom_write(s, n); } -static struct console prom_console = { - .name = "prom", - .write = prom_console_write, - .flags = CON_CONSDEV | CON_ENABLED, - .index = -1, -}; - -#define PROM_TRUE -1 -#define PROM_FALSE 0 - -/* Pretty sick eh? */ -int prom_callback(long *args) -{ - struct console *cons, *saved_console = NULL; - unsigned long flags; - char *cmd; - extern spinlock_t prom_entry_lock; - - if (!args) - return -1; - if (!(cmd = (char *)args[0])) - return -1; - - /* - * The callback can be invoked on the cpu that first dropped - * into prom_cmdline after taking the serial interrupt, or on - * a slave processor that was smp_captured() if the - * administrator has done a switch-cpu inside obp. In either - * case, the cpu is marked as in-interrupt. Drop IRQ locks. - */ - irq_exit(); - - /* XXX Revisit the locking here someday. This is a debugging - * XXX feature so it isnt all that critical. -DaveM - */ - local_irq_save(flags); - - spin_unlock(&prom_entry_lock); - cons = console_drivers; - while (cons) { - unregister_console(cons); - cons->flags &= ~(CON_PRINTBUFFER); - cons->next = saved_console; - saved_console = cons; - cons = console_drivers; - } - register_console(&prom_console); - if (!strcmp(cmd, "sync")) { - prom_printf("PROM `%s' command...\n", cmd); - show_free_areas(); - if (current->pid != 0) { - local_irq_enable(); - sys_sync(); - local_irq_disable(); - } - args[2] = 0; - args[args[1] + 3] = -1; - prom_printf("Returning to PROM\n"); - } else if (!strcmp(cmd, "va>tte-data")) { - unsigned long ctx, va; - unsigned long tte = 0; - long res = PROM_FALSE; - - ctx = args[3]; - va = args[4]; - if (ctx) { - /* - * Find process owning ctx, lookup mapping. - */ - struct task_struct *p; - struct mm_struct *mm = NULL; - pgd_t *pgdp; - pud_t *pudp; - pmd_t *pmdp; - pte_t *ptep; - pte_t pte; - - for_each_process(p) { - mm = p->mm; - if (CTX_NRBITS(mm->context) == ctx) - break; - } - if (!mm || - CTX_NRBITS(mm->context) != ctx) - goto done; - - pgdp = pgd_offset(mm, va); - if (pgd_none(*pgdp)) - goto done; - pudp = pud_offset(pgdp, va); - if (pud_none(*pudp)) - goto done; - pmdp = pmd_offset(pudp, va); - if (pmd_none(*pmdp)) - goto done; - - /* Preemption implicitly disabled by virtue of - * being called from inside OBP. - */ - ptep = pte_offset_map(pmdp, va); - pte = *ptep; - if (pte_present(pte)) { - tte = pte_val(pte); - res = PROM_TRUE; - } - pte_unmap(ptep); - goto done; - } - - if ((va >= KERNBASE) && (va < (KERNBASE + (4 * 1024 * 1024)))) { - if (tlb_type == spitfire) { - extern unsigned long sparc64_kern_pri_context; - - /* Spitfire Errata #32 workaround */ - __asm__ __volatile__( - "stxa %0, [%1] %2\n\t" - "flush %%g6" - : /* No outputs */ - : "r" (sparc64_kern_pri_context), - "r" (PRIMARY_CONTEXT), - "i" (ASI_DMMU)); - } - - /* - * Locked down tlb entry. - */ - - if (tlb_type == spitfire) { - tte = spitfire_get_dtlb_data(SPITFIRE_HIGHEST_LOCKED_TLBENT); - res = PROM_TRUE; - } else if (tlb_type == cheetah || tlb_type == cheetah_plus) { - tte = cheetah_get_ldtlb_data(CHEETAH_HIGHEST_LOCKED_TLBENT); - res = PROM_TRUE; - } - goto done; - } - - if (va < PGDIR_SIZE) { - /* - * vmalloc or prom_inherited mapping. - */ - pgd_t *pgdp; - pud_t *pudp; - pmd_t *pmdp; - pte_t *ptep; - pte_t pte; - int error; - - if ((va >= LOW_OBP_ADDRESS) && (va < HI_OBP_ADDRESS)) { - tte = prom_virt_to_phys(va, &error); - if (!error) - res = PROM_TRUE; - goto done; - } - pgdp = pgd_offset_k(va); - if (pgd_none(*pgdp)) - goto done; - pudp = pud_offset(pgdp, va); - if (pud_none(*pudp)) - goto done; - pmdp = pmd_offset(pudp, va); - if (pmd_none(*pmdp)) - goto done; - - /* Preemption implicitly disabled by virtue of - * being called from inside OBP. - */ - ptep = pte_offset_kernel(pmdp, va); - pte = *ptep; - if (pte_present(pte)) { - tte = pte_val(pte); - res = PROM_TRUE; - } - goto done; - } - - if (va < PAGE_OFFSET) { - /* - * No mappings here. - */ - goto done; - } - - if (va & (1UL << 40)) { - /* - * I/O page. - */ - - tte = (__pa(va) & _PAGE_PADDR) | - _PAGE_VALID | _PAGE_SZ4MB | - _PAGE_E | _PAGE_P | _PAGE_W; - res = PROM_TRUE; - goto done; - } - - /* - * Normal page. - */ - tte = (__pa(va) & _PAGE_PADDR) | - _PAGE_VALID | _PAGE_SZ4MB | - _PAGE_CP | _PAGE_CV | _PAGE_P | _PAGE_W; - res = PROM_TRUE; - - done: - if (res == PROM_TRUE) { - args[2] = 3; - args[args[1] + 3] = 0; - args[args[1] + 4] = res; - args[args[1] + 5] = tte; - } else { - args[2] = 2; - args[args[1] + 3] = 0; - args[args[1] + 4] = res; - } - } else if (!strcmp(cmd, ".soft1")) { - unsigned long tte; - - tte = args[3]; - prom_printf("%lx:\"%s%s%s%s%s\" ", - (tte & _PAGE_SOFT) >> 7, - tte & _PAGE_MODIFIED ? "M" : "-", - tte & _PAGE_ACCESSED ? "A" : "-", - tte & _PAGE_READ ? "W" : "-", - tte & _PAGE_WRITE ? "R" : "-", - tte & _PAGE_PRESENT ? "P" : "-"); - - args[2] = 2; - args[args[1] + 3] = 0; - args[args[1] + 4] = PROM_TRUE; - } else if (!strcmp(cmd, ".soft2")) { - unsigned long tte; - - tte = args[3]; - prom_printf("%lx ", (tte & 0x07FC000000000000UL) >> 50); - - args[2] = 2; - args[args[1] + 3] = 0; - args[args[1] + 4] = PROM_TRUE; - } else { - prom_printf("unknown PROM `%s' command...\n", cmd); - } - unregister_console(&prom_console); - while (saved_console) { - cons = saved_console; - saved_console = cons->next; - register_console(cons); - } - spin_lock(&prom_entry_lock); - local_irq_restore(flags); - - /* - * Restore in-interrupt status for a resume from obp. - */ - irq_enter(); - return 0; -} - unsigned int boot_flags = 0; #define BOOTME_DEBUG 0x1 #define BOOTME_SINGLE 0x2 @@ -483,17 +220,6 @@ char reboot_command[COMMAND_LINE_SIZE]; static struct pt_regs fake_swapper_regs = { { 0, }, 0, 0, 0, 0 }; -void register_prom_callbacks(void) -{ - prom_setcallback(prom_callback); - prom_feval(": linux-va>tte-data 2 \" va>tte-data\" $callback drop ; " - "' linux-va>tte-data to va>tte-data"); - prom_feval(": linux-.soft1 1 \" .soft1\" $callback 2drop ; " - "' linux-.soft1 to .soft1"); - prom_feval(": linux-.soft2 1 \" .soft2\" $callback 2drop ; " - "' linux-.soft2 to .soft2"); -} - static void __init per_cpu_patch(void) { #ifdef CONFIG_SMP diff --git a/arch/sparc64/kernel/sun4v_tlb_miss.S b/arch/sparc64/kernel/sun4v_tlb_miss.S index 597359ced23..950ca74b4a5 100644 --- a/arch/sparc64/kernel/sun4v_tlb_miss.S +++ b/arch/sparc64/kernel/sun4v_tlb_miss.S @@ -59,7 +59,8 @@ sun4v_itlb_miss: /* Load TSB tag/pte into %g2/%g3 and compare the tag. */ ldda [%g1] ASI_QUAD_LDD_PHYS, %g2 cmp %g2, %g6 - sethi %hi(_PAGE_EXEC), %g7 + sethi %hi(PAGE_EXEC), %g7 + ldx [%g7 + %lo(PAGE_EXEC)], %g7 bne,a,pn %xcc, tsb_miss_page_table_walk mov FAULT_CODE_ITLB, %g3 andcc %g3, %g7, %g0 diff --git a/arch/sparc64/kernel/tsb.S b/arch/sparc64/kernel/tsb.S index 667dcb077be..be8f0892d72 100644 --- a/arch/sparc64/kernel/tsb.S +++ b/arch/sparc64/kernel/tsb.S @@ -56,10 +56,11 @@ tsb_reload: /* If it is larger than the base page size, don't * bother putting it into the TSB. */ - srlx %g5, 32, %g2 - sethi %hi(_PAGE_ALL_SZ_BITS >> 32), %g7 - and %g2, %g7, %g2 - sethi %hi(_PAGE_SZBITS >> 32), %g7 + sethi %hi(_PAGE_ALL_SZ_BITS), %g7 + ldx [%g7 + %lo(_PAGE_ALL_SZ_BITS)], %g7 + and %g5, %g7, %g2 + sethi %hi(_PAGE_SZBITS), %g7 + ldx [%g7 + %lo(_PAGE_SZBITS)], %g7 cmp %g2, %g7 bne,a,pn %xcc, tsb_tlb_reload TSB_STORE(%g1, %g0) diff --git a/arch/sparc64/lib/clear_page.S b/arch/sparc64/lib/clear_page.S index cdc634bceba..77e531f6c2a 100644 --- a/arch/sparc64/lib/clear_page.S +++ b/arch/sparc64/lib/clear_page.S @@ -23,9 +23,6 @@ * disable preemption during the clear. */ -#define TTE_BITS_TOP (_PAGE_VALID | _PAGE_SZBITS) -#define TTE_BITS_BOTTOM (_PAGE_CP | _PAGE_CV | _PAGE_P | _PAGE_L | _PAGE_W) - .text .globl _clear_page @@ -44,12 +41,11 @@ clear_user_page: /* %o0=dest, %o1=vaddr */ sethi %hi(PAGE_SIZE), %o4 sllx %g2, 32, %g2 - sethi %uhi(TTE_BITS_TOP), %g3 + sethi %hi(PAGE_KERNEL_LOCKED), %g3 - sllx %g3, 32, %g3 + ldx [%g3 + %lo(PAGE_KERNEL_LOCKED)], %g3 sub %o0, %g2, %g1 ! paddr - or %g3, TTE_BITS_BOTTOM, %g3 and %o1, %o4, %o0 ! vaddr D-cache alias bit or %g1, %g3, %g1 ! TTE data diff --git a/arch/sparc64/lib/copy_page.S b/arch/sparc64/lib/copy_page.S index feebb14fd27..37460666a5c 100644 --- a/arch/sparc64/lib/copy_page.S +++ b/arch/sparc64/lib/copy_page.S @@ -23,8 +23,6 @@ * disable preemption during the clear. */ -#define TTE_BITS_TOP (_PAGE_VALID | _PAGE_SZBITS) -#define TTE_BITS_BOTTOM (_PAGE_CP | _PAGE_CV | _PAGE_P | _PAGE_L | _PAGE_W) #define DCACHE_SIZE (PAGE_SIZE * 2) #if (PAGE_SHIFT == 13) || (PAGE_SHIFT == 19) @@ -52,13 +50,12 @@ copy_user_page: /* %o0=dest, %o1=src, %o2=vaddr */ sethi %hi(PAGE_SIZE), %o3 sllx %g2, 32, %g2 - sethi %uhi(TTE_BITS_TOP), %g3 + sethi %hi(PAGE_KERNEL_LOCKED), %g3 - sllx %g3, 32, %g3 + ldx [%g3 + %lo(PAGE_KERNEL_LOCKED)], %g3 sub %o0, %g2, %g1 ! dest paddr sub %o1, %g2, %g2 ! src paddr - or %g3, TTE_BITS_BOTTOM, %g3 and %o2, %o3, %o0 ! vaddr D-cache alias bit or %g1, %g3, %g1 ! dest TTE data diff --git a/arch/sparc64/mm/fault.c b/arch/sparc64/mm/fault.c index 6f0539aa44d..439a53c1e56 100644 --- a/arch/sparc64/mm/fault.c +++ b/arch/sparc64/mm/fault.c @@ -137,7 +137,7 @@ static unsigned int get_user_insn(unsigned long tpc) if (!pte_present(pte)) goto out; - pa = (pte_val(pte) & _PAGE_PADDR); + pa = (pte_pfn(pte) << PAGE_SHIFT); pa += (tpc & ~PAGE_MASK); /* Use phys bypass so we don't pollute dtlb/dcache. */ diff --git a/arch/sparc64/mm/generic.c b/arch/sparc64/mm/generic.c index 580b63da836..5fc5c579e35 100644 --- a/arch/sparc64/mm/generic.c +++ b/arch/sparc64/mm/generic.c @@ -15,15 +15,6 @@ #include #include -static inline pte_t mk_pte_io(unsigned long page, pgprot_t prot, int space) -{ - pte_t pte; - pte_val(pte) = (((page) | pgprot_val(prot) | _PAGE_E) & - ~(unsigned long)_PAGE_CACHE); - pte_val(pte) |= (((unsigned long)space) << 32); - return pte; -} - /* Remap IO memory, the same way as remap_pfn_range(), but use * the obio memory space. * @@ -48,24 +39,29 @@ static inline void io_remap_pte_range(struct mm_struct *mm, pte_t * pte, pte_t entry; unsigned long curend = address + PAGE_SIZE; - entry = mk_pte_io(offset, prot, space); + entry = mk_pte_io(offset, prot, space, PAGE_SIZE); if (!(address & 0xffff)) { - if (!(address & 0x3fffff) && !(offset & 0x3ffffe) && end >= address + 0x400000) { - entry = mk_pte_io(offset, - __pgprot(pgprot_val (prot) | _PAGE_SZ4MB), - space); + if (PAGE_SIZE < (4 * 1024 * 1024) && + !(address & 0x3fffff) && + !(offset & 0x3ffffe) && + end >= address + 0x400000) { + entry = mk_pte_io(offset, prot, space, + 4 * 1024 * 1024); curend = address + 0x400000; offset += 0x400000; - } else if (!(address & 0x7ffff) && !(offset & 0x7fffe) && end >= address + 0x80000) { - entry = mk_pte_io(offset, - __pgprot(pgprot_val (prot) | _PAGE_SZ512K), - space); + } else if (PAGE_SIZE < (512 * 1024) && + !(address & 0x7ffff) && + !(offset & 0x7fffe) && + end >= address + 0x80000) { + entry = mk_pte_io(offset, prot, space, + 512 * 1024 * 1024); curend = address + 0x80000; offset += 0x80000; - } else if (!(offset & 0xfffe) && end >= address + 0x10000) { - entry = mk_pte_io(offset, - __pgprot(pgprot_val (prot) | _PAGE_SZ64K), - space); + } else if (PAGE_SIZE < (64 * 1024) && + !(offset & 0xfffe) && + end >= address + 0x10000) { + entry = mk_pte_io(offset, prot, space, + 64 * 1024); curend = address + 0x10000; offset += 0x10000; } else diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c index 92756da273b..9c2fc239f3e 100644 --- a/arch/sparc64/mm/init.c +++ b/arch/sparc64/mm/init.c @@ -6,6 +6,7 @@ */ #include +#include #include #include #include @@ -118,6 +119,7 @@ unsigned long phys_base __read_mostly; unsigned long kern_base __read_mostly; unsigned long kern_size __read_mostly; unsigned long pfn_base __read_mostly; +unsigned long kern_linear_pte_xor __read_mostly; /* get_new_mmu_context() uses "cache + 1". */ DEFINE_SPINLOCK(ctx_alloc_lock); @@ -256,6 +258,9 @@ static inline void tsb_insert(struct tsb *ent, unsigned long tag, unsigned long __tsb_insert(tsb_addr, tag, pte); } +unsigned long _PAGE_ALL_SZ_BITS __read_mostly; +unsigned long _PAGE_SZBITS __read_mostly; + void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t pte) { struct mm_struct *mm; @@ -398,39 +403,9 @@ struct linux_prom_translation { struct linux_prom_translation prom_trans[512] __read_mostly; unsigned int prom_trans_ents __read_mostly; -extern unsigned long prom_boot_page; -extern void prom_remap(unsigned long physpage, unsigned long virtpage, int mmu_ihandle); -extern int prom_get_mmu_ihandle(void); -extern void register_prom_callbacks(void); - /* Exported for SMP bootup purposes. */ unsigned long kern_locked_tte_data; -/* - * Translate PROM's mapping we capture at boot time into physical address. - * The second parameter is only set from prom_callback() invocations. - */ -unsigned long prom_virt_to_phys(unsigned long promva, int *error) -{ - int i; - - for (i = 0; i < prom_trans_ents; i++) { - struct linux_prom_translation *p = &prom_trans[i]; - - if (promva >= p->virt && - promva < (p->virt + p->size)) { - unsigned long base = p->data & _PAGE_PADDR; - - if (error) - *error = 0; - return base + (promva & (8192 - 1)); - } - } - if (error) - *error = 1; - return 0UL; -} - /* The obp translations are saved based on 8k pagesize, since obp can * use a mixture of pagesizes. Misses to the LOW_OBP_ADDRESS -> * HI_OBP_ADDRESS range are handled in ktlb.S. @@ -537,6 +512,8 @@ static void __init hypervisor_tlb_lock(unsigned long vaddr, "3" (arg2), "4" (arg3)); } +static unsigned long kern_large_tte(unsigned long paddr); + static void __init remap_kernel(void) { unsigned long phys_page, tte_vaddr, tte_data; @@ -544,9 +521,7 @@ static void __init remap_kernel(void) tte_vaddr = (unsigned long) KERNBASE; phys_page = (prom_boot_mapping_phys_low >> 22UL) << 22UL; - tte_data = (phys_page | (_PAGE_VALID | _PAGE_SZ4MB | - _PAGE_CP | _PAGE_CV | _PAGE_P | - _PAGE_L | _PAGE_W)); + tte_data = kern_large_tte(phys_page); kern_locked_tte_data = tte_data; @@ -591,10 +566,6 @@ static void __init inherit_prom_mappings(void) prom_printf("Remapping the kernel... "); remap_kernel(); prom_printf("done.\n"); - - prom_printf("Registering callbacks... "); - register_prom_callbacks(); - prom_printf("done.\n"); } void prom_world(int enter) @@ -631,63 +602,6 @@ void __flush_dcache_range(unsigned long start, unsigned long end) } #endif /* DCACHE_ALIASING_POSSIBLE */ -/* If not locked, zap it. */ -void __flush_tlb_all(void) -{ - unsigned long pstate; - int i; - - __asm__ __volatile__("flushw\n\t" - "rdpr %%pstate, %0\n\t" - "wrpr %0, %1, %%pstate" - : "=r" (pstate) - : "i" (PSTATE_IE)); - if (tlb_type == spitfire) { - for (i = 0; i < 64; i++) { - /* Spitfire Errata #32 workaround */ - /* NOTE: Always runs on spitfire, so no - * cheetah+ page size encodings. - */ - __asm__ __volatile__("stxa %0, [%1] %2\n\t" - "flush %%g6" - : /* No outputs */ - : "r" (0), - "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU)); - - if (!(spitfire_get_dtlb_data(i) & _PAGE_L)) { - __asm__ __volatile__("stxa %%g0, [%0] %1\n\t" - "membar #Sync" - : /* no outputs */ - : "r" (TLB_TAG_ACCESS), "i" (ASI_DMMU)); - spitfire_put_dtlb_data(i, 0x0UL); - } - - /* Spitfire Errata #32 workaround */ - /* NOTE: Always runs on spitfire, so no - * cheetah+ page size encodings. - */ - __asm__ __volatile__("stxa %0, [%1] %2\n\t" - "flush %%g6" - : /* No outputs */ - : "r" (0), - "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU)); - - if (!(spitfire_get_itlb_data(i) & _PAGE_L)) { - __asm__ __volatile__("stxa %%g0, [%0] %1\n\t" - "membar #Sync" - : /* no outputs */ - : "r" (TLB_TAG_ACCESS), "i" (ASI_IMMU)); - spitfire_put_itlb_data(i, 0x0UL); - } - } - } else if (tlb_type == cheetah || tlb_type == cheetah_plus) { - cheetah_flush_dtlb_all(); - cheetah_flush_itlb_all(); - } - __asm__ __volatile__("wrpr %0, 0, %%pstate" - : : "r" (pstate)); -} - /* Caller does TLB context flushing on local CPU if necessary. * The caller also ensures that CTX_VALID(mm->context) is false. * @@ -1180,6 +1094,9 @@ extern void sun4v_patch_tlb_handlers(void); static unsigned long last_valid_pfn; pgd_t swapper_pg_dir[2048]; +static void sun4u_pgprot_init(void); +static void sun4v_pgprot_init(void); + void __init paging_init(void) { unsigned long end_pfn, pages_avail, shift; @@ -1188,6 +1105,11 @@ void __init paging_init(void) kern_base = (prom_boot_mapping_phys_low >> 22UL) << 22UL; kern_size = (unsigned long)&_end - (unsigned long)KERNBASE; + if (tlb_type == hypervisor) + sun4v_pgprot_init(); + else + sun4u_pgprot_init(); + if (tlb_type == cheetah_plus || tlb_type == hypervisor) tsb_phys_patch(); @@ -1411,3 +1333,596 @@ void free_initrd_mem(unsigned long start, unsigned long end) } } #endif + +/* SUN4U pte bits... */ +#define _PAGE_SZ4MB_4U 0x6000000000000000 /* 4MB Page */ +#define _PAGE_SZ512K_4U 0x4000000000000000 /* 512K Page */ +#define _PAGE_SZ64K_4U 0x2000000000000000 /* 64K Page */ +#define _PAGE_SZ8K_4U 0x0000000000000000 /* 8K Page */ +#define _PAGE_NFO_4U 0x1000000000000000 /* No Fault Only */ +#define _PAGE_IE_4U 0x0800000000000000 /* Invert Endianness */ +#define _PAGE_SOFT2_4U 0x07FC000000000000 /* Software bits, set 2 */ +#define _PAGE_RES1_4U 0x0002000000000000 /* Reserved */ +#define _PAGE_SZ32MB_4U 0x0001000000000000 /* (Panther) 32MB page */ +#define _PAGE_SZ256MB_4U 0x2001000000000000 /* (Panther) 256MB page */ +#define _PAGE_SN_4U 0x0000800000000000 /* (Cheetah) Snoop */ +#define _PAGE_RES2_4U 0x0000780000000000 /* Reserved */ +#define _PAGE_PADDR_4U 0x000007FFFFFFE000 /* (Cheetah) paddr[42:13] */ +#define _PAGE_SOFT_4U 0x0000000000001F80 /* Software bits: */ +#define _PAGE_EXEC_4U 0x0000000000001000 /* Executable SW bit */ +#define _PAGE_MODIFIED_4U 0x0000000000000800 /* Modified (dirty) */ +#define _PAGE_FILE_4U 0x0000000000000800 /* Pagecache page */ +#define _PAGE_ACCESSED_4U 0x0000000000000400 /* Accessed (ref'd) */ +#define _PAGE_READ_4U 0x0000000000000200 /* Readable SW Bit */ +#define _PAGE_WRITE_4U 0x0000000000000100 /* Writable SW Bit */ +#define _PAGE_PRESENT_4U 0x0000000000000080 /* Present */ +#define _PAGE_L_4U 0x0000000000000040 /* Locked TTE */ +#define _PAGE_CP_4U 0x0000000000000020 /* Cacheable in P-Cache */ +#define _PAGE_CV_4U 0x0000000000000010 /* Cacheable in V-Cache */ +#define _PAGE_E_4U 0x0000000000000008 /* side-Effect */ +#define _PAGE_P_4U 0x0000000000000004 /* Privileged Page */ +#define _PAGE_W_4U 0x0000000000000002 /* Writable */ + +/* SUN4V pte bits... */ +#define _PAGE_NFO_4V 0x4000000000000000 /* No Fault Only */ +#define _PAGE_SOFT2_4V 0x3F00000000000000 /* Software bits, set 2 */ +#define _PAGE_MODIFIED_4V 0x2000000000000000 /* Modified (dirty) */ +#define _PAGE_ACCESSED_4V 0x1000000000000000 /* Accessed (ref'd) */ +#define _PAGE_READ_4V 0x0800000000000000 /* Readable SW Bit */ +#define _PAGE_WRITE_4V 0x0400000000000000 /* Writable SW Bit */ +#define _PAGE_PADDR_4V 0x00FFFFFFFFFFE000 /* paddr[55:13] */ +#define _PAGE_IE_4V 0x0000000000001000 /* Invert Endianness */ +#define _PAGE_E_4V 0x0000000000000800 /* side-Effect */ +#define _PAGE_CP_4V 0x0000000000000400 /* Cacheable in P-Cache */ +#define _PAGE_CV_4V 0x0000000000000200 /* Cacheable in V-Cache */ +#define _PAGE_P_4V 0x0000000000000100 /* Privileged Page */ +#define _PAGE_EXEC_4V 0x0000000000000080 /* Executable Page */ +#define _PAGE_W_4V 0x0000000000000040 /* Writable */ +#define _PAGE_SOFT_4V 0x0000000000000030 /* Software bits */ +#define _PAGE_FILE_4V 0x0000000000000020 /* Pagecache page */ +#define _PAGE_PRESENT_4V 0x0000000000000010 /* Present */ +#define _PAGE_RESV_4V 0x0000000000000008 /* Reserved */ +#define _PAGE_SZ16GB_4V 0x0000000000000007 /* 16GB Page */ +#define _PAGE_SZ2GB_4V 0x0000000000000006 /* 2GB Page */ +#define _PAGE_SZ256MB_4V 0x0000000000000005 /* 256MB Page */ +#define _PAGE_SZ32MB_4V 0x0000000000000004 /* 32MB Page */ +#define _PAGE_SZ4MB_4V 0x0000000000000003 /* 4MB Page */ +#define _PAGE_SZ512K_4V 0x0000000000000002 /* 512K Page */ +#define _PAGE_SZ64K_4V 0x0000000000000001 /* 64K Page */ +#define _PAGE_SZ8K_4V 0x0000000000000000 /* 8K Page */ + +#if PAGE_SHIFT == 13 +#define _PAGE_SZBITS_4U _PAGE_SZ8K_4U +#define _PAGE_SZBITS_4V _PAGE_SZ8K_4V +#elif PAGE_SHIFT == 16 +#define _PAGE_SZBITS_4U _PAGE_SZ64K_4U +#define _PAGE_SZBITS_4V _PAGE_SZ64K_4V +#elif PAGE_SHIFT == 19 +#define _PAGE_SZBITS_4U _PAGE_SZ512K_4U +#define _PAGE_SZBITS_4V _PAGE_SZ512K_4V +#elif PAGE_SHIFT == 22 +#define _PAGE_SZBITS_4U _PAGE_SZ4MB_4U +#define _PAGE_SZBITS_4V _PAGE_SZ4MB_4V +#else +#error Wrong PAGE_SHIFT specified +#endif + +#if defined(CONFIG_HUGETLB_PAGE_SIZE_4MB) +#define _PAGE_SZHUGE_4U _PAGE_SZ4MB_4U +#define _PAGE_SZHUGE_4V _PAGE_SZ4MB_4V +#elif defined(CONFIG_HUGETLB_PAGE_SIZE_512K) +#define _PAGE_SZHUGE_4U _PAGE_SZ512K_4U +#define _PAGE_SZHUGE_4V _PAGE_SZ512K_4V +#elif defined(CONFIG_HUGETLB_PAGE_SIZE_64K) +#define _PAGE_SZHUGE_4U _PAGE_SZ64K_4U +#define _PAGE_SZHUGE_4V _PAGE_SZ64K_4V +#endif + +#define _PAGE_CACHE_4U (_PAGE_CP_4U | _PAGE_CV_4U) +#define _PAGE_CACHE_4V (_PAGE_CP_4V | _PAGE_CV_4V) +#define __DIRTY_BITS_4U (_PAGE_MODIFIED_4U | _PAGE_WRITE_4U | _PAGE_W_4U) +#define __DIRTY_BITS_4V (_PAGE_MODIFIED_4V | _PAGE_WRITE_4V | _PAGE_W_4V) +#define __ACCESS_BITS_4U (_PAGE_ACCESSED_4U | _PAGE_READ_4U | _PAGE_R) +#define __ACCESS_BITS_4V (_PAGE_ACCESSED_4V | _PAGE_READ_4V | _PAGE_R) + +pgprot_t PAGE_KERNEL __read_mostly; +EXPORT_SYMBOL(PAGE_KERNEL); + +pgprot_t PAGE_KERNEL_LOCKED __read_mostly; +pgprot_t PAGE_COPY __read_mostly; +pgprot_t PAGE_EXEC __read_mostly; +unsigned long pg_iobits __read_mostly; + +unsigned long _PAGE_IE __read_mostly; +unsigned long _PAGE_E __read_mostly; +unsigned long _PAGE_CACHE __read_mostly; + +static void prot_init_common(unsigned long page_none, + unsigned long page_shared, + unsigned long page_copy, + unsigned long page_readonly, + unsigned long page_exec_bit) +{ + PAGE_COPY = __pgprot(page_copy); + + protection_map[0x0] = __pgprot(page_none); + protection_map[0x1] = __pgprot(page_readonly & ~page_exec_bit); + protection_map[0x2] = __pgprot(page_copy & ~page_exec_bit); + protection_map[0x3] = __pgprot(page_copy & ~page_exec_bit); + protection_map[0x4] = __pgprot(page_readonly); + protection_map[0x5] = __pgprot(page_readonly); + protection_map[0x6] = __pgprot(page_copy); + protection_map[0x7] = __pgprot(page_copy); + protection_map[0x8] = __pgprot(page_none); + protection_map[0x9] = __pgprot(page_readonly & ~page_exec_bit); + protection_map[0xa] = __pgprot(page_shared & ~page_exec_bit); + protection_map[0xb] = __pgprot(page_shared & ~page_exec_bit); + protection_map[0xc] = __pgprot(page_readonly); + protection_map[0xd] = __pgprot(page_readonly); + protection_map[0xe] = __pgprot(page_shared); + protection_map[0xf] = __pgprot(page_shared); +} + +static void __init sun4u_pgprot_init(void) +{ + unsigned long page_none, page_shared, page_copy, page_readonly; + unsigned long page_exec_bit; + + PAGE_KERNEL = __pgprot (_PAGE_PRESENT_4U | _PAGE_VALID | + _PAGE_CACHE_4U | _PAGE_P_4U | + __ACCESS_BITS_4U | __DIRTY_BITS_4U | + _PAGE_EXEC_4U); + PAGE_KERNEL_LOCKED = __pgprot (_PAGE_PRESENT_4U | _PAGE_VALID | + _PAGE_CACHE_4U | _PAGE_P_4U | + __ACCESS_BITS_4U | __DIRTY_BITS_4U | + _PAGE_EXEC_4U | _PAGE_L_4U); + PAGE_EXEC = __pgprot(_PAGE_EXEC_4U); + + _PAGE_IE = _PAGE_IE_4U; + _PAGE_E = _PAGE_E_4U; + _PAGE_CACHE = _PAGE_CACHE_4U; + + pg_iobits = (_PAGE_VALID | _PAGE_PRESENT_4U | __DIRTY_BITS_4U | + __ACCESS_BITS_4U | _PAGE_E_4U); + + kern_linear_pte_xor = (_PAGE_VALID | _PAGE_SZ4MB_4U) ^ + 0xfffff80000000000; + kern_linear_pte_xor |= (_PAGE_CP_4U | _PAGE_CV_4U | + _PAGE_P_4U | _PAGE_W_4U); + + _PAGE_SZBITS = _PAGE_SZBITS_4U; + _PAGE_ALL_SZ_BITS = (_PAGE_SZ4MB_4U | _PAGE_SZ512K_4U | + _PAGE_SZ64K_4U | _PAGE_SZ8K_4U | + _PAGE_SZ32MB_4U | _PAGE_SZ256MB_4U); + + + page_none = _PAGE_PRESENT_4U | _PAGE_ACCESSED_4U | _PAGE_CACHE_4U; + page_shared = (_PAGE_VALID | _PAGE_PRESENT_4U | _PAGE_CACHE_4U | + __ACCESS_BITS_4U | _PAGE_WRITE_4U | _PAGE_EXEC_4U); + page_copy = (_PAGE_VALID | _PAGE_PRESENT_4U | _PAGE_CACHE_4U | + __ACCESS_BITS_4U | _PAGE_EXEC_4U); + page_readonly = (_PAGE_VALID | _PAGE_PRESENT_4U | _PAGE_CACHE_4U | + __ACCESS_BITS_4U | _PAGE_EXEC_4U); + + page_exec_bit = _PAGE_EXEC_4U; + + prot_init_common(page_none, page_shared, page_copy, page_readonly, + page_exec_bit); +} + +static void __init sun4v_pgprot_init(void) +{ + unsigned long page_none, page_shared, page_copy, page_readonly; + unsigned long page_exec_bit; + + PAGE_KERNEL = __pgprot (_PAGE_PRESENT_4V | _PAGE_VALID | + _PAGE_CACHE_4V | _PAGE_P_4V | + __ACCESS_BITS_4V | __DIRTY_BITS_4V | + _PAGE_EXEC_4V); + PAGE_KERNEL_LOCKED = PAGE_KERNEL; + PAGE_EXEC = __pgprot(_PAGE_EXEC_4V); + + _PAGE_IE = _PAGE_IE_4V; + _PAGE_E = _PAGE_E_4V; + _PAGE_CACHE = _PAGE_CACHE_4V; + + kern_linear_pte_xor = (_PAGE_VALID | _PAGE_SZ4MB_4V) ^ + 0xfffff80000000000; + kern_linear_pte_xor |= (_PAGE_CP_4V | _PAGE_CV_4V | + _PAGE_P_4V | _PAGE_W_4V); + + pg_iobits = (_PAGE_VALID | _PAGE_PRESENT_4V | __DIRTY_BITS_4V | + __ACCESS_BITS_4V | _PAGE_E_4V); + + _PAGE_SZBITS = _PAGE_SZBITS_4V; + _PAGE_ALL_SZ_BITS = (_PAGE_SZ16GB_4V | _PAGE_SZ2GB_4V | + _PAGE_SZ256MB_4V | _PAGE_SZ32MB_4V | + _PAGE_SZ4MB_4V | _PAGE_SZ512K_4V | + _PAGE_SZ64K_4V | _PAGE_SZ8K_4V); + + page_none = _PAGE_PRESENT_4V | _PAGE_ACCESSED_4V | _PAGE_CACHE_4V; + page_shared = (_PAGE_VALID | _PAGE_PRESENT_4V | _PAGE_CACHE_4V | + __ACCESS_BITS_4V | _PAGE_WRITE_4V | _PAGE_EXEC_4V); + page_copy = (_PAGE_VALID | _PAGE_PRESENT_4V | _PAGE_CACHE_4V | + __ACCESS_BITS_4V | _PAGE_EXEC_4V); + page_readonly = (_PAGE_VALID | _PAGE_PRESENT_4V | _PAGE_CACHE_4V | + __ACCESS_BITS_4V | _PAGE_EXEC_4V); + + page_exec_bit = _PAGE_EXEC_4V; + + prot_init_common(page_none, page_shared, page_copy, page_readonly, + page_exec_bit); +} + +unsigned long pte_sz_bits(unsigned long sz) +{ + if (tlb_type == hypervisor) { + switch (sz) { + case 8 * 1024: + default: + return _PAGE_SZ8K_4V; + case 64 * 1024: + return _PAGE_SZ64K_4V; + case 512 * 1024: + return _PAGE_SZ512K_4V; + case 4 * 1024 * 1024: + return _PAGE_SZ4MB_4V; + }; + } else { + switch (sz) { + case 8 * 1024: + default: + return _PAGE_SZ8K_4U; + case 64 * 1024: + return _PAGE_SZ64K_4U; + case 512 * 1024: + return _PAGE_SZ512K_4U; + case 4 * 1024 * 1024: + return _PAGE_SZ4MB_4U; + }; + } +} + +pte_t mk_pte_io(unsigned long page, pgprot_t prot, int space, unsigned long page_size) +{ + pte_t pte; + if (tlb_type == hypervisor) { + pte_val(pte) = (((page) | pgprot_val(prot) | _PAGE_E_4V) & + ~(unsigned long)_PAGE_CACHE_4V); + } else { + pte_val(pte) = (((page) | pgprot_val(prot) | _PAGE_E_4U) & + ~(unsigned long)_PAGE_CACHE_4U); + } + pte_val(pte) |= (((unsigned long)space) << 32); + pte_val(pte) |= pte_sz_bits(page_size); + return pte; +} + +unsigned long pte_present(pte_t pte) +{ + return (pte_val(pte) & + ((tlb_type == hypervisor) ? + _PAGE_PRESENT_4V : _PAGE_PRESENT_4U)); +} + +unsigned long pte_file(pte_t pte) +{ + return (pte_val(pte) & + ((tlb_type == hypervisor) ? + _PAGE_FILE_4V : _PAGE_FILE_4U)); +} + +unsigned long pte_read(pte_t pte) +{ + return (pte_val(pte) & + ((tlb_type == hypervisor) ? + _PAGE_READ_4V : _PAGE_READ_4U)); +} + +unsigned long pte_exec(pte_t pte) +{ + return (pte_val(pte) & + ((tlb_type == hypervisor) ? + _PAGE_EXEC_4V : _PAGE_EXEC_4U)); +} + +unsigned long pte_write(pte_t pte) +{ + return (pte_val(pte) & + ((tlb_type == hypervisor) ? + _PAGE_WRITE_4V : _PAGE_WRITE_4U)); +} + +unsigned long pte_dirty(pte_t pte) +{ + return (pte_val(pte) & + ((tlb_type == hypervisor) ? + _PAGE_MODIFIED_4V : _PAGE_MODIFIED_4U)); +} + +unsigned long pte_young(pte_t pte) +{ + return (pte_val(pte) & + ((tlb_type == hypervisor) ? + _PAGE_ACCESSED_4V : _PAGE_ACCESSED_4U)); +} + +pte_t pte_wrprotect(pte_t pte) +{ + unsigned long mask = _PAGE_WRITE_4U | _PAGE_W_4U; + + if (tlb_type == hypervisor) + mask = _PAGE_WRITE_4V | _PAGE_W_4V; + + return __pte(pte_val(pte) & ~mask); +} + +pte_t pte_rdprotect(pte_t pte) +{ + unsigned long mask = _PAGE_R | _PAGE_READ_4U; + + if (tlb_type == hypervisor) + mask = _PAGE_R | _PAGE_READ_4V; + + return __pte(pte_val(pte) & ~mask); +} + +pte_t pte_mkclean(pte_t pte) +{ + unsigned long mask = _PAGE_MODIFIED_4U | _PAGE_W_4U; + + if (tlb_type == hypervisor) + mask = _PAGE_MODIFIED_4V | _PAGE_W_4V; + + return __pte(pte_val(pte) & ~mask); +} + +pte_t pte_mkold(pte_t pte) +{ + unsigned long mask = _PAGE_R | _PAGE_ACCESSED_4U; + + if (tlb_type == hypervisor) + mask = _PAGE_R | _PAGE_ACCESSED_4V; + + return __pte(pte_val(pte) & ~mask); +} + +pte_t pte_mkyoung(pte_t pte) +{ + unsigned long mask = _PAGE_R | _PAGE_ACCESSED_4U; + + if (tlb_type == hypervisor) + mask = _PAGE_R | _PAGE_ACCESSED_4V; + + return __pte(pte_val(pte) | mask); +} + +pte_t pte_mkwrite(pte_t pte) +{ + unsigned long mask = _PAGE_WRITE_4U; + + if (tlb_type == hypervisor) + mask = _PAGE_WRITE_4V; + + return __pte(pte_val(pte) | mask); +} + +pte_t pte_mkdirty(pte_t pte) +{ + unsigned long mask = _PAGE_MODIFIED_4U | _PAGE_W_4U; + + if (tlb_type == hypervisor) + mask = _PAGE_MODIFIED_4V | _PAGE_W_4V; + + return __pte(pte_val(pte) | mask); +} + +pte_t pte_mkhuge(pte_t pte) +{ + unsigned long mask = _PAGE_SZHUGE_4U; + + if (tlb_type == hypervisor) + mask = _PAGE_SZHUGE_4V; + + return __pte(pte_val(pte) | mask); +} + +pte_t pgoff_to_pte(unsigned long off) +{ + unsigned long bit = _PAGE_FILE_4U; + + if (tlb_type == hypervisor) + bit = _PAGE_FILE_4V; + + return __pte((off << PAGE_SHIFT) | bit); +} + +pgprot_t pgprot_noncached(pgprot_t prot) +{ + unsigned long val = pgprot_val(prot); + unsigned long off = _PAGE_CP_4U | _PAGE_CV_4U; + unsigned long on = _PAGE_E_4U; + + if (tlb_type == hypervisor) { + off = _PAGE_CP_4V | _PAGE_CV_4V; + on = _PAGE_E_4V; + } + + return __pgprot((val & ~off) | on); +} + +pte_t pfn_pte(unsigned long pfn, pgprot_t prot) +{ + unsigned long sz_bits = _PAGE_SZBITS_4U; + + if (tlb_type == hypervisor) + sz_bits = _PAGE_SZBITS_4V; + + return __pte((pfn << PAGE_SHIFT) | pgprot_val(prot) | sz_bits); +} + +unsigned long pte_pfn(pte_t pte) +{ + unsigned long mask = _PAGE_PADDR_4U; + + if (tlb_type == hypervisor) + mask = _PAGE_PADDR_4V; + + return (pte_val(pte) & mask) >> PAGE_SHIFT; +} + +pte_t pte_modify(pte_t orig_pte, pgprot_t new_prot) +{ + unsigned long preserve_mask; + unsigned long val; + + preserve_mask = (_PAGE_PADDR_4U | + _PAGE_MODIFIED_4U | + _PAGE_ACCESSED_4U | + _PAGE_CP_4U | + _PAGE_CV_4U | + _PAGE_E_4U | + _PAGE_PRESENT_4U | + _PAGE_SZBITS_4U); + if (tlb_type == hypervisor) + preserve_mask = (_PAGE_PADDR_4V | + _PAGE_MODIFIED_4V | + _PAGE_ACCESSED_4V | + _PAGE_CP_4V | + _PAGE_CV_4V | + _PAGE_E_4V | + _PAGE_PRESENT_4V | + _PAGE_SZBITS_4V); + + val = (pte_val(orig_pte) & preserve_mask); + + return __pte(val | (pgprot_val(new_prot) & ~preserve_mask)); +} + +static unsigned long kern_large_tte(unsigned long paddr) +{ + unsigned long val; + + val = (_PAGE_VALID | _PAGE_SZ4MB_4U | + _PAGE_CP_4U | _PAGE_CV_4U | _PAGE_P_4U | + _PAGE_EXEC_4U | _PAGE_L_4U | _PAGE_W_4U); + if (tlb_type == hypervisor) + val = (_PAGE_VALID | _PAGE_SZ4MB_4V | + _PAGE_CP_4V | _PAGE_CV_4V | _PAGE_P_4V | + _PAGE_EXEC_4V | _PAGE_W_4V); + + return val | paddr; +} + +/* + * Translate PROM's mapping we capture at boot time into physical address. + * The second parameter is only set from prom_callback() invocations. + */ +unsigned long prom_virt_to_phys(unsigned long promva, int *error) +{ + unsigned long mask; + int i; + + mask = _PAGE_PADDR_4U; + if (tlb_type == hypervisor) + mask = _PAGE_PADDR_4V; + + for (i = 0; i < prom_trans_ents; i++) { + struct linux_prom_translation *p = &prom_trans[i]; + + if (promva >= p->virt && + promva < (p->virt + p->size)) { + unsigned long base = p->data & mask; + + if (error) + *error = 0; + return base + (promva & (8192 - 1)); + } + } + if (error) + *error = 1; + return 0UL; +} + +/* XXX We should kill off this ugly thing at so me point. XXX */ +unsigned long sun4u_get_pte(unsigned long addr) +{ + pgd_t *pgdp; + pud_t *pudp; + pmd_t *pmdp; + pte_t *ptep; + unsigned long mask = _PAGE_PADDR_4U; + + if (tlb_type == hypervisor) + mask = _PAGE_PADDR_4V; + + if (addr >= PAGE_OFFSET) + return addr & mask; + + if ((addr >= LOW_OBP_ADDRESS) && (addr < HI_OBP_ADDRESS)) + return prom_virt_to_phys(addr, NULL); + + pgdp = pgd_offset_k(addr); + pudp = pud_offset(pgdp, addr); + pmdp = pmd_offset(pudp, addr); + ptep = pte_offset_kernel(pmdp, addr); + + return pte_val(*ptep) & mask; +} + +/* If not locked, zap it. */ +void __flush_tlb_all(void) +{ + unsigned long pstate; + int i; + + __asm__ __volatile__("flushw\n\t" + "rdpr %%pstate, %0\n\t" + "wrpr %0, %1, %%pstate" + : "=r" (pstate) + : "i" (PSTATE_IE)); + if (tlb_type == spitfire) { + for (i = 0; i < 64; i++) { + /* Spitfire Errata #32 workaround */ + /* NOTE: Always runs on spitfire, so no + * cheetah+ page size encodings. + */ + __asm__ __volatile__("stxa %0, [%1] %2\n\t" + "flush %%g6" + : /* No outputs */ + : "r" (0), + "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU)); + + if (!(spitfire_get_dtlb_data(i) & _PAGE_L_4U)) { + __asm__ __volatile__("stxa %%g0, [%0] %1\n\t" + "membar #Sync" + : /* no outputs */ + : "r" (TLB_TAG_ACCESS), "i" (ASI_DMMU)); + spitfire_put_dtlb_data(i, 0x0UL); + } + + /* Spitfire Errata #32 workaround */ + /* NOTE: Always runs on spitfire, so no + * cheetah+ page size encodings. + */ + __asm__ __volatile__("stxa %0, [%1] %2\n\t" + "flush %%g6" + : /* No outputs */ + : "r" (0), + "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU)); + + if (!(spitfire_get_itlb_data(i) & _PAGE_L_4U)) { + __asm__ __volatile__("stxa %%g0, [%0] %1\n\t" + "membar #Sync" + : /* no outputs */ + : "r" (TLB_TAG_ACCESS), "i" (ASI_IMMU)); + spitfire_put_itlb_data(i, 0x0UL); + } + } + } else if (tlb_type == cheetah || tlb_type == cheetah_plus) { + cheetah_flush_dtlb_all(); + cheetah_flush_itlb_all(); + } + __asm__ __volatile__("wrpr %0, 0, %%pstate" + : : "r" (pstate)); +} diff --git a/arch/sparc64/mm/tsb.c b/arch/sparc64/mm/tsb.c index c5dc4b0cc1c..975242ab88e 100644 --- a/arch/sparc64/mm/tsb.c +++ b/arch/sparc64/mm/tsb.c @@ -85,8 +85,7 @@ static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_bytes) mm->context.tsb_nentries = tsb_bytes / sizeof(struct tsb); base = TSBMAP_BASE; - tte = (_PAGE_VALID | _PAGE_L | _PAGE_CP | - _PAGE_CV | _PAGE_P | _PAGE_W); + tte = pgprot_val(PAGE_KERNEL_LOCKED); tsb_paddr = __pa(mm->context.tsb); BUG_ON(tsb_paddr & (tsb_bytes - 1UL)); @@ -99,55 +98,48 @@ static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_bytes) #ifdef DCACHE_ALIASING_POSSIBLE base += (tsb_paddr & 8192); #endif - tte |= _PAGE_SZ8K; page_sz = 8192; break; case 8192 << 1: tsb_reg = 0x1UL; - tte |= _PAGE_SZ64K; page_sz = 64 * 1024; break; case 8192 << 2: tsb_reg = 0x2UL; - tte |= _PAGE_SZ64K; page_sz = 64 * 1024; break; case 8192 << 3: tsb_reg = 0x3UL; - tte |= _PAGE_SZ64K; page_sz = 64 * 1024; break; case 8192 << 4: tsb_reg = 0x4UL; - tte |= _PAGE_SZ512K; page_sz = 512 * 1024; break; case 8192 << 5: tsb_reg = 0x5UL; - tte |= _PAGE_SZ512K; page_sz = 512 * 1024; break; case 8192 << 6: tsb_reg = 0x6UL; - tte |= _PAGE_SZ512K; page_sz = 512 * 1024; break; case 8192 << 7: tsb_reg = 0x7UL; - tte |= _PAGE_SZ4MB; page_sz = 4 * 1024 * 1024; break; default: BUG(); }; + tte |= pte_sz_bits(page_sz); if (tlb_type == cheetah_plus || tlb_type == hypervisor) { /* Physical mapping, no locked TLB entry for TSB. */ diff --git a/include/asm-sparc64/pgtable.h b/include/asm-sparc64/pgtable.h index a480007f0a9..bd8bce704a9 100644 --- a/include/asm-sparc64/pgtable.h +++ b/include/asm-sparc64/pgtable.h @@ -90,134 +90,48 @@ #endif /* !(__ASSEMBLY__) */ -/* Spitfire/Cheetah TTE bits. */ -#define _PAGE_VALID _AC(0x8000000000000000,UL) /* Valid TTE */ -#define _PAGE_R _AC(0x8000000000000000,UL) /* Keep ref bit up to date*/ -#define _PAGE_SZ4MB _AC(0x6000000000000000,UL) /* 4MB Page */ -#define _PAGE_SZ512K _AC(0x4000000000000000,UL) /* 512K Page */ -#define _PAGE_SZ64K _AC(0x2000000000000000,UL) /* 64K Page */ -#define _PAGE_SZ8K _AC(0x0000000000000000,UL) /* 8K Page */ -#define _PAGE_NFO _AC(0x1000000000000000,UL) /* No Fault Only */ -#define _PAGE_IE _AC(0x0800000000000000,UL) /* Invert Endianness */ -#define _PAGE_SOFT2 _AC(0x07FC000000000000,UL) /* Software bits, set 2 */ -#define _PAGE_RES1 _AC(0x0002000000000000,UL) /* Reserved */ -#define _PAGE_SZ32MB _AC(0x0001000000000000,UL) /* (Panther) 32MB page */ -#define _PAGE_SZ256MB _AC(0x2001000000000000,UL) /* (Panther) 256MB page */ -#define _PAGE_SN _AC(0x0000800000000000,UL) /* (Cheetah) Snoop */ -#define _PAGE_RES2 _AC(0x0000780000000000,UL) /* Reserved */ -#define _PAGE_PADDR_SF _AC(0x000001FFFFFFE000,UL) /* (Spitfire) paddr[40:13]*/ -#define _PAGE_PADDR _AC(0x000007FFFFFFE000,UL) /* (Cheetah) paddr[42:13] */ -#define _PAGE_SOFT _AC(0x0000000000001F80,UL) /* Software bits */ -#define _PAGE_L _AC(0x0000000000000040,UL) /* Locked TTE */ -#define _PAGE_CP _AC(0x0000000000000020,UL) /* Cacheable in P-Cache */ -#define _PAGE_CV _AC(0x0000000000000010,UL) /* Cacheable in V-Cache */ -#define _PAGE_E _AC(0x0000000000000008,UL) /* side-Effect */ -#define _PAGE_P _AC(0x0000000000000004,UL) /* Privileged Page */ -#define _PAGE_W _AC(0x0000000000000002,UL) /* Writable */ -#define _PAGE_G _AC(0x0000000000000001,UL) /* Global */ - -#define _PAGE_ALL_SZ_BITS \ - (_PAGE_SZ4MB | _PAGE_SZ512K | _PAGE_SZ64K | \ - _PAGE_SZ8K | _PAGE_SZ32MB | _PAGE_SZ256MB) - -/* Here are the SpitFire software bits we use in the TTE's. - * - * WARNING: If you are going to try and start using some - * of the soft2 bits, you will need to make - * modifications to the swap entry implementation. - * For example, one thing that could happen is that - * swp_entry_to_pte() would BUG_ON() if you tried - * to use one of the soft2 bits for _PAGE_FILE. - * - * Like other architectures, I have aliased _PAGE_FILE with - * _PAGE_MODIFIED. This works because _PAGE_FILE is never - * interpreted that way unless _PAGE_PRESENT is clear. - */ -#define _PAGE_EXEC _AC(0x0000000000001000,UL) /* Executable SW bit */ -#define _PAGE_MODIFIED _AC(0x0000000000000800,UL) /* Modified (dirty) */ -#define _PAGE_FILE _AC(0x0000000000000800,UL) /* Pagecache page */ -#define _PAGE_ACCESSED _AC(0x0000000000000400,UL) /* Accessed (ref'd) */ -#define _PAGE_READ _AC(0x0000000000000200,UL) /* Readable SW Bit */ -#define _PAGE_WRITE _AC(0x0000000000000100,UL) /* Writable SW Bit */ -#define _PAGE_PRESENT _AC(0x0000000000000080,UL) /* Present */ - -#if PAGE_SHIFT == 13 -#define _PAGE_SZBITS _PAGE_SZ8K -#elif PAGE_SHIFT == 16 -#define _PAGE_SZBITS _PAGE_SZ64K -#elif PAGE_SHIFT == 19 -#define _PAGE_SZBITS _PAGE_SZ512K -#elif PAGE_SHIFT == 22 -#define _PAGE_SZBITS _PAGE_SZ4MB -#else -#error Wrong PAGE_SHIFT specified -#endif - -#if defined(CONFIG_HUGETLB_PAGE_SIZE_4MB) -#define _PAGE_SZHUGE _PAGE_SZ4MB -#elif defined(CONFIG_HUGETLB_PAGE_SIZE_512K) -#define _PAGE_SZHUGE _PAGE_SZ512K -#elif defined(CONFIG_HUGETLB_PAGE_SIZE_64K) -#define _PAGE_SZHUGE _PAGE_SZ64K -#endif - -#define _PAGE_CACHE (_PAGE_CP | _PAGE_CV) - -#define __DIRTY_BITS (_PAGE_MODIFIED | _PAGE_WRITE | _PAGE_W) -#define __ACCESS_BITS (_PAGE_ACCESSED | _PAGE_READ | _PAGE_R) -#define __PRIV_BITS _PAGE_P - -#define PAGE_NONE __pgprot (_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_CACHE) - -/* Don't set the TTE _PAGE_W bit here, else the dirty bit never gets set. */ -#define PAGE_SHARED __pgprot (_PAGE_PRESENT | _PAGE_VALID | _PAGE_CACHE | \ - __ACCESS_BITS | _PAGE_WRITE | _PAGE_EXEC) - -#define PAGE_COPY __pgprot (_PAGE_PRESENT | _PAGE_VALID | _PAGE_CACHE | \ - __ACCESS_BITS | _PAGE_EXEC) - -#define PAGE_READONLY __pgprot (_PAGE_PRESENT | _PAGE_VALID | _PAGE_CACHE | \ - __ACCESS_BITS | _PAGE_EXEC) - -#define PAGE_KERNEL __pgprot (_PAGE_PRESENT | _PAGE_VALID | _PAGE_CACHE | \ - __PRIV_BITS | \ - __ACCESS_BITS | __DIRTY_BITS | _PAGE_EXEC) - -#define PAGE_SHARED_NOEXEC __pgprot (_PAGE_PRESENT | _PAGE_VALID | \ - _PAGE_CACHE | \ - __ACCESS_BITS | _PAGE_WRITE) - -#define PAGE_COPY_NOEXEC __pgprot (_PAGE_PRESENT | _PAGE_VALID | \ - _PAGE_CACHE | __ACCESS_BITS) - -#define PAGE_READONLY_NOEXEC __pgprot (_PAGE_PRESENT | _PAGE_VALID | \ - _PAGE_CACHE | __ACCESS_BITS) - -#define _PFN_MASK _PAGE_PADDR - -#define pg_iobits (_PAGE_VALID | _PAGE_PRESENT | __DIRTY_BITS | \ - __ACCESS_BITS | _PAGE_E) - -#define __P000 PAGE_NONE -#define __P001 PAGE_READONLY_NOEXEC -#define __P010 PAGE_COPY_NOEXEC -#define __P011 PAGE_COPY_NOEXEC -#define __P100 PAGE_READONLY -#define __P101 PAGE_READONLY -#define __P110 PAGE_COPY -#define __P111 PAGE_COPY - -#define __S000 PAGE_NONE -#define __S001 PAGE_READONLY_NOEXEC -#define __S010 PAGE_SHARED_NOEXEC -#define __S011 PAGE_SHARED_NOEXEC -#define __S100 PAGE_READONLY -#define __S101 PAGE_READONLY -#define __S110 PAGE_SHARED -#define __S111 PAGE_SHARED +/* PTE bits which are the same in SUN4U and SUN4V format. */ +#define _PAGE_VALID 0x8000000000000000 /* Valid TTE */ +#define _PAGE_R 0x8000000000000000 /* Keep ref bit up to date*/ + +/* These are actually filled in at boot time by sun4{u,v}_pgprot_init() */ +#define __P000 __pgprot(0) +#define __P001 __pgprot(0) +#define __P010 __pgprot(0) +#define __P011 __pgprot(0) +#define __P100 __pgprot(0) +#define __P101 __pgprot(0) +#define __P110 __pgprot(0) +#define __P111 __pgprot(0) + +#define __S000 __pgprot(0) +#define __S001 __pgprot(0) +#define __S010 __pgprot(0) +#define __S011 __pgprot(0) +#define __S100 __pgprot(0) +#define __S101 __pgprot(0) +#define __S110 __pgprot(0) +#define __S111 __pgprot(0) #ifndef __ASSEMBLY__ +extern pte_t mk_pte_io(unsigned long, pgprot_t, int, unsigned long); + +extern unsigned long pte_sz_bits(unsigned long size); + +extern pgprot_t PAGE_KERNEL; +extern pgprot_t PAGE_KERNEL_LOCKED; +extern pgprot_t PAGE_COPY; + +/* XXX This uglyness is for the atyfb driver's sparc mmap() support. XXX */ +extern unsigned long _PAGE_IE; +extern unsigned long _PAGE_E; +extern unsigned long _PAGE_CACHE; + +extern unsigned long pg_iobits; +extern unsigned long _PAGE_ALL_SZ_BITS; +extern unsigned long _PAGE_SZBITS; + extern unsigned long phys_base; extern unsigned long pfn_base; @@ -229,27 +143,12 @@ extern struct page *mem_map_zero; * the first physical page in the machine is at some huge physical address, * such as 4GB. This is common on a partitioned E10000, for example. */ - -#define pfn_pte(pfn, prot) \ - __pte(((pfn) << PAGE_SHIFT) | pgprot_val(prot) | _PAGE_SZBITS) +extern pte_t pfn_pte(unsigned long, pgprot_t); #define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot)) +extern unsigned long pte_pfn(pte_t); +#define pte_page(x) pfn_to_page(pte_pfn(x)) +extern pte_t pte_modify(pte_t, pgprot_t); -#define pte_pfn(x) ((pte_val(x) & _PAGE_PADDR)>>PAGE_SHIFT) -#define pte_page(x) pfn_to_page(pte_pfn(x)) - -static inline pte_t pte_modify(pte_t orig_pte, pgprot_t new_prot) -{ - pte_t __pte; - const unsigned long preserve_mask = (_PFN_MASK | - _PAGE_MODIFIED | _PAGE_ACCESSED | - _PAGE_CACHE | _PAGE_E | - _PAGE_PRESENT | _PAGE_SZBITS); - - pte_val(__pte) = (pte_val(orig_pte) & preserve_mask) | - (pgprot_val(new_prot) & ~preserve_mask); - - return __pte; -} #define pmd_set(pmdp, ptep) \ (pmd_val(*(pmdp)) = (__pa((unsigned long) (ptep)) >> 11UL)) #define pud_set(pudp, pmdp) \ @@ -259,8 +158,6 @@ static inline pte_t pte_modify(pte_t orig_pte, pgprot_t new_prot) #define pmd_page(pmd) virt_to_page((void *)__pmd_page(pmd)) #define pud_page(pud) \ ((unsigned long) __va((((unsigned long)pud_val(pud))<<11UL))) -#define pte_none(pte) (!pte_val(pte)) -#define pte_present(pte) (pte_val(pte) & _PAGE_PRESENT) #define pmd_none(pmd) (!pmd_val(pmd)) #define pmd_bad(pmd) (0) #define pmd_present(pmd) (pmd_val(pmd) != 0U) @@ -270,30 +167,29 @@ static inline pte_t pte_modify(pte_t orig_pte, pgprot_t new_prot) #define pud_present(pud) (pud_val(pud) != 0U) #define pud_clear(pudp) (pud_val(*(pudp)) = 0U) +/* Same in both SUN4V and SUN4U. */ +#define pte_none(pte) (!pte_val(pte)) + +extern unsigned long pte_present(pte_t); + /* The following only work if pte_present() is true. * Undefined behaviour if not.. */ -#define pte_read(pte) (pte_val(pte) & _PAGE_READ) -#define pte_exec(pte) (pte_val(pte) & _PAGE_EXEC) -#define pte_write(pte) (pte_val(pte) & _PAGE_WRITE) -#define pte_dirty(pte) (pte_val(pte) & _PAGE_MODIFIED) -#define pte_young(pte) (pte_val(pte) & _PAGE_ACCESSED) -#define pte_wrprotect(pte) (__pte(pte_val(pte) & ~(_PAGE_WRITE|_PAGE_W))) -#define pte_rdprotect(pte) \ - (__pte(((pte_val(pte)<<1UL)>>1UL) & ~_PAGE_READ)) -#define pte_mkclean(pte) \ - (__pte(pte_val(pte) & ~(_PAGE_MODIFIED|_PAGE_W))) -#define pte_mkold(pte) \ - (__pte(((pte_val(pte)<<1UL)>>1UL) & ~_PAGE_ACCESSED)) - -/* Permanent address of a page. */ -#define __page_address(page) page_address(page) +extern unsigned long pte_read(pte_t); +extern unsigned long pte_exec(pte_t); +extern unsigned long pte_write(pte_t); +extern unsigned long pte_dirty(pte_t); +extern unsigned long pte_young(pte_t); +extern pte_t pte_wrprotect(pte_t); +extern pte_t pte_rdprotect(pte_t); +extern pte_t pte_mkclean(pte_t); +extern pte_t pte_mkold(pte_t); /* Be very careful when you change these three, they are delicate. */ -#define pte_mkyoung(pte) (__pte(pte_val(pte) | _PAGE_ACCESSED | _PAGE_R)) -#define pte_mkwrite(pte) (__pte(pte_val(pte) | _PAGE_WRITE)) -#define pte_mkdirty(pte) (__pte(pte_val(pte) | _PAGE_MODIFIED | _PAGE_W)) -#define pte_mkhuge(pte) (__pte(pte_val(pte) | _PAGE_SZHUGE)) +extern pte_t pte_mkyoung(pte_t); +extern pte_t pte_mkwrite(pte_t); +extern pte_t pte_mkdirty(pte_t); +extern pte_t pte_mkhuge(pte_t); /* to find an entry in a page-table-directory. */ #define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1)) @@ -328,6 +224,9 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *p /* It is more efficient to let flush_tlb_kernel_range() * handle init_mm tlb flushes. + * + * SUN4V NOTE: _PAGE_VALID is the same value in both the SUN4U + * and SUN4V pte layout, so this inline test is fine. */ if (likely(mm != &init_mm) && (pte_val(orig) & _PAGE_VALID)) tlb_batch_add(mm, addr, ptep, orig); @@ -362,42 +261,23 @@ extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t); #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) /* File offset in PTE support. */ -#define pte_file(pte) (pte_val(pte) & _PAGE_FILE) +extern unsigned long pte_file(pte_t); #define pte_to_pgoff(pte) (pte_val(pte) >> PAGE_SHIFT) -#define pgoff_to_pte(off) (__pte(((off) << PAGE_SHIFT) | _PAGE_FILE)) +extern pte_t pgoff_to_pte(unsigned long); #define PTE_FILE_MAX_BITS (64UL - PAGE_SHIFT - 1UL) extern unsigned long prom_virt_to_phys(unsigned long, int *); -static __inline__ unsigned long -sun4u_get_pte (unsigned long addr) -{ - pgd_t *pgdp; - pud_t *pudp; - pmd_t *pmdp; - pte_t *ptep; - - if (addr >= PAGE_OFFSET) - return addr & _PAGE_PADDR; - if ((addr >= LOW_OBP_ADDRESS) && (addr < HI_OBP_ADDRESS)) - return prom_virt_to_phys(addr, NULL); - pgdp = pgd_offset_k(addr); - pudp = pud_offset(pgdp, addr); - pmdp = pmd_offset(pudp, addr); - ptep = pte_offset_kernel(pmdp, addr); - return pte_val(*ptep) & _PAGE_PADDR; -} +extern unsigned long sun4u_get_pte(unsigned long); -static __inline__ unsigned long -__get_phys (unsigned long addr) +static inline unsigned long __get_phys(unsigned long addr) { - return sun4u_get_pte (addr); + return sun4u_get_pte(addr); } -static __inline__ int -__get_iospace (unsigned long addr) +static inline int __get_iospace(unsigned long addr) { - return ((sun4u_get_pte (addr) & 0xf0000000) >> 28); + return ((sun4u_get_pte(addr) & 0xf0000000) >> 28); } extern unsigned long *sparc64_valid_addr_bitmap; @@ -411,9 +291,7 @@ extern int io_remap_pfn_range(struct vm_area_struct *vma, unsigned long from, unsigned long size, pgprot_t prot); /* Clear virtual and physical cachability, set side-effect bit. */ -#define pgprot_noncached(prot) \ - (__pgprot((pgprot_val(prot) & ~(_PAGE_CP | _PAGE_CV)) | \ - _PAGE_E)) +extern pgprot_t pgprot_noncached(pgprot_t); /* * For sparc32&64, the pfn in io_remap_pfn_range() carries in -- cgit v1.2.3-70-g09d2 From 8b234274418d6d79527c4ac3a72da446ca4cb35f Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 17 Feb 2006 18:01:02 -0800 Subject: [SPARC64]: More TLB/TSB handling fixes. The SUN4V convention with non-shared TSBs is that the context bit of the TAG is clear. So we have to choose an "invalid" bit and initialize new TSBs appropriately. Otherwise a zero TAG looks "valid". Make sure, for the window fixup cases, that we use the right global registers and that we don't potentially trample on the live global registers in etrap/rtrap handling (%g2 and %g6) and that we put the missing virtual address properly in %g5. Signed-off-by: David S. Miller --- arch/sparc64/kernel/dtlb_miss.S | 4 ++-- arch/sparc64/kernel/itlb_miss.S | 12 +++++------ arch/sparc64/kernel/ktlb.S | 14 ++++++++----- arch/sparc64/kernel/sun4v_tlb_miss.S | 39 +++++++++++++++++++----------------- arch/sparc64/kernel/tsb.S | 17 ++++++++++------ arch/sparc64/mm/init.c | 4 +++- arch/sparc64/mm/tsb.c | 25 +++++++++++------------ include/asm-sparc64/tsb.h | 8 ++++++++ include/asm-sparc64/ttable.h | 12 +++++------ 9 files changed, 78 insertions(+), 57 deletions(-) (limited to 'arch/sparc64/kernel/itlb_miss.S') diff --git a/arch/sparc64/kernel/dtlb_miss.S b/arch/sparc64/kernel/dtlb_miss.S index 2ef6f6e6e72..09a6a15a710 100644 --- a/arch/sparc64/kernel/dtlb_miss.S +++ b/arch/sparc64/kernel/dtlb_miss.S @@ -2,10 +2,10 @@ ldxa [%g0] ASI_DMMU_TSB_8KB_PTR, %g1 ! Get TSB 8K pointer ldxa [%g0] ASI_DMMU, %g6 ! Get TAG TARGET srlx %g6, 48, %g5 ! Get context + sllx %g6, 22, %g6 ! Zero out context brz,pn %g5, kvmap_dtlb ! Context 0 processing - nop ! Delay slot (fill me) + srlx %g6, 22, %g6 ! Delay slot TSB_LOAD_QUAD(%g1, %g4) ! Load TSB entry - nop ! Push branch to next I$ line cmp %g4, %g6 ! Compare TAG /* DTLB ** ICACHE line 2: TSB compare and TLB load */ diff --git a/arch/sparc64/kernel/itlb_miss.S b/arch/sparc64/kernel/itlb_miss.S index 730caa4a150..6dfe3968c37 100644 --- a/arch/sparc64/kernel/itlb_miss.S +++ b/arch/sparc64/kernel/itlb_miss.S @@ -2,25 +2,25 @@ ldxa [%g0] ASI_IMMU_TSB_8KB_PTR, %g1 ! Get TSB 8K pointer ldxa [%g0] ASI_IMMU, %g6 ! Get TAG TARGET srlx %g6, 48, %g5 ! Get context + sllx %g6, 22, %g6 ! Zero out context brz,pn %g5, kvmap_itlb ! Context 0 processing - nop ! Delay slot (fill me) + srlx %g6, 22, %g6 ! Delay slot TSB_LOAD_QUAD(%g1, %g4) ! Load TSB entry cmp %g4, %g6 ! Compare TAG - sethi %hi(PAGE_EXEC), %g4 ! Setup exec check /* ITLB ** ICACHE line 2: TSB compare and TLB load */ + sethi %hi(PAGE_EXEC), %g4 ! Setup exec check ldx [%g4 + %lo(PAGE_EXEC)], %g4 bne,pn %xcc, tsb_miss_itlb ! Miss mov FAULT_CODE_ITLB, %g3 andcc %g5, %g4, %g0 ! Executable? be,pn %xcc, tsb_do_fault nop ! Delay slot, fill me - stxa %g5, [%g0] ASI_ITLB_DATA_IN ! Load TLB - retry ! Trap done + nop /* ITLB ** ICACHE line 3: */ - nop - nop + stxa %g5, [%g0] ASI_ITLB_DATA_IN ! Load TLB + retry ! Trap done nop nop nop diff --git a/arch/sparc64/kernel/ktlb.S b/arch/sparc64/kernel/ktlb.S index 47dfd45971e..ac29da915d0 100644 --- a/arch/sparc64/kernel/ktlb.S +++ b/arch/sparc64/kernel/ktlb.S @@ -52,8 +52,10 @@ kvmap_itlb_vmalloc_addr: /* Load and check PTE. */ ldxa [%g5] ASI_PHYS_USE_EC, %g5 + mov 1, %g7 + sllx %g7, TSB_TAG_INVALID_BIT, %g7 brgez,a,pn %g5, kvmap_itlb_longpath - KTSB_STORE(%g1, %g0) + KTSB_STORE(%g1, %g7) KTSB_WRITE(%g1, %g5, %g6) @@ -146,8 +148,10 @@ kvmap_dtlb_vmalloc_addr: /* Load and check PTE. */ ldxa [%g5] ASI_PHYS_USE_EC, %g5 + mov 1, %g7 + sllx %g7, TSB_TAG_INVALID_BIT, %g7 brgez,a,pn %g5, kvmap_dtlb_longpath - KTSB_STORE(%g1, %g0) + KTSB_STORE(%g1, %g7) KTSB_WRITE(%g1, %g5, %g6) @@ -215,8 +219,8 @@ kvmap_dtlb_longpath: wrpr %g5, PSTATE_AG | PSTATE_MG, %pstate .section .sun4v_2insn_patch, "ax" .word 661b - nop - nop + SET_GL(1) + ldxa [%g0] ASI_SCRATCHPAD, %g5 .previous rdpr %tl, %g3 @@ -226,7 +230,7 @@ kvmap_dtlb_longpath: ldxa [%g4] ASI_DMMU, %g5 .section .sun4v_2insn_patch, "ax" .word 661b - mov %g4, %g5 + ldx [%g5 + HV_FAULT_D_ADDR_OFFSET], %g5 nop .previous diff --git a/arch/sparc64/kernel/sun4v_tlb_miss.S b/arch/sparc64/kernel/sun4v_tlb_miss.S index 244d50de849..57ccdaec7cc 100644 --- a/arch/sparc64/kernel/sun4v_tlb_miss.S +++ b/arch/sparc64/kernel/sun4v_tlb_miss.S @@ -16,15 +16,14 @@ ldx [BASE + HV_FAULT_D_ADDR_OFFSET], VADDR; \ ldx [BASE + HV_FAULT_D_CTX_OFFSET], CTX; - /* DEST = (CTX << 48) | (VADDR >> 22) + /* DEST = (VADDR >> 22) * * Branch to ZERO_CTX_LABEL is context is zero. */ -#define COMPUTE_TAG_TARGET(DEST, VADDR, CTX, TMP, ZERO_CTX_LABEL) \ - srlx VADDR, 22, TMP; \ - sllx CTX, 48, DEST; \ +#define COMPUTE_TAG_TARGET(DEST, VADDR, CTX, ZERO_CTX_LABEL) \ + srlx VADDR, 22, DEST; \ brz,pn CTX, ZERO_CTX_LABEL; \ - or DEST, TMP, DEST; + nop; /* Create TSB pointer. This is something like: * @@ -53,7 +52,7 @@ sun4v_itlb_miss: ldxa [%g1] ASI_SCRATCHPAD, %g1 LOAD_ITLB_INFO(%g2, %g4, %g5) - COMPUTE_TAG_TARGET(%g6, %g4, %g5, %g3, kvmap_itlb_4v) + COMPUTE_TAG_TARGET(%g6, %g4, %g5, kvmap_itlb_4v) COMPUTE_TSB_PTR(%g1, %g4, %g3, %g7) /* Load TSB tag/pte into %g2/%g3 and compare the tag. */ @@ -72,15 +71,15 @@ sun4v_itlb_miss: * * %g3: PTE * %g4: vaddr - * %g6: TAG TARGET (only "CTX << 48" part matters) */ sun4v_itlb_load: + ldxa [%g0] ASI_SCRATCHPAD, %g6 mov %o0, %g1 ! save %o0 mov %o1, %g2 ! save %o1 mov %o2, %g5 ! save %o2 mov %o3, %g7 ! save %o3 mov %g4, %o0 ! vaddr - srlx %g6, 48, %o1 ! ctx + ldx [%g6 + HV_FAULT_I_CTX_OFFSET], %o1 ! ctx mov %g3, %o2 ! PTE mov HV_MMU_IMMU, %o3 ! flags ta HV_MMU_MAP_ADDR_TRAP @@ -101,7 +100,7 @@ sun4v_dtlb_miss: ldxa [%g1] ASI_SCRATCHPAD, %g1 LOAD_DTLB_INFO(%g2, %g4, %g5) - COMPUTE_TAG_TARGET(%g6, %g4, %g5, %g3, kvmap_dtlb_4v) + COMPUTE_TAG_TARGET(%g6, %g4, %g5, kvmap_dtlb_4v) COMPUTE_TSB_PTR(%g1, %g4, %g3, %g7) /* Load TSB tag/pte into %g2/%g3 and compare the tag. */ @@ -115,15 +114,15 @@ sun4v_dtlb_miss: * * %g3: PTE * %g4: vaddr - * %g6: TAG TARGET (only "CTX << 48" part matters) */ sun4v_dtlb_load: + ldxa [%g0] ASI_SCRATCHPAD, %g6 mov %o0, %g1 ! save %o0 mov %o1, %g2 ! save %o1 mov %o2, %g5 ! save %o2 mov %o3, %g7 ! save %o3 mov %g4, %o0 ! vaddr - srlx %g6, 48, %o1 ! ctx + ldx [%g6 + HV_FAULT_D_CTX_OFFSET], %o1 ! ctx mov %g3, %o2 ! PTE mov HV_MMU_DMMU, %o3 ! flags ta HV_MMU_MAP_ADDR_TRAP @@ -136,16 +135,18 @@ sun4v_dtlb_load: retry sun4v_dtlb_prot: + SET_GL(1) + /* Load MMU Miss base into %g2. */ - ldxa [%g0] ASI_SCRATCHPAD, %g2 + ldxa [%g0] ASI_SCRATCHPAD, %g5 - ldx [%g2 + HV_FAULT_D_ADDR_OFFSET], %g5 + ldx [%g5 + HV_FAULT_D_ADDR_OFFSET], %g5 rdpr %tl, %g1 cmp %g1, 1 - bgu,pn %xcc, winfix_trampoline + bgu,pn %xcc, winfix_trampoline nop - ba,pt %xcc, sparc64_realfault_common - mov FAULT_CODE_DTLB | FAULT_CODE_WRITE, %g4 + ba,pt %xcc, sparc64_realfault_common + mov FAULT_CODE_DTLB | FAULT_CODE_WRITE, %g4 /* Called from trap table with TAG TARGET placed into * %g6, SCRATCHPAD_UTSBREG1 contents in %g1, and @@ -189,7 +190,8 @@ sun4v_itlb_error: sethi %hi(sun4v_err_itlb_vaddr), %g1 stx %g4, [%g1 + %lo(sun4v_err_itlb_vaddr)] sethi %hi(sun4v_err_itlb_ctx), %g1 - srlx %g6, 48, %o1 ! ctx + ldxa [%g0] ASI_SCRATCHPAD, %g6 + ldx [%g6 + HV_FAULT_I_CTX_OFFSET], %o1 stx %o1, [%g1 + %lo(sun4v_err_itlb_ctx)] sethi %hi(sun4v_err_itlb_pte), %g1 stx %g3, [%g1 + %lo(sun4v_err_itlb_pte)] @@ -214,7 +216,8 @@ sun4v_dtlb_error: sethi %hi(sun4v_err_dtlb_vaddr), %g1 stx %g4, [%g1 + %lo(sun4v_err_dtlb_vaddr)] sethi %hi(sun4v_err_dtlb_ctx), %g1 - srlx %g6, 48, %o1 ! ctx + ldxa [%g0] ASI_SCRATCHPAD, %g6 + ldx [%g6 + HV_FAULT_D_CTX_OFFSET], %o1 stx %o1, [%g1 + %lo(sun4v_err_dtlb_ctx)] sethi %hi(sun4v_err_dtlb_pte), %g1 stx %g3, [%g1 + %lo(sun4v_err_dtlb_pte)] diff --git a/arch/sparc64/kernel/tsb.S b/arch/sparc64/kernel/tsb.S index a17259cf34b..cc225c0563c 100644 --- a/arch/sparc64/kernel/tsb.S +++ b/arch/sparc64/kernel/tsb.S @@ -36,7 +36,7 @@ tsb_miss_itlb: /* At this point we have: * %g4 -- missing virtual address * %g1 -- TSB entry address - * %g6 -- TAG TARGET ((vaddr >> 22) | (ctx << 48)) + * %g6 -- TAG TARGET (vaddr >> 22) */ tsb_miss_page_table_walk: TRAP_LOAD_PGD_PHYS(%g7, %g5) @@ -50,8 +50,10 @@ tsb_reload: /* Load and check PTE. */ ldxa [%g5] ASI_PHYS_USE_EC, %g5 + mov 1, %g7 + sllx %g7, TSB_TAG_INVALID_BIT, %g7 brgez,a,pn %g5, tsb_do_fault - TSB_STORE(%g1, %g0) + TSB_STORE(%g1, %g7) /* If it is larger than the base page size, don't * bother putting it into the TSB. @@ -62,8 +64,10 @@ tsb_reload: sethi %hi(_PAGE_SZBITS), %g7 ldx [%g7 + %lo(_PAGE_SZBITS)], %g7 cmp %g2, %g7 + mov 1, %g7 + sllx %g7, TSB_TAG_INVALID_BIT, %g7 bne,a,pn %xcc, tsb_tlb_reload - TSB_STORE(%g1, %g0) + TSB_STORE(%g1, %g7) TSB_WRITE(%g1, %g5, %g6) @@ -136,7 +140,7 @@ tsb_do_fault: .section .sun4v_2insn_patch, "ax" .word 661b SET_GL(1) - ldxa [%g0] ASI_SCRATCHPAD, %g2 + ldxa [%g0] ASI_SCRATCHPAD, %g4 .previous bne,pn %xcc, tsb_do_itlb_fault @@ -150,7 +154,7 @@ tsb_do_dtlb_fault: ldxa [%g4] ASI_DMMU, %g5 .section .sun4v_2insn_patch, "ax" .word 661b - ldx [%g2 + HV_FAULT_D_ADDR_OFFSET], %g5 + ldx [%g4 + HV_FAULT_D_ADDR_OFFSET], %g5 nop .previous @@ -217,8 +221,9 @@ tsb_flush: bne,pn %icc, 1b membar #LoadLoad cmp %g1, %o1 + mov 1, %o3 bne,pt %xcc, 2f - clr %o3 + sllx %o3, TSB_TAG_INVALID_BIT, %o3 TSB_CAS_TAG(%o0, %g1, %o3) cmp %g1, %o3 bne,pn %xcc, 1b diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c index bd9e3205674..aa2aec6373c 100644 --- a/arch/sparc64/mm/init.c +++ b/arch/sparc64/mm/init.c @@ -296,7 +296,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t p tsb = &mm->context.tsb[(address >> PAGE_SHIFT) & (mm->context.tsb_nentries - 1UL)]; - tag = (address >> 22UL) | CTX_HWBITS(mm->context) << 48UL; + tag = (address >> 22UL); tsb_insert(tsb, tag, pte_val(pte)); } } @@ -1110,6 +1110,8 @@ void __init paging_init(void) kern_base = (prom_boot_mapping_phys_low >> 22UL) << 22UL; kern_size = (unsigned long)&_end - (unsigned long)KERNBASE; + memset(swapper_tsb, 0x40, sizeof(swapper_tsb)); + if (tlb_type == hypervisor) sun4v_pgprot_init(); else diff --git a/arch/sparc64/mm/tsb.c b/arch/sparc64/mm/tsb.c index 3c1ff05038b..353cb060561 100644 --- a/arch/sparc64/mm/tsb.c +++ b/arch/sparc64/mm/tsb.c @@ -20,9 +20,9 @@ static inline unsigned long tsb_hash(unsigned long vaddr, unsigned long nentries return vaddr & (nentries - 1); } -static inline int tag_compare(unsigned long tag, unsigned long vaddr, unsigned long context) +static inline int tag_compare(unsigned long tag, unsigned long vaddr) { - return (tag == ((vaddr >> 22) | (context << 48))); + return (tag == (vaddr >> 22)); } /* TSB flushes need only occur on the processor initiating the address @@ -38,8 +38,8 @@ void flush_tsb_kernel_range(unsigned long start, unsigned long end) unsigned long hash = tsb_hash(v, KERNEL_TSB_NENTRIES); struct tsb *ent = &swapper_tsb[hash]; - if (tag_compare(ent->tag, v, 0)) { - ent->tag = 0UL; + if (tag_compare(ent->tag, v)) { + ent->tag = (1UL << TSB_TAG_INVALID_BIT); membar_storeload_storestore(); } } @@ -50,14 +50,9 @@ void flush_tsb_user(struct mmu_gather *mp) struct mm_struct *mm = mp->mm; struct tsb *tsb = mm->context.tsb; unsigned long nentries = mm->context.tsb_nentries; - unsigned long ctx, base; + unsigned long base; int i; - if (unlikely(!CTX_VALID(mm->context))) - return; - - ctx = CTX_HWBITS(mm->context); - if (tlb_type == cheetah_plus || tlb_type == hypervisor) base = __pa(tsb); else @@ -71,7 +66,7 @@ void flush_tsb_user(struct mmu_gather *mp) hash = tsb_hash(v, nentries); ent = base + (hash * sizeof(struct tsb)); - tag = (v >> 22UL) | (ctx << 48UL); + tag = (v >> 22UL); tsb_flush(ent, tag); } @@ -243,7 +238,8 @@ static void copy_tsb(struct tsb *old_tsb, unsigned long old_size, "i" (ASI_NUCLEUS_QUAD_LDD)); } - if (!tag || (tag & (1UL << TSB_TAG_LOCK_BIT))) + if (tag & ((1UL << TSB_TAG_LOCK_BIT) | + (1UL << TSB_TAG_INVALID_BIT))) continue; /* We only put base page size PTEs into the TSB, @@ -315,10 +311,13 @@ void tsb_grow(struct mm_struct *mm, unsigned long rss, gfp_t gfp_flags) break; } - page = alloc_pages(gfp_flags | __GFP_ZERO, get_order(size)); + page = alloc_pages(gfp_flags, get_order(size)); if (unlikely(!page)) return; + /* Mark all tags as invalid. */ + memset(page_address(page), 0x40, size); + if (size == max_tsb_size) mm->context.tsb_rss_limit = ~0UL; else diff --git a/include/asm-sparc64/tsb.h b/include/asm-sparc64/tsb.h index 7f3abc32c4d..6e6768067e3 100644 --- a/include/asm-sparc64/tsb.h +++ b/include/asm-sparc64/tsb.h @@ -12,6 +12,8 @@ * * ldxa [%g0] ASI_{D,I}MMU_TSB_8KB_PTR, %g1 * ldxa [%g0] ASI_{D,I}MMU, %g6 + * sllx %g6, 22, %g6 + * srlx %g6, 22, %g6 * ldda [%g1] ASI_NUCLEUS_QUAD_LDD, %g4 * cmp %g4, %g6 * bne,pn %xcc, tsb_miss_{d,i}tlb @@ -29,6 +31,9 @@ * ------------------------------------------------- * 63 61 60 48 47 42 41 0 * + * But actually, since we use per-mm TSB's, we zero out the CONTEXT + * field. + * * Like the powerpc hashtables we need to use locking in order to * synchronize while we update the entries. PTE updates need locking * as well. @@ -42,6 +47,9 @@ #define TSB_TAG_LOCK_BIT 47 #define TSB_TAG_LOCK_HIGH (1 << (TSB_TAG_LOCK_BIT - 32)) +#define TSB_TAG_INVALID_BIT 46 +#define TSB_TAG_INVALID_HIGH (1 << (TSB_TAG_INVALID_BIT - 32)) + #define TSB_MEMBAR membar #StoreStore /* Some cpus support physical address quad loads. We want to use diff --git a/include/asm-sparc64/ttable.h b/include/asm-sparc64/ttable.h index 9e28b240f3a..2d5e3c464df 100644 --- a/include/asm-sparc64/ttable.h +++ b/include/asm-sparc64/ttable.h @@ -184,20 +184,20 @@ ldxa [%g0] ASI_SCRATCHPAD, %g2; \ ldx [%g2 + HV_FAULT_I_ADDR_OFFSET], %g4; \ ldx [%g2 + HV_FAULT_I_CTX_OFFSET], %g5; \ - srlx %g4, 22, %g7; \ - sllx %g5, 48, %g6; \ + srlx %g4, 22, %g6; \ ba,pt %xcc, sun4v_itsb_miss; \ - or %g6, %g7, %g6; \ + nop; \ + nop; \ nop; #define SUN4V_DTSB_MISS \ ldxa [%g0] ASI_SCRATCHPAD, %g2; \ ldx [%g2 + HV_FAULT_D_ADDR_OFFSET], %g4; \ ldx [%g2 + HV_FAULT_D_CTX_OFFSET], %g5; \ - srlx %g4, 22, %g7; \ - sllx %g5, 48, %g6; \ + srlx %g4, 22, %g6; \ ba,pt %xcc, sun4v_dtsb_miss; \ - or %g6, %g7, %g6; \ + nop; \ + nop; \ nop; /* Before touching these macros, you owe it to yourself to go and -- cgit v1.2.3-70-g09d2 From 45f791eb0f03e760183d30d3f1f18dc2b8e902fe Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 1 Mar 2006 22:42:18 -0800 Subject: [SPARC64]: Fix _PAGE_EXEC handling. First of all, use the known _PAGE_EXEC_{4U,4V} value instead of loading _PAGE_EXEC from memory. We either know which one to use by context, or we can code patch the test. Next, we need to check executability of a PTE in the generic TSB miss handler. Signed-off-by: David S. Miller --- arch/sparc64/kernel/itlb_miss.S | 10 +++++----- arch/sparc64/kernel/sun4v_tlb_miss.S | 4 +--- arch/sparc64/kernel/tsb.S | 9 +++++++++ 3 files changed, 15 insertions(+), 8 deletions(-) (limited to 'arch/sparc64/kernel/itlb_miss.S') diff --git a/arch/sparc64/kernel/itlb_miss.S b/arch/sparc64/kernel/itlb_miss.S index 6dfe3968c37..ad46e2024f4 100644 --- a/arch/sparc64/kernel/itlb_miss.S +++ b/arch/sparc64/kernel/itlb_miss.S @@ -9,18 +9,18 @@ cmp %g4, %g6 ! Compare TAG /* ITLB ** ICACHE line 2: TSB compare and TLB load */ - sethi %hi(PAGE_EXEC), %g4 ! Setup exec check - ldx [%g4 + %lo(PAGE_EXEC)], %g4 bne,pn %xcc, tsb_miss_itlb ! Miss mov FAULT_CODE_ITLB, %g3 - andcc %g5, %g4, %g0 ! Executable? + andcc %g5, _PAGE_EXEC_4U, %g0 ! Executable? be,pn %xcc, tsb_do_fault nop ! Delay slot, fill me + stxa %g5, [%g0] ASI_ITLB_DATA_IN ! Load TLB + retry ! Trap done nop /* ITLB ** ICACHE line 3: */ - stxa %g5, [%g0] ASI_ITLB_DATA_IN ! Load TLB - retry ! Trap done + nop + nop nop nop nop diff --git a/arch/sparc64/kernel/sun4v_tlb_miss.S b/arch/sparc64/kernel/sun4v_tlb_miss.S index 3dccbd67818..3eed8db9684 100644 --- a/arch/sparc64/kernel/sun4v_tlb_miss.S +++ b/arch/sparc64/kernel/sun4v_tlb_miss.S @@ -58,11 +58,9 @@ sun4v_itlb_miss: /* Load TSB tag/pte into %g2/%g3 and compare the tag. */ ldda [%g1] ASI_QUAD_LDD_PHYS_4V, %g2 cmp %g2, %g6 - sethi %hi(PAGE_EXEC), %g7 - ldx [%g7 + %lo(PAGE_EXEC)], %g7 bne,a,pn %xcc, tsb_miss_page_table_walk mov FAULT_CODE_ITLB, %g3 - andcc %g3, %g7, %g0 + andcc %g3, _PAGE_EXEC_4V, %g0 be,a,pn %xcc, tsb_do_fault mov FAULT_CODE_ITLB, %g3 diff --git a/arch/sparc64/kernel/tsb.S b/arch/sparc64/kernel/tsb.S index cc225c0563c..563852bf359 100644 --- a/arch/sparc64/kernel/tsb.S +++ b/arch/sparc64/kernel/tsb.S @@ -103,6 +103,15 @@ tsb_dtlb_load: mov %g5, %g3 tsb_itlb_load: + /* Executable bit must be set. */ +661: andcc %g5, _PAGE_EXEC_4U, %g0 + .section .sun4v_1insn_patch, "ax" + .word 661b + andcc %g5, _PAGE_EXEC_4V, %g0 + .previous + + be,pn %xcc, tsb_do_fault + nop 661: stxa %g5, [%g0] ASI_ITLB_DATA_IN retry -- cgit v1.2.3-70-g09d2