[SPARC64]: Move away from virtual page tables, part 1.

We now use the TSB hardware assist features of the UltraSPARC MMUs. SMP is currently knowingly broken; we need to find another place to store the per-cpu base pointers. We hid them away in the TSB base register, and that obviously will not work any more :-) Another known broken case is non-8KB base page size. I also noticed that flush_tlb_all() is not referenced anywhere; only the internal __flush_tlb_all() (local cpu only) is used by the sparc64 port, so we can get rid of flush_tlb_all(). The kernel gets its own 8KB TSB (swapper_tsb) and each address space gets its own private 8KB TSB. Later we can add code to dynamically increase the size of the per-process TSB as the RSS grows. An 8KB TSB is good enough for up to about a 4MB RSS, after which the TSB starts to incur many capacity and conflict misses. We even accumulate OBP translations into the kernel TSB. Another area for refinement is large page size support. We could use a secondary address space TSB to handle those. Signed-off-by: David S. Miller <davem@davemloft.net>
author: David S. Miller <davem@davemloft.net> 2006-01-31 18:29:18 -0800
committer: David S. Miller <davem@sunset.davemloft.net> 2006-03-20 01:11:13 -0800
commit: 74bf4312fff083ab25c3f357cc653ada7995e5f6 (patch)
tree: c23dea461e32485f4cd7ca4b8c33c632655eb906 /arch/sparc64/mm
parent: 30d4d1ffed7098afe2641536d67eef150499da02 (diff)
5 files changed, 92 insertions, 204 deletions
diff --git a/arch/sparc64/mm/Makefile b/arch/sparc64/mm/Makefile
index 9d0960e69f4..e415bf942bc 100644
--- a/arch/sparc64/mm/Makefile
+++ b/arch/sparc64/mm/Makefile
@@ -5,6 +5,6 @@
 EXTRA_AFLAGS := -ansi
 EXTRA_CFLAGS := -Werror
 
-obj-y    := ultra.o tlb.o fault.o init.o generic.o
+obj-y    := ultra.o tlb.o tsb.o fault.o init.o generic.o
 
 obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c
index 1e44ee26cee..da068f6b259 100644
--- a/arch/sparc64/mm/init.c
+++ b/arch/sparc64/mm/init.c
@@ -408,8 +408,7 @@ unsigned long prom_virt_to_phys(unsigned long promva, int *error)
 
 /* The obp translations are saved based on 8k pagesize, since obp can
  * use a mixture of pagesizes. Misses to the LOW_OBP_ADDRESS ->
- * HI_OBP_ADDRESS range are handled in ktlb.S and do not use the vpte
- * scheme (also, see rant in inherit_locked_prom_mappings()).
+ * HI_OBP_ADDRESS range are handled in ktlb.S.
  */
 static inline int in_obp_range(unsigned long vaddr)
 {
@@ -539,75 +538,6 @@ static void __init inherit_prom_mappings(void)
 	prom_printf("done.\n");
 }
 
-/* The OBP specifications for sun4u mark 0xfffffffc00000000 and
- * upwards as reserved for use by the firmware (I wonder if this
- * will be the same on Cheetah...).  We use this virtual address
- * range for the VPTE table mappings of the nucleus so we need
- * to zap them when we enter the PROM.  -DaveM
- */
-static void __flush_nucleus_vptes(void)
-{
-	unsigned long prom_reserved_base = 0xfffffffc00000000UL;
-	int i;
-
-	/* Only DTLB must be checked for VPTE entries. */
-	if (tlb_type == spitfire) {
-		for (i = 0; i < 63; i++) {
-			unsigned long tag;
-
-			/* Spitfire Errata #32 workaround */
-			/* NOTE: Always runs on spitfire, so no cheetah+
-			 *       page size encodings.
-			 */
-			__asm__ __volatile__("stxa	%0, [%1] %2\n\t"
-					     "flush	%%g6"
-					     : /* No outputs */
-					     : "r" (0),
-					     "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU));
-
-			tag = spitfire_get_dtlb_tag(i);
-			if (((tag & ~(PAGE_MASK)) == 0) &&
-			    ((tag &  (PAGE_MASK)) >= prom_reserved_base)) {
-				__asm__ __volatile__("stxa %%g0, [%0] %1\n\t"
-						     "membar #Sync"
-						     : /* no outputs */
-						     : "r" (TLB_TAG_ACCESS), "i" (ASI_DMMU));
-				spitfire_put_dtlb_data(i, 0x0UL);
-			}
-		}
-	} else if (tlb_type == cheetah || tlb_type == cheetah_plus) {
-		for (i = 0; i < 512; i++) {
-			unsigned long tag = cheetah_get_dtlb_tag(i, 2);
-
-			if ((tag & ~PAGE_MASK) == 0 &&
-			    (tag & PAGE_MASK) >= prom_reserved_base) {
-				__asm__ __volatile__("stxa %%g0, [%0] %1\n\t"
-						     "membar #Sync"
-						     : /* no outputs */
-						     : "r" (TLB_TAG_ACCESS), "i" (ASI_DMMU));
-				cheetah_put_dtlb_data(i, 0x0UL, 2);
-			}
-
-			if (tlb_type != cheetah_plus)
-				continue;
-
-			tag = cheetah_get_dtlb_tag(i, 3);
-
-			if ((tag & ~PAGE_MASK) == 0 &&
-			    (tag & PAGE_MASK) >= prom_reserved_base) {
-				__asm__ __volatile__("stxa %%g0, [%0] %1\n\t"
-						     "membar #Sync"
-						     : /* no outputs */
-						     : "r" (TLB_TAG_ACCESS), "i" (ASI_DMMU));
-				cheetah_put_dtlb_data(i, 0x0UL, 3);
-			}
-		}
-	} else {
-		/* Implement me :-) */
-		BUG();
-	}
-}
-
 static int prom_ditlb_set;
 struct prom_tlb_entry {
 	int		tlb_ent;
@@ -635,9 +565,6 @@ void prom_world(int enter)
 			     : "i" (PSTATE_IE));
 
 	if (enter) {
-		/* Kick out nucleus VPTEs. */
-		__flush_nucleus_vptes();
-
 		/* Install PROM world. */
 		for (i = 0; i < 16; i++) {
 			if (prom_dtlb[i].tlb_ent != -1) {
@@ -1039,18 +966,7 @@ out:
 struct pgtable_cache_struct pgt_quicklists;
 #endif
 
-/* OK, we have to color these pages. The page tables are accessed
- * by non-Dcache enabled mapping in the VPTE area by the dtlb_backend.S
- * code, as well as by PAGE_OFFSET range direct-mapped addresses by 
- * other parts of the kernel. By coloring, we make sure that the tlbmiss 
- * fast handlers do not get data from old/garbage dcache lines that 
- * correspond to an old/stale virtual address (user/kernel) that 
- * previously mapped the pagetable page while accessing vpte range 
- * addresses. The idea is that if the vpte color and PAGE_OFFSET range 
- * color is the same, then when the kernel initializes the pagetable 
- * using the later address range, accesses with the first address
- * range will see the newly initialized data rather than the garbage.
- */
+/* XXX We don't need to color these things in the D-cache any longer.  */
 #ifdef DCACHE_ALIASING_POSSIBLE
 #define DC_ALIAS_SHIFT	1
 #else
@@ -1419,6 +1335,9 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
 	kernel_map_range(phys_start, phys_end,
 			 (enable ? PAGE_KERNEL : __pgprot(0)));
 
+	flush_tsb_kernel_range(PAGE_OFFSET + phys_start,
+			       PAGE_OFFSET + phys_end);
+
 	/* we should perform an IPI and flush all tlbs,
 	 * but that can deadlock->flush only current cpu.
 	 */
diff --git a/arch/sparc64/mm/tlb.c b/arch/sparc64/mm/tlb.c
index 8b104be4662..78357cc2a0b 100644
--- a/arch/sparc64/mm/tlb.c
+++ b/arch/sparc64/mm/tlb.c
@@ -25,6 +25,8 @@ void flush_tlb_pending(void)
 	struct mmu_gather *mp = &__get_cpu_var(mmu_gathers);
 
 	if (mp->tlb_nr) {
+		flush_tsb_user(mp);
+
 		if (CTX_VALID(mp->mm->context)) {
 #ifdef CONFIG_SMP
 			smp_flush_tlb_pending(mp->mm, mp->tlb_nr,
@@ -89,62 +91,3 @@ no_cache_flush:
 	if (nr >= TLB_BATCH_NR)
 		flush_tlb_pending();
 }
-
-void flush_tlb_pgtables(struct mm_struct *mm, unsigned long start, unsigned long end)
-{
-	struct mmu_gather *mp = &__get_cpu_var(mmu_gathers);
-	unsigned long nr = mp->tlb_nr;
-	long s = start, e = end, vpte_base;
-
-	if (mp->fullmm)
-		return;
-
-	/* If start is greater than end, that is a real problem.  */
-	BUG_ON(start > end);
-
-	/* However, straddling the VA space hole is quite normal. */
-	s &= PMD_MASK;
-	e = (e + PMD_SIZE - 1) & PMD_MASK;
-
-	vpte_base = (tlb_type == spitfire ?
-		     VPTE_BASE_SPITFIRE :
-		     VPTE_BASE_CHEETAH);
-
-	if (unlikely(nr != 0 && mm != mp->mm)) {
-		flush_tlb_pending();
-		nr = 0;
-	}
-
-	if (nr == 0)
-		mp->mm = mm;
-
-	start = vpte_base + (s >> (PAGE_SHIFT - 3));
-	end = vpte_base + (e >> (PAGE_SHIFT - 3));
-
-	/* If the request straddles the VA space hole, we
-	 * need to swap start and end.  The reason this
-	 * occurs is that "vpte_base" is the center of
-	 * the linear page table mapping area.  Thus,
-	 * high addresses with the sign bit set map to
-	 * addresses below vpte_base and non-sign bit
-	 * addresses map to addresses above vpte_base.
-	 */
-	if (end < start) {
-		unsigned long tmp = start;
-
-		start = end;
-		end = tmp;
-	}
-
-	while (start < end) {
-		mp->vaddrs[nr] = start;
-		mp->tlb_nr = ++nr;
-		if (nr >= TLB_BATCH_NR) {
-			flush_tlb_pending();
-			nr = 0;
-		}
-		start += PAGE_SIZE;
-	}
-	if (nr)
-		flush_tlb_pending();
-}
diff --git a/arch/sparc64/mm/tsb.c b/arch/sparc64/mm/tsb.c
new file mode 100644
index 00000000000..15e8af58b1d
--- /dev/null
+++ b/arch/sparc64/mm/tsb.c
@@ -0,0 +1,84 @@
+/* arch/sparc64/mm/tsb.c
+ *
+ * Copyright (C) 2006 David S. Miller <davem@davemloft.net>
+ */
+
+#include <linux/kernel.h>
+#include <asm/system.h>
+#include <asm/page.h>
+#include <asm/tlbflush.h>
+#include <asm/tlb.h>
+
+#define TSB_ENTRY_ALIGNMENT	16
+
+struct tsb {
+	unsigned long tag;
+	unsigned long pte;
+} __attribute__((aligned(TSB_ENTRY_ALIGNMENT)));
+
+/* We use an 8K TSB for the whole kernel, this allows to
+ * handle about 4MB of modules and vmalloc mappings without
+ * incurring many hash conflicts.
+ */
+#define KERNEL_TSB_SIZE_BYTES	8192
+#define KERNEL_TSB_NENTRIES \
+	(KERNEL_TSB_SIZE_BYTES / sizeof(struct tsb))
+
+extern struct tsb swapper_tsb[KERNEL_TSB_NENTRIES];
+
+static inline unsigned long tsb_hash(unsigned long vaddr)
+{
+	vaddr >>= PAGE_SHIFT;
+	return vaddr & (KERNEL_TSB_NENTRIES - 1);
+}
+
+static inline int tag_compare(struct tsb *entry, unsigned long vaddr, unsigned long context)
+{
+	if (context == ~0UL)
+		return 1;
+
+	return (entry->tag == ((vaddr >> 22) | (context << 48)));
+}
+
+/* TSB flushes need only occur on the processor initiating the address
+ * space modification, not on each cpu the address space has run on.
+ * Only the TLB flush needs that treatment.
+ */
+
+void flush_tsb_kernel_range(unsigned long start, unsigned long end)
+{
+	unsigned long v;
+
+	for (v = start; v < end; v += PAGE_SIZE) {
+		struct tsb *ent = &swapper_tsb[tsb_hash(v)];
+
+		if (tag_compare(ent, v, 0)) {
+			ent->tag = 0UL;
+			membar_storeload_storestore();
+		}
+	}
+}
+
+void flush_tsb_user(struct mmu_gather *mp)
+{
+	struct mm_struct *mm = mp->mm;
+	struct tsb *tsb = (struct tsb *) mm->context.sparc64_tsb;
+	unsigned long ctx = ~0UL;
+	int i;
+
+	if (CTX_VALID(mm->context))
+		ctx = CTX_HWBITS(mm->context);
+
+	for (i = 0; i < mp->tlb_nr; i++) {
+		unsigned long v = mp->vaddrs[i];
+		struct tsb *ent;
+
+		v &= ~0x1UL;
+
+		ent = &tsb[tsb_hash(v)];
+		if (tag_compare(ent, v, ctx)) {
+			ent->tag = 0UL;
+			membar_storeload_storestore();
+		}
+	}
+}
diff --git a/arch/sparc64/mm/ultra.S b/arch/sparc64/mm/ultra.S
index e4c9151fa11..22791f29552 100644
--- a/arch/sparc64/mm/ultra.S
+++ b/arch/sparc64/mm/ultra.S
@@ -453,64 +453,6 @@ xcall_flush_dcache_page_spitfire: /* %g1 == physical page address
 	nop
 	nop
 
-	.data
-
-errata32_hwbug:
-	.xword	0
-
-	.text
-
-	/* These two are not performance critical... */
-	.globl		xcall_flush_tlb_all_spitfire
-xcall_flush_tlb_all_spitfire:
-	/* Spitfire Errata #32 workaround. */
-	sethi		%hi(errata32_hwbug), %g4
-	stx		%g0, [%g4 + %lo(errata32_hwbug)]
-
-	clr		%g2
-	clr		%g3
-1:	ldxa		[%g3] ASI_DTLB_DATA_ACCESS, %g4
-	and		%g4, _PAGE_L, %g5
-	brnz,pn		%g5, 2f
-	 mov		TLB_TAG_ACCESS, %g7
-
-	stxa		%g0, [%g7] ASI_DMMU
-	membar		#Sync
-	stxa		%g0, [%g3] ASI_DTLB_DATA_ACCESS
-	membar		#Sync
-
-	/* Spitfire Errata #32 workaround. */
-	sethi		%hi(errata32_hwbug), %g4
-	stx		%g0, [%g4 + %lo(errata32_hwbug)]
-
-2:	ldxa		[%g3] ASI_ITLB_DATA_ACCESS, %g4
-	and		%g4, _PAGE_L, %g5
-	brnz,pn		%g5, 2f
-	 mov		TLB_TAG_ACCESS, %g7
-
-	stxa		%g0, [%g7] ASI_IMMU
-	membar		#Sync
-	stxa		%g0, [%g3] ASI_ITLB_DATA_ACCESS
-	membar		#Sync
-
-	/* Spitfire Errata #32 workaround. */
-	sethi		%hi(errata32_hwbug), %g4
-	stx		%g0, [%g4 + %lo(errata32_hwbug)]
-
-2:	add		%g2, 1, %g2
-	cmp		%g2, SPITFIRE_HIGHEST_LOCKED_TLBENT
-	ble,pt		%icc, 1b
-	 sll		%g2, 3, %g3
-	flush		%g6
-	retry
-
-	.globl		xcall_flush_tlb_all_cheetah
-xcall_flush_tlb_all_cheetah:
-	mov		0x80, %g2
-	stxa		%g0, [%g2] ASI_DMMU_DEMAP
-	stxa		%g0, [%g2] ASI_IMMU_DEMAP
-	retry
-
 	/* These just get rescheduled to PIL vectors. */
 	.globl		xcall_call_function
 xcall_call_function:
author	David S. Miller <davem@davemloft.net>	2006-01-31 18:29:18 -0800
committer	David S. Miller <davem@sunset.davemloft.net>	2006-03-20 01:11:13 -0800
commit	74bf4312fff083ab25c3f357cc653ada7995e5f6 (patch)
tree	c23dea461e32485f4cd7ca4b8c33c632655eb906 /arch/sparc64/mm
parent	30d4d1ffed7098afe2641536d67eef150499da02 (diff)