summaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2006-02-23 14:19:28 -0800
committerDavid S. Miller <davem@sunset.davemloft.net>2006-03-20 01:14:00 -0800
commita0663a79ad4faebe1db4a56e2e767b120b12333a (patch)
tree612a53e387a6aea6116f8a1637050fa13c6d9f80 /arch
parent074d82cf688fe2dfa7ba4a2317c56f62d13fb522 (diff)
[SPARC64]: Fix TLB context allocation with SMT style shared TLBs.
The context allocation scheme we use depends upon there being a 1<-->1 mapping from cpu to physical TLB for correctness. Chips like Niagara break this assumption. So what we do is notify all cpus with a cross call when the context version number changes, and if necessary this makes them allocate a valid context for the address space they are running at the time. Stress tested with make -j1024, make -j2048, and make -j4096 kernel builds on a 32-strand, 8 core, T2000 with 16GB of ram. Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'arch')
-rw-r--r--arch/sparc64/kernel/smp.c40
-rw-r--r--arch/sparc64/mm/init.c9
2 files changed, 37 insertions, 12 deletions
diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c
index 0cd9b16612e..1ce94081149 100644
--- a/arch/sparc64/kernel/smp.c
+++ b/arch/sparc64/kernel/smp.c
@@ -885,26 +885,44 @@ void flush_dcache_page_all(struct mm_struct *mm, struct page *page)
put_cpu();
}
+static void __smp_receive_signal_mask(cpumask_t mask)
+{
+ smp_cross_call_masked(&xcall_receive_signal, 0, 0, 0, mask);
+}
+
void smp_receive_signal(int cpu)
{
cpumask_t mask = cpumask_of_cpu(cpu);
- if (cpu_online(cpu)) {
- u64 data0 = (((u64)&xcall_receive_signal) & 0xffffffff);
-
- if (tlb_type == spitfire)
- spitfire_xcall_deliver(data0, 0, 0, mask);
- else if (tlb_type == cheetah || tlb_type == cheetah_plus)
- cheetah_xcall_deliver(data0, 0, 0, mask);
- else if (tlb_type == hypervisor)
- hypervisor_xcall_deliver(data0, 0, 0, mask);
- }
+ if (cpu_online(cpu))
+ __smp_receive_signal_mask(mask);
}
void smp_receive_signal_client(int irq, struct pt_regs *regs)
{
- /* Just return, rtrap takes care of the rest. */
+ struct mm_struct *mm;
+
clear_softint(1 << irq);
+
+ /* See if we need to allocate a new TLB context because
+ * the version of the one we are using is now out of date.
+ */
+ mm = current->active_mm;
+ if (likely(mm)) {
+ if (unlikely(!CTX_VALID(mm->context))) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&mm->context.lock, flags);
+ get_new_mmu_context(mm);
+ load_secondary_context(mm);
+ spin_unlock_irqrestore(&mm->context.lock, flags);
+ }
+ }
+}
+
+void smp_new_mmu_context_version(void)
+{
+ __smp_receive_signal_mask(cpu_online_map);
}
void smp_report_regs(void)
diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c
index 16f0db38d93..ccf083aecb6 100644
--- a/arch/sparc64/mm/init.c
+++ b/arch/sparc64/mm/init.c
@@ -629,17 +629,20 @@ void __flush_dcache_range(unsigned long start, unsigned long end)
* let the user have CTX 0 (nucleus) or we ever use a CTX
* version of zero (and thus NO_CONTEXT would not be caught
* by version mis-match tests in mmu_context.h).
+ *
+ * Always invoked with interrupts disabled.
*/
void get_new_mmu_context(struct mm_struct *mm)
{
unsigned long ctx, new_ctx;
unsigned long orig_pgsz_bits;
-
+ int new_version;
spin_lock(&ctx_alloc_lock);
orig_pgsz_bits = (mm->context.sparc64_ctx_val & CTX_PGSZ_MASK);
ctx = (tlb_context_cache + 1) & CTX_NR_MASK;
new_ctx = find_next_zero_bit(mmu_context_bmap, 1 << CTX_NR_BITS, ctx);
+ new_version = 0;
if (new_ctx >= (1 << CTX_NR_BITS)) {
new_ctx = find_next_zero_bit(mmu_context_bmap, ctx, 1);
if (new_ctx >= ctx) {
@@ -662,6 +665,7 @@ void get_new_mmu_context(struct mm_struct *mm)
mmu_context_bmap[i + 2] = 0;
mmu_context_bmap[i + 3] = 0;
}
+ new_version = 1;
goto out;
}
}
@@ -671,6 +675,9 @@ out:
tlb_context_cache = new_ctx;
mm->context.sparc64_ctx_val = new_ctx | orig_pgsz_bits;
spin_unlock(&ctx_alloc_lock);
+
+ if (unlikely(new_version))
+ smp_new_mmu_context_version();
}
void sparc_ultra_dump_itlb(void)