From dcc17d1baef3721d1574e5b2f4f2d4607514bcff Mon Sep 17 00:00:00 2001
From: Peter Keilty <peter.keilty@hp.com>
Date: Mon, 31 Oct 2005 16:44:47 -0500
Subject: [IA64] Use bitmaps for efficient context allocation/free

Corrects the very inefficent method of finding free context_ids in
get_mmu_context().  Instead of walking the task_list of all processes,
2 bitmaps are used to efficently store and lookup state, inuse and
needs flushing. The entire rid address space is now used before calling
wrap_mmu_context and global tlb flushing.

Special thanks to Ken and Rohit for their review and modifications in
using a bit flushmap.

Signed-off-by: Peter Keilty <peter.keilty@hp.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/kernel/setup.c |  1 +
 arch/ia64/mm/tlb.c       | 56 +++++++++++++++++++++++-------------------------
 2 files changed, 28 insertions(+), 29 deletions(-)

(limited to 'arch/ia64')

diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index fc56ca2da35..c9388a92cf4 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -454,6 +454,7 @@ setup_arch (char **cmdline_p)
 #endif
 
 	cpu_init();	/* initialize the bootstrap CPU */
+	mmu_context_init();	/* initialize context_id bitmap */
 
 #ifdef CONFIG_ACPI
 	acpi_boot_init();
diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c
index c79a9b96d02..39628fca274 100644
--- a/arch/ia64/mm/tlb.c
+++ b/arch/ia64/mm/tlb.c
@@ -8,6 +8,8 @@
  *		Modified RID allocation for SMP
  *          Goutham Rao <goutham.rao@intel.com>
  *              IPI based ptc implementation and A-step IPI implementation.
+ * Rohit Seth <rohit.seth@intel.com>
+ * Ken Chen <kenneth.w.chen@intel.com>
  */
 #include <linux/config.h>
 #include <linux/module.h>
@@ -16,12 +18,14 @@
 #include <linux/sched.h>
 #include <linux/smp.h>
 #include <linux/mm.h>
+#include <linux/bootmem.h>
 
 #include <asm/delay.h>
 #include <asm/mmu_context.h>
 #include <asm/pgalloc.h>
 #include <asm/pal.h>
 #include <asm/tlbflush.h>
+#include <asm/dma.h>
 
 static struct {
 	unsigned long mask;	/* mask of supported purge page-sizes */
@@ -31,49 +35,43 @@ static struct {
 struct ia64_ctx ia64_ctx = {
 	.lock =		SPIN_LOCK_UNLOCKED,
 	.next =		1,
-	.limit =	(1 << 15) - 1,		/* start out with the safe (architected) limit */
 	.max_ctx =	~0U
 };
 
 DEFINE_PER_CPU(u8, ia64_need_tlb_flush);
 
+/*
+ * Initializes the ia64_ctx.bitmap array based on max_ctx+1.
+ * Called after cpu_init() has setup ia64_ctx.max_ctx based on
+ * maximum RID that is supported by boot CPU.
+ */
+void __init
+mmu_context_init (void)
+{
+	ia64_ctx.bitmap = alloc_bootmem((ia64_ctx.max_ctx+1)>>3);
+	ia64_ctx.flushmap = alloc_bootmem((ia64_ctx.max_ctx+1)>>3);
+}
+
 /*
  * Acquire the ia64_ctx.lock before calling this function!
  */
 void
 wrap_mmu_context (struct mm_struct *mm)
 {
-	unsigned long tsk_context, max_ctx = ia64_ctx.max_ctx;
-	struct task_struct *tsk;
 	int i;
+	unsigned long flush_bit;
 
-	if (ia64_ctx.next > max_ctx)
-		ia64_ctx.next = 300;	/* skip daemons */
-	ia64_ctx.limit = max_ctx + 1;
-
-	/*
-	 * Scan all the task's mm->context and set proper safe range
-	 */
-
-	read_lock(&tasklist_lock);
-  repeat:
-	for_each_process(tsk) {
-		if (!tsk->mm)
-			continue;
-		tsk_context = tsk->mm->context;
-		if (tsk_context == ia64_ctx.next) {
-			if (++ia64_ctx.next >= ia64_ctx.limit) {
-				/* empty range: reset the range limit and start over */
-				if (ia64_ctx.next > max_ctx)
-					ia64_ctx.next = 300;
-				ia64_ctx.limit = max_ctx + 1;
-				goto repeat;
-			}
-		}
-		if ((tsk_context > ia64_ctx.next) && (tsk_context < ia64_ctx.limit))
-			ia64_ctx.limit = tsk_context;
+	for (i=0; i <= ia64_ctx.max_ctx / BITS_PER_LONG; i++) {
+		flush_bit = xchg(&ia64_ctx.flushmap[i], 0);
+		ia64_ctx.bitmap[i] ^= flush_bit;
 	}
-	read_unlock(&tasklist_lock);
+ 
+	/* use offset at 300 to skip daemons */
+	ia64_ctx.next = find_next_zero_bit(ia64_ctx.bitmap,
+				ia64_ctx.max_ctx, 300);
+	ia64_ctx.limit = find_next_bit(ia64_ctx.bitmap,
+				ia64_ctx.max_ctx, ia64_ctx.next);
+
 	/* can't call flush_tlb_all() here because of race condition with O(1) scheduler [EF] */
 	{
 		int cpu = get_cpu(); /* prevent preemption/migration */
-- 
cgit v1.2.3-70-g09d2


From 58cd90829918dabbd81a453de676d41fb7b628ad Mon Sep 17 00:00:00 2001
From: "Chen, Kenneth W" <kenneth.w.chen@intel.com>
Date: Sat, 29 Oct 2005 18:47:04 -0700
Subject: [IA64] make mmu_context.h and tlb.c 80-column friendly

wrap_mmu_context(), delayed_tlb_flush(), get_mmu_context() all
have an extra { } block which cause one extra indentation.
get_mmu_context() is particularly bad with 5 indentations to
the most inner "if".  It finally gets on my nerve that I can't
keep the code within 80 columns.  Remove the extra { } block
and while I'm at it, reformat all the comments to 80-column
friendly.  No functional change at all with this patch.

Signed-off-by: Ken Chen <kenneth.w.chen@intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/mm/tlb.c             | 33 +++++++++--------
 include/asm-ia64/mmu_context.h | 80 ++++++++++++++++++++++--------------------
 2 files changed, 59 insertions(+), 54 deletions(-)

(limited to 'arch/ia64')

diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c
index 39628fca274..41105d45442 100644
--- a/arch/ia64/mm/tlb.c
+++ b/arch/ia64/mm/tlb.c
@@ -29,7 +29,7 @@
 
 static struct {
 	unsigned long mask;	/* mask of supported purge page-sizes */
-	unsigned long max_bits;	/* log2() of largest supported purge page-size */
+	unsigned long max_bits;	/* log2 of largest supported purge page-size */
 } purge;
 
 struct ia64_ctx ia64_ctx = {
@@ -58,7 +58,7 @@ mmu_context_init (void)
 void
 wrap_mmu_context (struct mm_struct *mm)
 {
-	int i;
+	int i, cpu;
 	unsigned long flush_bit;
 
 	for (i=0; i <= ia64_ctx.max_ctx / BITS_PER_LONG; i++) {
@@ -72,20 +72,21 @@ wrap_mmu_context (struct mm_struct *mm)
 	ia64_ctx.limit = find_next_bit(ia64_ctx.bitmap,
 				ia64_ctx.max_ctx, ia64_ctx.next);
 
-	/* can't call flush_tlb_all() here because of race condition with O(1) scheduler [EF] */
-	{
-		int cpu = get_cpu(); /* prevent preemption/migration */
-		for_each_online_cpu(i) {
-			if (i != cpu)
-				per_cpu(ia64_need_tlb_flush, i) = 1;
-		}
-		put_cpu();
-	}
+	/*
+	 * can't call flush_tlb_all() here because of race condition
+	 * with O(1) scheduler [EF]
+	 */
+	cpu = get_cpu(); /* prevent preemption/migration */
+	for_each_online_cpu(i)
+		if (i != cpu)
+			per_cpu(ia64_need_tlb_flush, i) = 1;
+	put_cpu();
 	local_flush_tlb_all();
 }
 
 void
-ia64_global_tlb_purge (struct mm_struct *mm, unsigned long start, unsigned long end, unsigned long nbits)
+ia64_global_tlb_purge (struct mm_struct *mm, unsigned long start,
+		       unsigned long end, unsigned long nbits)
 {
 	static DEFINE_SPINLOCK(ptcg_lock);
 
@@ -133,7 +134,8 @@ local_flush_tlb_all (void)
 }
 
 void
-flush_tlb_range (struct vm_area_struct *vma, unsigned long start, unsigned long end)
+flush_tlb_range (struct vm_area_struct *vma, unsigned long start,
+		 unsigned long end)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	unsigned long size = end - start;
@@ -147,7 +149,8 @@ flush_tlb_range (struct vm_area_struct *vma, unsigned long start, unsigned long
 #endif
 
 	nbits = ia64_fls(size + 0xfff);
-	while (unlikely (((1UL << nbits) & purge.mask) == 0) && (nbits < purge.max_bits))
+	while (unlikely (((1UL << nbits) & purge.mask) == 0) &&
+			(nbits < purge.max_bits))
 		++nbits;
 	if (nbits > purge.max_bits)
 		nbits = purge.max_bits;
@@ -189,5 +192,5 @@ ia64_tlb_init (void)
 	local_cpu_data->ptce_stride[0] = ptce_info.stride[0];
 	local_cpu_data->ptce_stride[1] = ptce_info.stride[1];
 
-	local_flush_tlb_all();		/* nuke left overs from bootstrapping... */
+	local_flush_tlb_all();	/* nuke left overs from bootstrapping... */
 }
diff --git a/include/asm-ia64/mmu_context.h b/include/asm-ia64/mmu_context.h
index 8d9b30b5f7d..b5c65081a3a 100644
--- a/include/asm-ia64/mmu_context.h
+++ b/include/asm-ia64/mmu_context.h
@@ -7,12 +7,13 @@
  */
 
 /*
- * Routines to manage the allocation of task context numbers.  Task context numbers are
- * used to reduce or eliminate the need to perform TLB flushes due to context switches.
- * Context numbers are implemented using ia-64 region ids.  Since the IA-64 TLB does not
- * consider the region number when performing a TLB lookup, we need to assign a unique
- * region id to each region in a process.  We use the least significant three bits in a
- * region id for this purpose.
+ * Routines to manage the allocation of task context numbers.  Task context
+ * numbers are used to reduce or eliminate the need to perform TLB flushes
+ * due to context switches.  Context numbers are implemented using ia-64
+ * region ids.  Since the IA-64 TLB does not consider the region number when
+ * performing a TLB lookup, we need to assign a unique region id to each
+ * region in a process.  We use the least significant three bits in aregion
+ * id for this purpose.
  */
 
 #define IA64_REGION_ID_KERNEL	0 /* the kernel's region id (tlb.c depends on this being 0) */
@@ -51,10 +52,10 @@ enter_lazy_tlb (struct mm_struct *mm, struct task_struct *tsk)
 }
 
 /*
- * When the context counter wraps around all TLBs need to be flushed because an old
- * context number might have been reused. This is signalled by the ia64_need_tlb_flush
- * per-CPU variable, which is checked in the routine below. Called by activate_mm().
- * <efocht@ess.nec.de>
+ * When the context counter wraps around all TLBs need to be flushed because
+ * an old context number might have been reused. This is signalled by the
+ * ia64_need_tlb_flush per-CPU variable, which is checked in the routine
+ * below. Called by activate_mm(). <efocht@ess.nec.de>
  */
 static inline void
 delayed_tlb_flush (void)
@@ -64,11 +65,9 @@ delayed_tlb_flush (void)
 
 	if (unlikely(__ia64_per_cpu_var(ia64_need_tlb_flush))) {
 		spin_lock_irqsave(&ia64_ctx.lock, flags);
-		{
-			if (__ia64_per_cpu_var(ia64_need_tlb_flush)) {
-				local_flush_tlb_all();
-				__ia64_per_cpu_var(ia64_need_tlb_flush) = 0;
-			}
+		if (__ia64_per_cpu_var(ia64_need_tlb_flush)) {
+			local_flush_tlb_all();
+			__ia64_per_cpu_var(ia64_need_tlb_flush) = 0;
 		}
 		spin_unlock_irqrestore(&ia64_ctx.lock, flags);
 	}
@@ -80,27 +79,27 @@ get_mmu_context (struct mm_struct *mm)
 	unsigned long flags;
 	nv_mm_context_t context = mm->context;
 
-	if (unlikely(!context)) {
-		spin_lock_irqsave(&ia64_ctx.lock, flags);
-		{
-			/* re-check, now that we've got the lock: */
-			context = mm->context;
-			if (context == 0) {
-				cpus_clear(mm->cpu_vm_mask);
-				if (ia64_ctx.next >= ia64_ctx.limit) {
-					ia64_ctx.next = find_next_zero_bit(ia64_ctx.bitmap,
-							ia64_ctx.max_ctx, ia64_ctx.next);
-					ia64_ctx.limit = find_next_bit(ia64_ctx.bitmap,
-							ia64_ctx.max_ctx, ia64_ctx.next);
-					if (ia64_ctx.next >= ia64_ctx.max_ctx)
-						wrap_mmu_context(mm);
-				}
-				mm->context = context = ia64_ctx.next++;
-				__set_bit(context, ia64_ctx.bitmap);
-			}
+	if (likely(context))
+		goto out;
+
+	spin_lock_irqsave(&ia64_ctx.lock, flags);
+	/* re-check, now that we've got the lock: */
+	context = mm->context;
+	if (context == 0) {
+		cpus_clear(mm->cpu_vm_mask);
+		if (ia64_ctx.next >= ia64_ctx.limit) {
+			ia64_ctx.next = find_next_zero_bit(ia64_ctx.bitmap,
+					ia64_ctx.max_ctx, ia64_ctx.next);
+			ia64_ctx.limit = find_next_bit(ia64_ctx.bitmap,
+					ia64_ctx.max_ctx, ia64_ctx.next);
+			if (ia64_ctx.next >= ia64_ctx.max_ctx)
+				wrap_mmu_context(mm);
 		}
-		spin_unlock_irqrestore(&ia64_ctx.lock, flags);
+		mm->context = context = ia64_ctx.next++;
+		__set_bit(context, ia64_ctx.bitmap);
 	}
+	spin_unlock_irqrestore(&ia64_ctx.lock, flags);
+out:
 	/*
 	 * Ensure we're not starting to use "context" before any old
 	 * uses of it are gone from our TLB.
@@ -111,8 +110,8 @@ get_mmu_context (struct mm_struct *mm)
 }
 
 /*
- * Initialize context number to some sane value.  MM is guaranteed to be a brand-new
- * address-space, so no TLB flushing is needed, ever.
+ * Initialize context number to some sane value.  MM is guaranteed to be a
+ * brand-new address-space, so no TLB flushing is needed, ever.
  */
 static inline int
 init_new_context (struct task_struct *p, struct mm_struct *mm)
@@ -173,7 +172,10 @@ activate_context (struct mm_struct *mm)
 		if (!cpu_isset(smp_processor_id(), mm->cpu_vm_mask))
 			cpu_set(smp_processor_id(), mm->cpu_vm_mask);
 		reload_context(context);
-		/* in the unlikely event of a TLB-flush by another thread, redo the load: */
+		/*
+		 * in the unlikely event of a TLB-flush by another thread,
+		 * redo the load.
+		 */
 	} while (unlikely(context != mm->context));
 }
 
@@ -186,8 +188,8 @@ static inline void
 activate_mm (struct mm_struct *prev, struct mm_struct *next)
 {
 	/*
-	 * We may get interrupts here, but that's OK because interrupt handlers cannot
-	 * touch user-space.
+	 * We may get interrupts here, but that's OK because interrupt
+	 * handlers cannot touch user-space.
 	 */
 	ia64_set_kr(IA64_KR_PT_BASE, __pa(next->pgd));
 	activate_context(next);
-- 
cgit v1.2.3-70-g09d2