From 7fccfc00c003c855936970facdbb667bae9dbe9a Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Tue, 14 Apr 2009 13:07:35 +0100 Subject: [ARM] 5450/1: Flush only the needed range when unmapping a VMA When unmapping N pages (e.g. shared memory) the amount of TLB flushes done can be (N*PAGE_SIZE/ZAP_BLOCK_SIZE)*N although it should be N at maximum. With PREEMPT kernel ZAP_BLOCK_SIZE is 8 pages, so there is a noticeable performance penalty when unmapping a large VMA and the system is spending its time in flush_tlb_range(). The problem is that tlb_end_vma() is always flushing the full VMA range. The subrange that needs to be flushed can be calculated by tlb_remove_tlb_entry(). This approach was suggested by Hugh Dickins, and is also used by other arches. The speed increase is roughly 3x for 8M mappings and for larger mappings even more. Signed-off-by: Aaro Koskinen Signed-off-by: Russell King --- arch/arm/include/asm/tlb.h | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h index 857f1dfac79..321c83e43a1 100644 --- a/arch/arm/include/asm/tlb.h +++ b/arch/arm/include/asm/tlb.h @@ -36,6 +36,8 @@ struct mmu_gather { struct mm_struct *mm; unsigned int fullmm; + unsigned long range_start; + unsigned long range_end; }; DECLARE_PER_CPU(struct mmu_gather, mmu_gathers); @@ -63,7 +65,19 @@ tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end) put_cpu_var(mmu_gathers); } -#define tlb_remove_tlb_entry(tlb,ptep,address) do { } while (0) +/* + * Memorize the range for the TLB flush. + */ +static inline void +tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *ptep, unsigned long addr) +{ + if (!tlb->fullmm) { + if (addr < tlb->range_start) + tlb->range_start = addr; + if (addr + PAGE_SIZE > tlb->range_end) + tlb->range_end = addr + PAGE_SIZE; + } +} /* * In the case of tlb vma handling, we can optimise these away in the @@ -73,15 +87,18 @@ tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end) static inline void tlb_start_vma(struct mmu_gather *tlb, struct vm_area_struct *vma) { - if (!tlb->fullmm) + if (!tlb->fullmm) { flush_cache_range(vma, vma->vm_start, vma->vm_end); + tlb->range_start = TASK_SIZE; + tlb->range_end = 0; + } } static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma) { - if (!tlb->fullmm) - flush_tlb_range(vma, vma->vm_start, vma->vm_end); + if (!tlb->fullmm && tlb->range_end > 0) + flush_tlb_range(vma, tlb->range_start, tlb->range_end); } #define tlb_remove_page(tlb,page) free_page_and_swap_cache(page) -- cgit v1.2.3-70-g09d2