From b6ff32d9aaeeeecf98f9a852d715569183585312 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 9 Apr 2009 14:26:51 -0700 Subject: x86, PAT: Consolidate code in pat_x_mtrr_type() and reserve_memtype() Fix pat_x_mtrr_type() to use UC_MINUS when the mtrr type return UC. This is to be consistent with ioremap() and ioremap_nocache() which uses UC_MINUS. Consolidate the code such that reserve_memtype() also uses pat_x_mtrr_type() when the caller doesn't specify any special attribute (non WB attribute). Signed-off-by: Suresh Siddha Signed-off-by: Venkatesh Pallipadi LKML-Reference: <20090409212708.939936000@intel.com> Signed-off-by: Ingo Molnar --- arch/x86/mm/pat.c | 35 +++++++++++++---------------------- 1 file changed, 13 insertions(+), 22 deletions(-) (limited to 'arch/x86/mm/pat.c') diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 640339ee4fb..8d3de958050 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -182,10 +182,10 @@ static unsigned long pat_x_mtrr_type(u64 start, u64 end, unsigned long req_type) u8 mtrr_type; mtrr_type = mtrr_type_lookup(start, end); - if (mtrr_type == MTRR_TYPE_UNCACHABLE) - return _PAGE_CACHE_UC; - if (mtrr_type == MTRR_TYPE_WRCOMB) - return _PAGE_CACHE_WC; + if (mtrr_type != MTRR_TYPE_WRBACK) + return _PAGE_CACHE_UC_MINUS; + + return _PAGE_CACHE_WB; } return req_type; @@ -352,23 +352,13 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, return 0; } - if (req_type == -1) { - /* - * Call mtrr_lookup to get the type hint. This is an - * optimization for /dev/mem mmap'ers into WB memory (BIOS - * tools and ACPI tools). Use WB request for WB memory and use - * UC_MINUS otherwise. - */ - u8 mtrr_type = mtrr_type_lookup(start, end); - - if (mtrr_type == MTRR_TYPE_WRBACK) - actual_type = _PAGE_CACHE_WB; - else - actual_type = _PAGE_CACHE_UC_MINUS; - } else { - actual_type = pat_x_mtrr_type(start, end, - req_type & _PAGE_CACHE_MASK); - } + /* + * Call mtrr_lookup to get the type hint. This is an + * optimization for /dev/mem mmap'ers into WB memory (BIOS + * tools and ACPI tools). Use WB request for WB memory and use + * UC_MINUS otherwise. + */ + actual_type = pat_x_mtrr_type(start, end, req_type & _PAGE_CACHE_MASK); if (new_type) *new_type = actual_type; @@ -587,7 +577,8 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, if (flags != -1) { retval = reserve_memtype(offset, offset + size, flags, NULL); } else { - retval = reserve_memtype(offset, offset + size, -1, &flags); + retval = reserve_memtype(offset, offset + size, + _PAGE_CACHE_WB, &flags); } if (retval < 0) -- cgit v1.2.3-70-g09d2 From 0c3c8a18361a636069f5a5d9d0d0f9c2124e6b94 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 9 Apr 2009 14:26:52 -0700 Subject: x86, PAT: Remove duplicate memtype reserve in devmem mmap /dev/mem mmap code was doing memtype reserve/free for a while now. Recently we added memtype tracking in remap_pfn_range, and /dev/mem mmap uses it indirectly. So, we don't need seperate tracking in /dev/mem code any more. That means another ~100 lines of code removed :-). Signed-off-by: Suresh Siddha Signed-off-by: Venkatesh Pallipadi LKML-Reference: <20090409212709.085210000@intel.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pat.h | 4 ---- arch/x86/mm/pat.c | 60 ++-------------------------------------------- drivers/char/mem.c | 27 --------------------- 3 files changed, 2 insertions(+), 89 deletions(-) (limited to 'arch/x86/mm/pat.c') diff --git a/arch/x86/include/asm/pat.h b/arch/x86/include/asm/pat.h index 2cd07b9422f..7af14e512f9 100644 --- a/arch/x86/include/asm/pat.h +++ b/arch/x86/include/asm/pat.h @@ -18,9 +18,5 @@ extern int free_memtype(u64 start, u64 end); extern int kernel_map_sync_memtype(u64 base, unsigned long size, unsigned long flag); -extern void map_devmem(unsigned long pfn, unsigned long size, - struct pgprot vma_prot); -extern void unmap_devmem(unsigned long pfn, unsigned long size, - struct pgprot vma_prot); #endif /* _ASM_X86_PAT_H */ diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 8d3de958050..cc5e0e24e44 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -536,9 +536,7 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size) int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, unsigned long size, pgprot_t *vma_prot) { - u64 offset = ((u64) pfn) << PAGE_SHIFT; - unsigned long flags = -1; - int retval; + unsigned long flags = _PAGE_CACHE_WB; if (!range_is_allowed(pfn, size)) return 0; @@ -566,65 +564,11 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, } #endif - /* - * With O_SYNC, we can only take UC_MINUS mapping. Fail if we cannot. - * - * Without O_SYNC, we want to get - * - WB for WB-able memory and no other conflicting mappings - * - UC_MINUS for non-WB-able memory with no other conflicting mappings - * - Inherit from confliting mappings otherwise - */ - if (flags != -1) { - retval = reserve_memtype(offset, offset + size, flags, NULL); - } else { - retval = reserve_memtype(offset, offset + size, - _PAGE_CACHE_WB, &flags); - } - - if (retval < 0) - return 0; - - if (((pfn < max_low_pfn_mapped) || - (pfn >= (1UL<<(32 - PAGE_SHIFT)) && pfn < max_pfn_mapped)) && - ioremap_change_attr((unsigned long)__va(offset), size, flags) < 0) { - free_memtype(offset, offset + size); - printk(KERN_INFO - "%s:%d /dev/mem ioremap_change_attr failed %s for %Lx-%Lx\n", - current->comm, current->pid, - cattr_name(flags), - offset, (unsigned long long)(offset + size)); - return 0; - } - *vma_prot = __pgprot((pgprot_val(*vma_prot) & ~_PAGE_CACHE_MASK) | flags); return 1; } -void map_devmem(unsigned long pfn, unsigned long size, pgprot_t vma_prot) -{ - unsigned long want_flags = (pgprot_val(vma_prot) & _PAGE_CACHE_MASK); - u64 addr = (u64)pfn << PAGE_SHIFT; - unsigned long flags; - - reserve_memtype(addr, addr + size, want_flags, &flags); - if (flags != want_flags) { - printk(KERN_INFO - "%s:%d /dev/mem expected mapping type %s for %Lx-%Lx, got %s\n", - current->comm, current->pid, - cattr_name(want_flags), - addr, (unsigned long long)(addr + size), - cattr_name(flags)); - } -} - -void unmap_devmem(unsigned long pfn, unsigned long size, pgprot_t vma_prot) -{ - u64 addr = (u64)pfn << PAGE_SHIFT; - - free_memtype(addr, addr + size); -} - /* * Change the memory type for the physial address range in kernel identity * mapping space if that range is a part of identity map. @@ -662,8 +606,8 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot, { int is_ram = 0; int ret; - unsigned long flags; unsigned long want_flags = (pgprot_val(*vma_prot) & _PAGE_CACHE_MASK); + unsigned long flags = want_flags; is_ram = pat_pagerange_is_ram(paddr, paddr + size); diff --git a/drivers/char/mem.c b/drivers/char/mem.c index 3586b3b3df3..8f05c38c2f0 100644 --- a/drivers/char/mem.c +++ b/drivers/char/mem.c @@ -301,33 +301,7 @@ static inline int private_mapping_ok(struct vm_area_struct *vma) } #endif -void __attribute__((weak)) -map_devmem(unsigned long pfn, unsigned long len, pgprot_t prot) -{ - /* nothing. architectures can override. */ -} - -void __attribute__((weak)) -unmap_devmem(unsigned long pfn, unsigned long len, pgprot_t prot) -{ - /* nothing. architectures can override. */ -} - -static void mmap_mem_open(struct vm_area_struct *vma) -{ - map_devmem(vma->vm_pgoff, vma->vm_end - vma->vm_start, - vma->vm_page_prot); -} - -static void mmap_mem_close(struct vm_area_struct *vma) -{ - unmap_devmem(vma->vm_pgoff, vma->vm_end - vma->vm_start, - vma->vm_page_prot); -} - static struct vm_operations_struct mmap_mem_ops = { - .open = mmap_mem_open, - .close = mmap_mem_close, #ifdef CONFIG_HAVE_IOREMAP_PROT .access = generic_access_phys #endif @@ -362,7 +336,6 @@ static int mmap_mem(struct file * file, struct vm_area_struct * vma) vma->vm_pgoff, size, vma->vm_page_prot)) { - unmap_devmem(vma->vm_pgoff, size, vma->vm_page_prot); return -EAGAIN; } return 0; -- cgit v1.2.3-70-g09d2 From 4b065046273afa01ec8e3de7da407e8d3599251d Mon Sep 17 00:00:00 2001 From: "Pallipadi, Venkatesh" Date: Wed, 8 Apr 2009 15:37:16 -0700 Subject: x86, PAT: Remove page granularity tracking for vm_insert_pfn maps This change resolves the problem of too many single page entries in pat_memtype_list and "freeing invalid memtype" errors with i915, reported here: http://marc.info/?l=linux-kernel&m=123845244713183&w=2 Remove page level granularity track and untrack of vm_insert_pfn. memtype tracking at page granularity does not scale and cleaner approach would be for the driver to request a type for a bigger IO address range or PCI io memory range for that device, either at mmap time or driver init time and just use that type during vm_insert_pfn. This patch just removes the track/untrack of vm_insert_pfn. That means we will be in same state as 2.6.28, with respect to these APIs. Newer APIs for the drivers to request a memtype for a bigger region is coming soon. [ Impact: fix Xorg startup warnings and hangs ] Reported-by: Arkadiusz Miskiewicz Tested-by: Arkadiusz Miskiewicz Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha Cc: Jesse Barnes LKML-Reference: <20090408223716.GC3493@linux-os.sc.intel.com> Signed-off-by: Ingo Molnar --- arch/x86/mm/pat.c | 98 +++++++++++-------------------------------------------- 1 file changed, 19 insertions(+), 79 deletions(-) (limited to 'arch/x86/mm/pat.c') diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index cc5e0e24e44..41c80571815 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -669,29 +669,28 @@ static void free_pfn_range(u64 paddr, unsigned long size) * * If the vma has a linear pfn mapping for the entire range, we get the prot * from pte and reserve the entire vma range with single reserve_pfn_range call. - * Otherwise, we reserve the entire vma range, my ging through the PTEs page - * by page to get physical address and protection. */ int track_pfn_vma_copy(struct vm_area_struct *vma) { - int retval = 0; - unsigned long i, j; resource_size_t paddr; unsigned long prot; - unsigned long vma_start = vma->vm_start; - unsigned long vma_end = vma->vm_end; - unsigned long vma_size = vma_end - vma_start; + unsigned long vma_size = vma->vm_end - vma->vm_start; pgprot_t pgprot; if (!pat_enabled) return 0; + /* + * For now, only handle remap_pfn_range() vmas where + * is_linear_pfn_mapping() == TRUE. Handling of + * vm_insert_pfn() is TBD. + */ if (is_linear_pfn_mapping(vma)) { /* * reserve the whole chunk covered by vma. We need the * starting address and protection from pte. */ - if (follow_phys(vma, vma_start, 0, &prot, &paddr)) { + if (follow_phys(vma, vma->vm_start, 0, &prot, &paddr)) { WARN_ON_ONCE(1); return -EINVAL; } @@ -699,28 +698,7 @@ int track_pfn_vma_copy(struct vm_area_struct *vma) return reserve_pfn_range(paddr, vma_size, &pgprot, 1); } - /* reserve entire vma page by page, using pfn and prot from pte */ - for (i = 0; i < vma_size; i += PAGE_SIZE) { - if (follow_phys(vma, vma_start + i, 0, &prot, &paddr)) - continue; - - pgprot = __pgprot(prot); - retval = reserve_pfn_range(paddr, PAGE_SIZE, &pgprot, 1); - if (retval) - goto cleanup_ret; - } return 0; - -cleanup_ret: - /* Reserve error: Cleanup partial reservation and return error */ - for (j = 0; j < i; j += PAGE_SIZE) { - if (follow_phys(vma, vma_start + j, 0, &prot, &paddr)) - continue; - - free_pfn_range(paddr, PAGE_SIZE); - } - - return retval; } /* @@ -730,50 +708,28 @@ cleanup_ret: * prot is passed in as a parameter for the new mapping. If the vma has a * linear pfn mapping for the entire range reserve the entire vma range with * single reserve_pfn_range call. - * Otherwise, we look t the pfn and size and reserve only the specified range - * page by page. - * - * Note that this function can be called with caller trying to map only a - * subrange/page inside the vma. */ int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot, unsigned long pfn, unsigned long size) { - int retval = 0; - unsigned long i, j; - resource_size_t base_paddr; resource_size_t paddr; - unsigned long vma_start = vma->vm_start; - unsigned long vma_end = vma->vm_end; - unsigned long vma_size = vma_end - vma_start; + unsigned long vma_size = vma->vm_end - vma->vm_start; if (!pat_enabled) return 0; + /* + * For now, only handle remap_pfn_range() vmas where + * is_linear_pfn_mapping() == TRUE. Handling of + * vm_insert_pfn() is TBD. + */ if (is_linear_pfn_mapping(vma)) { /* reserve the whole chunk starting from vm_pgoff */ paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT; return reserve_pfn_range(paddr, vma_size, prot, 0); } - /* reserve page by page using pfn and size */ - base_paddr = (resource_size_t)pfn << PAGE_SHIFT; - for (i = 0; i < size; i += PAGE_SIZE) { - paddr = base_paddr + i; - retval = reserve_pfn_range(paddr, PAGE_SIZE, prot, 0); - if (retval) - goto cleanup_ret; - } return 0; - -cleanup_ret: - /* Reserve error: Cleanup partial reservation and return error */ - for (j = 0; j < i; j += PAGE_SIZE) { - paddr = base_paddr + j; - free_pfn_range(paddr, PAGE_SIZE); - } - - return retval; } /* @@ -784,39 +740,23 @@ cleanup_ret: void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn, unsigned long size) { - unsigned long i; resource_size_t paddr; - unsigned long prot; - unsigned long vma_start = vma->vm_start; - unsigned long vma_end = vma->vm_end; - unsigned long vma_size = vma_end - vma_start; + unsigned long vma_size = vma->vm_end - vma->vm_start; if (!pat_enabled) return; + /* + * For now, only handle remap_pfn_range() vmas where + * is_linear_pfn_mapping() == TRUE. Handling of + * vm_insert_pfn() is TBD. + */ if (is_linear_pfn_mapping(vma)) { /* free the whole chunk starting from vm_pgoff */ paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT; free_pfn_range(paddr, vma_size); return; } - - if (size != 0 && size != vma_size) { - /* free page by page, using pfn and size */ - paddr = (resource_size_t)pfn << PAGE_SHIFT; - for (i = 0; i < size; i += PAGE_SIZE) { - paddr = paddr + i; - free_pfn_range(paddr, PAGE_SIZE); - } - } else { - /* free entire vma, page by page, using the pfn from pte */ - for (i = 0; i < vma_size; i += PAGE_SIZE) { - if (follow_phys(vma, vma_start + i, 0, &prot, &paddr)) - continue; - - free_pfn_range(paddr, PAGE_SIZE); - } - } } pgprot_t pgprot_writecombine(pgprot_t prot) -- cgit v1.2.3-70-g09d2