| author | Linus Torvalds <torvalds@linux-foundation.org> | 2009-03-30 11:38:31 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-03-30 11:38:31 -0700 |
| commit | 019abbc87025a030fd25008612afd4eff8a375f7 (patch) | |
| tree | 6d745dedcf90ceff8f5b7b996a17f666b7c574e3 /arch/x86/mm | |
| parent | 2d25ee36c84d5b2d6be8bfaf80256ecad69a06ca (diff) | |
| parent | 5a3c8fe7353f78b73b9636353c6f7b881f19ebea (diff) | |
Merge branch 'x86-stage-3-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'x86-stage-3-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (190 commits)
Revert "cpuacct: reduce one NULL check in fast-path"
Revert "x86: don't compile vsmp_64 for 32bit"
x86: Correct behaviour of irq affinity
x86: early_ioremap_init(), use __fix_to_virt(), because we are sure it's safe
x86: use default_cpu_mask_to_apicid for 64bit
x86: fix set_extra_move_desc calling
x86, PAT, PCI: Change vma prot in pci_mmap to reflect inherited prot
x86/dmi: fix dmi_alloc() section mismatches
x86: e820 fix various signedness issues in setup.c and e820.c
x86: apic/io_apic.c define msi_ir_chip and ir_ioapic_chip all the time
x86: irq.c keep CONFIG_X86_LOCAL_APIC interrupts together
x86: irq.c use same path for show_interrupts
x86: cpu/cpu.h cleanup
x86: Fix a couple of sparse warnings in arch/x86/kernel/apic/io_apic.c
Revert "x86: create a non-zero sized bm_pte only when needed"
x86: pci-nommu.c cleanup
x86: io_delay.c cleanup
x86: rtc.c cleanup
x86: i8253 cleanup
x86: kdebugfs.c cleanup
...
Diffstat (limited to 'arch/x86/mm')
-rw-r--r-- | arch/x86/mm/highmem_32.c | 24
-rw-r--r-- | arch/x86/mm/init.c | 344
-rw-r--r-- | arch/x86/mm/init_32.c | 256
-rw-r--r-- | arch/x86/mm/init_64.c | 280
-rw-r--r-- | arch/x86/mm/iomap_32.c | 26
-rw-r--r-- | arch/x86/mm/ioremap.c | 52
-rw-r--r-- | arch/x86/mm/kmmio.c | 2
-rw-r--r-- | arch/x86/mm/memtest.c | 3
-rw-r--r-- | arch/x86/mm/numa_32.c | 5
-rw-r--r-- | arch/x86/mm/pageattr.c | 147
-rw-r--r-- | arch/x86/mm/pat.c | 5
-rw-r--r-- | arch/x86/mm/pgtable_32.c | 2
-rw-r--r-- | arch/x86/mm/tlb.c | 5
13 files changed, 589 insertions, 562 deletions
diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c index 00f127c80b0..522db5e3d0b 100644 --- a/arch/x86/mm/highmem_32.c +++ b/arch/x86/mm/highmem_32.c @@ -121,22 +121,13 @@ void kunmap_atomic(void *kvaddr, enum km_type type) pagefault_enable(); } -/* This is the same as kmap_atomic() but can map memory that doesn't +/* + * This is the same as kmap_atomic() but can map memory that doesn't * have a struct page associated with it. */ void *kmap_atomic_pfn(unsigned long pfn, enum km_type type) { - enum fixed_addresses idx; - unsigned long vaddr; - - pagefault_disable(); - - idx = type + KM_TYPE_NR*smp_processor_id(); - vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); - set_pte(kmap_pte-idx, pfn_pte(pfn, kmap_prot)); - arch_flush_lazy_mmu_mode(); - - return (void*) vaddr; + return kmap_atomic_prot_pfn(pfn, type, kmap_prot); } EXPORT_SYMBOL_GPL(kmap_atomic_pfn); /* temporarily in use by i915 GEM until vmap */ @@ -158,7 +149,6 @@ EXPORT_SYMBOL(kunmap); EXPORT_SYMBOL(kmap_atomic); EXPORT_SYMBOL(kunmap_atomic); -#ifdef CONFIG_NUMA void __init set_highmem_pages_init(void) { struct zone *zone; @@ -182,11 +172,3 @@ void __init set_highmem_pages_init(void) } totalram_pages += totalhigh_pages; } -#else -void __init set_highmem_pages_init(void) -{ - add_highpages_with_active_regions(0, highstart_pfn, highend_pfn); - - totalram_pages += totalhigh_pages; -} -#endif /* CONFIG_NUMA */ diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index ce6a722587d..fd3da1dda1c 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -1,8 +1,345 @@ +#include <linux/ioport.h> #include <linux/swap.h> + #include <asm/cacheflush.h> +#include <asm/e820.h> +#include <asm/init.h> #include <asm/page.h> +#include <asm/page_types.h> #include <asm/sections.h> #include <asm/system.h> +#include <asm/tlbflush.h> + +unsigned long __initdata e820_table_start; +unsigned long __meminitdata e820_table_end; +unsigned long __meminitdata e820_table_top; + +int after_bootmem; + +int direct_gbpages +#ifdef CONFIG_DIRECT_GBPAGES + = 1 +#endif +; + +static void __init find_early_table_space(unsigned long end, int use_pse, + int use_gbpages) +{ + unsigned long puds, pmds, ptes, tables, start; + + puds = (end + PUD_SIZE - 1) >> PUD_SHIFT; + tables = roundup(puds * sizeof(pud_t), PAGE_SIZE); + + if (use_gbpages) { + unsigned long extra; + + extra = end - ((end>>PUD_SHIFT) << PUD_SHIFT); + pmds = (extra + PMD_SIZE - 1) >> PMD_SHIFT; + } else + pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; + + tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE); + + if (use_pse) { + unsigned long extra; + + extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT); +#ifdef CONFIG_X86_32 + extra += PMD_SIZE; +#endif + ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT; + } else + ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT; + + tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE); + +#ifdef CONFIG_X86_32 + /* for fixmap */ + tables += roundup(__end_of_fixed_addresses * sizeof(pte_t), PAGE_SIZE); +#endif + + /* + * RED-PEN putting page tables only on node 0 could + * cause a hotspot and fill up ZONE_DMA. The page tables + * need roughly 0.5KB per GB. 
+ */ +#ifdef CONFIG_X86_32 + start = 0x7000; + e820_table_start = find_e820_area(start, max_pfn_mapped<<PAGE_SHIFT, + tables, PAGE_SIZE); +#else /* CONFIG_X86_64 */ + start = 0x8000; + e820_table_start = find_e820_area(start, end, tables, PAGE_SIZE); +#endif + if (e820_table_start == -1UL) + panic("Cannot find space for the kernel page tables"); + + e820_table_start >>= PAGE_SHIFT; + e820_table_end = e820_table_start; + e820_table_top = e820_table_start + (tables >> PAGE_SHIFT); + + printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n", + end, e820_table_start << PAGE_SHIFT, e820_table_top << PAGE_SHIFT); +} + +struct map_range { + unsigned long start; + unsigned long end; + unsigned page_size_mask; +}; + +#ifdef CONFIG_X86_32 +#define NR_RANGE_MR 3 +#else /* CONFIG_X86_64 */ +#define NR_RANGE_MR 5 +#endif + +static int __meminit save_mr(struct map_range *mr, int nr_range, + unsigned long start_pfn, unsigned long end_pfn, + unsigned long page_size_mask) +{ + if (start_pfn < end_pfn) { + if (nr_range >= NR_RANGE_MR) + panic("run out of range for init_memory_mapping\n"); + mr[nr_range].start = start_pfn<<PAGE_SHIFT; + mr[nr_range].end = end_pfn<<PAGE_SHIFT; + mr[nr_range].page_size_mask = page_size_mask; + nr_range++; + } + + return nr_range; +} + +#ifdef CONFIG_X86_64 +static void __init init_gbpages(void) +{ + if (direct_gbpages && cpu_has_gbpages) + printk(KERN_INFO "Using GB pages for direct mapping\n"); + else + direct_gbpages = 0; +} +#else +static inline void init_gbpages(void) +{ +} +#endif + +/* + * Setup the direct mapping of the physical memory at PAGE_OFFSET. + * This runs before bootmem is initialized and gets pages directly from + * the physical memory. To access them they are temporarily mapped. + */ +unsigned long __init_refok init_memory_mapping(unsigned long start, + unsigned long end) +{ + unsigned long page_size_mask = 0; + unsigned long start_pfn, end_pfn; + unsigned long ret = 0; + unsigned long pos; + + struct map_range mr[NR_RANGE_MR]; + int nr_range, i; + int use_pse, use_gbpages; + + printk(KERN_INFO "init_memory_mapping: %016lx-%016lx\n", start, end); + + if (!after_bootmem) + init_gbpages(); + +#ifdef CONFIG_DEBUG_PAGEALLOC + /* + * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages. + * This will simplify cpa(), which otherwise needs to support splitting + * large pages into small in interrupt context, etc. + */ + use_pse = use_gbpages = 0; +#else + use_pse = cpu_has_pse; + use_gbpages = direct_gbpages; +#endif + +#ifdef CONFIG_X86_32 +#ifdef CONFIG_X86_PAE + set_nx(); + if (nx_enabled) + printk(KERN_INFO "NX (Execute Disable) protection: active\n"); +#endif + + /* Enable PSE if available */ + if (cpu_has_pse) + set_in_cr4(X86_CR4_PSE); + + /* Enable PGE if available */ + if (cpu_has_pge) { + set_in_cr4(X86_CR4_PGE); + __supported_pte_mask |= _PAGE_GLOBAL; + } +#endif + + if (use_gbpages) + page_size_mask |= 1 << PG_LEVEL_1G; + if (use_pse) + page_size_mask |= 1 << PG_LEVEL_2M; + + memset(mr, 0, sizeof(mr)); + nr_range = 0; + + /* head if not big page alignment ? */ + start_pfn = start >> PAGE_SHIFT; + pos = start_pfn << PAGE_SHIFT; +#ifdef CONFIG_X86_32 + /* + * Don't use a large page for the first 2/4MB of memory + * because there are often fixed size MTRRs in there + * and overlapping MTRRs into large pages can cause + * slowdowns. 
+ */ + if (pos == 0) + end_pfn = 1<<(PMD_SHIFT - PAGE_SHIFT); + else + end_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT) + << (PMD_SHIFT - PAGE_SHIFT); +#else /* CONFIG_X86_64 */ + end_pfn = ((pos + (PMD_SIZE - 1)) >> PMD_SHIFT) + << (PMD_SHIFT - PAGE_SHIFT); +#endif + if (end_pfn > (end >> PAGE_SHIFT)) + end_pfn = end >> PAGE_SHIFT; + if (start_pfn < end_pfn) { + nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); + pos = end_pfn << PAGE_SHIFT; + } + + /* big page (2M) range */ + start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT) + << (PMD_SHIFT - PAGE_SHIFT); +#ifdef CONFIG_X86_32 + end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); +#else /* CONFIG_X86_64 */ + end_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT) + << (PUD_SHIFT - PAGE_SHIFT); + if (end_pfn > ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT))) + end_pfn = ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT)); +#endif + + if (start_pfn < end_pfn) { + nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, + page_size_mask & (1<<PG_LEVEL_2M)); + pos = end_pfn << PAGE_SHIFT; + } + +#ifdef CONFIG_X86_64 + /* big page (1G) range */ + start_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT) + << (PUD_SHIFT - PAGE_SHIFT); + end_pfn = (end >> PUD_SHIFT) << (PUD_SHIFT - PAGE_SHIFT); + if (start_pfn < end_pfn) { + nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, + page_size_mask & + ((1<<PG_LEVEL_2M)|(1<<PG_LEVEL_1G))); + pos = end_pfn << PAGE_SHIFT; + } + + /* tail is not big page (1G) alignment */ + start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT) + << (PMD_SHIFT - PAGE_SHIFT); + end_pfn = (end >> PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); + if (start_pfn < end_pfn) { + nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, + page_size_mask & (1<<PG_LEVEL_2M)); + pos = end_pfn << PAGE_SHIFT; + } +#endif + + /* tail is not big page (2M) alignment */ + start_pfn = pos>>PAGE_SHIFT; + end_pfn = end>>PAGE_SHIFT; + nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); + + /* try to merge same page size and continuous */ + for (i = 0; nr_range > 1 && i < nr_range - 1; i++) { + unsigned long old_start; + if (mr[i].end != mr[i+1].start || + mr[i].page_size_mask != mr[i+1].page_size_mask) + continue; + /* move it */ + old_start = mr[i].start; + memmove(&mr[i], &mr[i+1], + (nr_range - 1 - i) * sizeof(struct map_range)); + mr[i--].start = old_start; + nr_range--; + } + + for (i = 0; i < nr_range; i++) + printk(KERN_DEBUG " %010lx - %010lx page %s\n", + mr[i].start, mr[i].end, + (mr[i].page_size_mask & (1<<PG_LEVEL_1G))?"1G":( + (mr[i].page_size_mask & (1<<PG_LEVEL_2M))?"2M":"4k")); + + /* + * Find space for the kernel direct mapping tables. + * + * Later we should allocate these tables in the local node of the + * memory mapped. Unfortunately this is done currently before the + * nodes are discovered. 
+ */ + if (!after_bootmem) + find_early_table_space(end, use_pse, use_gbpages); + +#ifdef CONFIG_X86_32 + for (i = 0; i < nr_range; i++) + kernel_physical_mapping_init(mr[i].start, mr[i].end, + mr[i].page_size_mask); + ret = end; +#else /* CONFIG_X86_64 */ + for (i = 0; i < nr_range; i++) + ret = kernel_physical_mapping_init(mr[i].start, mr[i].end, + mr[i].page_size_mask); +#endif + +#ifdef CONFIG_X86_32 + early_ioremap_page_table_range_init(); + + load_cr3(swapper_pg_dir); +#endif + +#ifdef CONFIG_X86_64 + if (!after_bootmem) + mmu_cr4_features = read_cr4(); +#endif + __flush_tlb_all(); + + if (!after_bootmem && e820_table_end > e820_table_start) + reserve_early(e820_table_start << PAGE_SHIFT, + e820_table_end << PAGE_SHIFT, "PGTABLE"); + + if (!after_bootmem) + early_memtest(start, end); + + return ret >> PAGE_SHIFT; +} + + +/* + * devmem_is_allowed() checks to see if /dev/mem access to a certain address + * is valid. The argument is a physical page number. + * + * + * On x86, access has to be given to the first megabyte of ram because that area + * contains bios code and data regions used by X and dosemu and similar apps. + * Access has to be given to non-kernel-ram areas as well, these contain the PCI + * mmio resources as well as potential bios/acpi data regions. + */ +int devmem_is_allowed(unsigned long pagenr) +{ + if (pagenr <= 256) + return 1; + if (iomem_is_exclusive(pagenr << PAGE_SHIFT)) + return 0; + if (!page_is_ram(pagenr)) + return 1; + return 0; +} void free_init_pages(char *what, unsigned long begin, unsigned long end) { @@ -47,3 +384,10 @@ void free_initmem(void) (unsigned long)(&__init_begin), (unsigned long)(&__init_end)); } + +#ifdef CONFIG_BLK_DEV_INITRD +void free_initrd_mem(unsigned long start, unsigned long end) +{ + free_init_pages("initrd memory", start, end); +} +#endif diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 47df0e1bbeb..db81e9a8556 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -49,6 +49,7 @@ #include <asm/paravirt.h> #include <asm/setup.h> #include <asm/cacheflush.h> +#include <asm/init.h> unsigned long max_low_pfn_mapped; unsigned long max_pfn_mapped; @@ -58,19 +59,14 @@ unsigned long highstart_pfn, highend_pfn; static noinline int do_test_wp_bit(void); - -static unsigned long __initdata table_start; -static unsigned long __meminitdata table_end; -static unsigned long __meminitdata table_top; - -static int __initdata after_init_bootmem; +bool __read_mostly __vmalloc_start_set = false; static __init void *alloc_low_page(void) { - unsigned long pfn = table_end++; + unsigned long pfn = e820_table_end++; void *adr; - if (pfn >= table_top) + if (pfn >= e820_table_top) panic("alloc_low_page: ran out of memory"); adr = __va(pfn * PAGE_SIZE); @@ -90,7 +86,7 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd) #ifdef CONFIG_X86_PAE if (!(pgd_val(*pgd) & _PAGE_PRESENT)) { - if (after_init_bootmem) + if (after_bootmem) pmd_table = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE); else pmd_table = (pmd_t *)alloc_low_page(); @@ -117,7 +113,7 @@ static pte_t * __init one_page_table_init(pmd_t *pmd) if (!(pmd_val(*pmd) & _PAGE_PRESENT)) { pte_t *page_table = NULL; - if (after_init_bootmem) { + if (after_bootmem) { #ifdef CONFIG_DEBUG_PAGEALLOC page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE); #endif @@ -168,12 +164,12 @@ static pte_t *__init page_table_kmap_check(pte_t *pte, pmd_t *pmd, if (pmd_idx_kmap_begin != pmd_idx_kmap_end && (vaddr >> PMD_SHIFT) >= pmd_idx_kmap_begin && (vaddr >> PMD_SHIFT) <= pmd_idx_kmap_end - && 
((__pa(pte) >> PAGE_SHIFT) < table_start - || (__pa(pte) >> PAGE_SHIFT) >= table_end)) { + && ((__pa(pte) >> PAGE_SHIFT) < e820_table_start + || (__pa(pte) >> PAGE_SHIFT) >= e820_table_end)) { pte_t *newpte; int i; - BUG_ON(after_init_bootmem); + BUG_ON(after_bootmem); newpte = alloc_low_page(); for (i = 0; i < PTRS_PER_PTE; i++) set_pte(newpte + i, pte[i]); @@ -242,11 +238,14 @@ static inline int is_kernel_text(unsigned long addr) * of max_low_pfn pages, by creating page tables starting from address * PAGE_OFFSET: */ -static void __init kernel_physical_mapping_init(pgd_t *pgd_base, - unsigned long start_pfn, - unsigned long end_pfn, - int use_pse) +unsigned long __init +kernel_physical_mapping_init(unsigned long start, + unsigned long end, + unsigned long page_size_mask) { + int use_pse = page_size_mask == (1<<PG_LEVEL_2M); + unsigned long start_pfn, end_pfn; + pgd_t *pgd_base = swapper_pg_dir; int pgd_idx, pmd_idx, pte_ofs; unsigned long pfn; pgd_t *pgd; @@ -255,6 +254,9 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base, unsigned pages_2m, pages_4k; int mapping_iter; + start_pfn = start >> PAGE_SHIFT; + end_pfn = end >> PAGE_SHIFT; + /* * First iteration will setup identity mapping using large/small pages * based on use_pse, with other attributes same as set by @@ -369,26 +371,6 @@ repeat: mapping_iter = 2; goto repeat; } -} - -/* - * devmem_is_allowed() checks to see if /dev/mem access to a certain address - * is valid. The argument is a physical page number. - * - * - * On x86, access has to be given to the first megabyte of ram because that area - * contains bios code and data regions used by X and dosemu and similar apps. - * Access has to be given to non-kernel-ram areas as well, these contain the PCI - * mmio resources as well as potential bios/acpi data regions. - */ -int devmem_is_allowed(unsigned long pagenr) -{ - if (pagenr <= 256) - return 1; - if (iomem_is_exclusive(pagenr << PAGE_SHIFT)) - return 0; - if (!page_is_ram(pagenr)) - return 1; return 0; } @@ -545,8 +527,9 @@ void __init native_pagetable_setup_done(pgd_t *base) * be partially populated, and so it avoids stomping on any existing * mappings. 
*/ -static void __init early_ioremap_page_table_range_init(pgd_t *pgd_base) +void __init early_ioremap_page_table_range_init(void) { + pgd_t *pgd_base = swapper_pg_dir; unsigned long vaddr, end; /* @@ -641,7 +624,7 @@ static int __init noexec_setup(char *str) } early_param("noexec", noexec_setup); -static void __init set_nx(void) +void __init set_nx(void) { unsigned int v[4], l, h; @@ -793,6 +776,8 @@ void __init initmem_init(unsigned long start_pfn, #ifdef CONFIG_FLATMEM max_mapnr = num_physpages; #endif + __vmalloc_start_set = true; + printk(KERN_NOTICE "%ldMB LOWMEM available.\n", pages_to_mb(max_low_pfn)); @@ -814,176 +799,66 @@ static void __init zone_sizes_init(void) free_area_init_nodes(max_zone_pfns); } +static unsigned long __init setup_node_bootmem(int nodeid, + unsigned long start_pfn, + unsigned long end_pfn, + unsigned long bootmap) +{ + unsigned long bootmap_size; + + /* don't touch min_low_pfn */ + bootmap_size = init_bootmem_node(NODE_DATA(nodeid), + bootmap >> PAGE_SHIFT, + start_pfn, end_pfn); + printk(KERN_INFO " node %d low ram: %08lx - %08lx\n", + nodeid, start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT); + printk(KERN_INFO " node %d bootmap %08lx - %08lx\n", + nodeid, bootmap, bootmap + bootmap_size); + free_bootmem_with_active_regions(nodeid, end_pfn); + early_res_to_bootmem(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT); + + return bootmap + bootmap_size; +} + void __init setup_bootmem_allocator(void) { - int i; + int nodeid; unsigned long bootmap_size, bootmap; /* * Initialize the boot-time allocator (with low memory only): */ bootmap_size = bootmem_bootmap_pages(max_low_pfn)<<PAGE_SHIFT; - bootmap = find_e820_area(min_low_pfn<<PAGE_SHIFT, - max_pfn_mapped<<PAGE_SHIFT, bootmap_size, + bootmap = find_e820_area(0, max_pfn_mapped<<PAGE_SHIFT, bootmap_size, PAGE_SIZE); if (bootmap == -1L) panic("Cannot find bootmem map of size %ld\n", bootmap_size); reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP"); - /* don't touch min_low_pfn */ - bootmap_size = init_bootmem_node(NODE_DATA(0), bootmap >> PAGE_SHIFT, - min_low_pfn, max_low_pfn); printk(KERN_INFO " mapped low ram: 0 - %08lx\n", max_pfn_mapped<<PAGE_SHIFT); - printk(KERN_INFO " low ram: %08lx - %08lx\n", - min_low_pfn<<PAGE_SHIFT, max_low_pfn<<PAGE_SHIFT); - printk(KERN_INFO " bootmap %08lx - %08lx\n", - bootmap, bootmap + bootmap_size); - for_each_online_node(i) - free_bootmem_with_active_regions(i, max_low_pfn); - early_res_to_bootmem(0, max_low_pfn<<PAGE_SHIFT); - - after_init_bootmem = 1; -} - -static void __init find_early_table_space(unsigned long end, int use_pse) -{ - unsigned long puds, pmds, ptes, tables, start; + printk(KERN_INFO " low ram: 0 - %08lx\n", max_low_pfn<<PAGE_SHIFT); - puds = (end + PUD_SIZE - 1) >> PUD_SHIFT; - tables = roundup(puds * sizeof(pud_t), PAGE_SIZE); + for_each_online_node(nodeid) { + unsigned long start_pfn, end_pfn; - pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; - tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE); - - if (use_pse) { - unsigned long extra; - - extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT); - extra += PMD_SIZE; - ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT; - } else - ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT; - - tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE); - - /* for fixmap */ - tables += roundup(__end_of_fixed_addresses * sizeof(pte_t), PAGE_SIZE); - - /* - * RED-PEN putting page tables only on node 0 could - * cause a hotspot and fill up ZONE_DMA. The page tables - * need roughly 0.5KB per GB. 
- */ - start = 0x7000; - table_start = find_e820_area(start, max_pfn_mapped<<PAGE_SHIFT, - tables, PAGE_SIZE); - if (table_start == -1UL) - panic("Cannot find space for the kernel page tables"); - - table_start >>= PAGE_SHIFT; - table_end = table_start; - table_top = table_start + (tables>>PAGE_SHIFT); - - printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n", - end, table_start << PAGE_SHIFT, - (table_start << PAGE_SHIFT) + tables); -} - -unsigned long __init_refok init_memory_mapping(unsigned long start, - unsigned long end) -{ - pgd_t *pgd_base = swapper_pg_dir; - unsigned long start_pfn, end_pfn; - unsigned long big_page_start; -#ifdef CONFIG_DEBUG_PAGEALLOC - /* - * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages. - * This will simplify cpa(), which otherwise needs to support splitting - * large pages into small in interrupt context, etc. - */ - int use_pse = 0; +#ifdef CONFIG_NEED_MULTIPLE_NODES + start_pfn = node_start_pfn[nodeid]; + end_pfn = node_end_pfn[nodeid]; + if (start_pfn > max_low_pfn) + continue; + if (end_pfn > max_low_pfn) + end_pfn = max_low_pfn; #else - int use_pse = cpu_has_pse; + start_pfn = 0; + end_pfn = max_low_pfn; #endif - - /* - * Find space for the kernel direct mapping tables. - */ - if (!after_init_bootmem) - find_early_table_space(end, use_pse); - -#ifdef CONFIG_X86_PAE - set_nx(); - if (nx_enabled) - printk(KERN_INFO "NX (Execute Disable) protection: active\n"); -#endif - - /* Enable PSE if available */ - if (cpu_has_pse) - set_in_cr4(X86_CR4_PSE); - - /* Enable PGE if available */ - if (cpu_has_pge) { - set_in_cr4(X86_CR4_PGE); - __supported_pte_mask |= _PAGE_GLOBAL; - } - - /* - * Don't use a large page for the first 2/4MB of memory - * because there are often fixed size MTRRs in there - * and overlapping MTRRs into large pages can cause - * slowdowns. - */ - big_page_start = PMD_SIZE; - - if (start < big_page_start) { - start_pfn = start >> PAGE_SHIFT; - end_pfn = min(big_page_start>>PAGE_SHIFT, end>>PAGE_SHIFT); - } else { - /* head is not big page alignment ? */ - start_pfn = start >> PAGE_SHIFT; - end_pfn = ((start + (PMD_SIZE - 1))>>PMD_SHIFT) - << (PMD_SHIFT - PAGE_SHIFT); + bootmap = setup_node_bootmem(nodeid, start_pfn, end_pfn, + bootmap); } - if (start_pfn < end_pfn) - kernel_physical_mapping_init(pgd_base, start_pfn, end_pfn, 0); - - /* big page range */ - start_pfn = ((start + (PMD_SIZE - 1))>>PMD_SHIFT) - << (PMD_SHIFT - PAGE_SHIFT); - if (start_pfn < (big_page_start >> PAGE_SHIFT)) - start_pfn = big_page_start >> PAGE_SHIFT; - end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); - if (start_pfn < end_pfn) - kernel_physical_mapping_init(pgd_base, start_pfn, end_pfn, - use_pse); - - /* tail is not big page alignment ? */ - start_pfn = end_pfn; - if (start_pfn > (big_page_start>>PAGE_SHIFT)) { - end_pfn = end >> PAGE_SHIFT; - if (start_pfn < end_pfn) - kernel_physical_mapping_init(pgd_base, start_pfn, - end_pfn, 0); - } - - early_ioremap_page_table_range_init(pgd_base); - load_cr3(swapper_pg_dir); - - __flush_tlb_all(); - - if (!after_init_bootmem) - reserve_early(table_start << PAGE_SHIFT, - table_end << PAGE_SHIFT, "PGTABLE"); - - if (!after_init_bootmem) - early_memtest(start, end); - - return end >> PAGE_SHIFT; + after_bootmem = 1; } - /* * paging_init() sets up the page tables - note that the first 8MB are * already mapped by head.S. 
@@ -1217,13 +1092,6 @@ void mark_rodata_ro(void) } #endif -#ifdef CONFIG_BLK_DEV_INITRD -void free_initrd_mem(unsigned long start, unsigned long end) -{ - free_init_pages("initrd memory", start, end); -} -#endif - int __init reserve_bootmem_generic(unsigned long phys, unsigned long len, int flags) { diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 07f44d491df..54efa57d1c0 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -48,6 +48,7 @@ #include <asm/kdebug.h> #include <asm/numa.h> #include <asm/cacheflush.h> +#include <asm/init.h> /* * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. @@ -61,12 +62,6 @@ static unsigned long dma_reserve __initdata; DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); -int direct_gbpages -#ifdef CONFIG_DIRECT_GBPAGES - = 1 -#endif -; - static int __init parse_direct_gbpages_off(char *arg) { direct_gbpages = 0; @@ -87,12 +82,10 @@ early_param("gbpages", parse_direct_gbpages_on); * around without checking the pgd every time. */ -int after_bootmem; - pteval_t __supported_pte_mask __read_mostly = ~_PAGE_IOMAP; EXPORT_SYMBOL_GPL(__supported_pte_mask); -static int do_not_nx __cpuinitdata; +static int disable_nx __cpuinitdata; /* * noexec=on|off @@ -107,9 +100,9 @@ static int __init nonx_setup(char *str) return -EINVAL; if (!strncmp(str, "on", 2)) { __supported_pte_mask |= _PAGE_NX; - do_not_nx = 0; + disable_nx = 0; } else if (!strncmp(str, "off", 3)) { - do_not_nx = 1; + disable_nx = 1; __supported_pte_mask &= ~_PAGE_NX; } return 0; @@ -121,7 +114,7 @@ void __cpuinit check_efer(void) unsigned long efer; rdmsrl(MSR_EFER, efer); - if (!(efer & EFER_NX) || do_not_nx) + if (!(efer & EFER_NX) || disable_nx) __supported_pte_mask &= ~_PAGE_NX; } @@ -325,13 +318,9 @@ void __init cleanup_highmap(void) } } -static unsigned long __initdata table_start; -static unsigned long __meminitdata table_end; -static unsigned long __meminitdata table_top; - static __ref void *alloc_low_page(unsigned long *phys) { - unsigned long pfn = table_end++; + unsigned long pfn = e820_table_end++; void *adr; if (after_bootmem) { @@ -341,7 +330,7 @@ static __ref void *alloc_low_page(unsigned long *phys) return adr; } - if (pfn >= table_top) + if (pfn >= e820_table_top) panic("alloc_low_page: ran out of memory"); adr = early_memremap(pfn * PAGE_SIZE, PAGE_SIZE); @@ -581,58 +570,10 @@ phys_pud_update(pgd_t *pgd, unsigned long addr, unsigned long end, return phys_pud_init(pud, addr, end, page_size_mask); } -static void __init find_early_table_space(unsigned long end, int use_pse, - int use_gbpages) -{ - unsigned long puds, pmds, ptes, tables, start; - - puds = (end + PUD_SIZE - 1) >> PUD_SHIFT; - tables = roundup(puds * sizeof(pud_t), PAGE_SIZE); - if (use_gbpages) { - unsigned long extra; - extra = end - ((end>>PUD_SHIFT) << PUD_SHIFT); - pmds = (extra + PMD_SIZE - 1) >> PMD_SHIFT; - } else - pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; - tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE); - - if (use_pse) { - unsigned long extra; - extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT); - ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT; - } else - ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT; - tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE); - - /* - * RED-PEN putting page tables only on node 0 could - * cause a hotspot and fill up ZONE_DMA. The page tables - * need roughly 0.5KB per GB. 
- */ - start = 0x8000; - table_start = find_e820_area(start, end, tables, PAGE_SIZE); - if (table_start == -1UL) - panic("Cannot find space for the kernel page tables"); - - table_start >>= PAGE_SHIFT; - table_end = table_start; - table_top = table_start + (tables >> PAGE_SHIFT); - - printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n", - end, table_start << PAGE_SHIFT, table_top << PAGE_SHIFT); -} - -static void __init init_gbpages(void) -{ - if (direct_gbpages && cpu_has_gbpages) - printk(KERN_INFO "Using GB pages for direct mapping\n"); - else - direct_gbpages = 0; -} - -static unsigned long __meminit kernel_physical_mapping_init(unsigned long start, - unsigned long end, - unsigned long page_size_mask) +unsigned long __init +kernel_physical_mapping_init(unsigned long start, + unsigned long end, + unsigned long page_size_mask) { unsigned long next, last_map_addr = end; @@ -669,176 +610,6 @@ static unsigned long __meminit kernel_physical_mapping_init(unsigned long start, return last_map_addr; } -struct map_range { - unsigned long start; - unsigned long end; - unsigned page_size_mask; -}; - -#define NR_RANGE_MR 5 - -static int save_mr(struct map_range *mr, int nr_range, - unsigned long start_pfn, unsigned long end_pfn, - unsigned long page_size_mask) -{ - - if (start_pfn < end_pfn) { - if (nr_range >= NR_RANGE_MR) - panic("run out of range for init_memory_mapping\n"); - mr[nr_range].start = start_pfn<<PAGE_SHIFT; - mr[nr_range].end = end_pfn<<PAGE_SHIFT; - mr[nr_range].page_size_mask = page_size_mask; - nr_range++; - } - - return nr_range; -} - -/* - * Setup the direct mapping of the physical memory at PAGE_OFFSET. - * This runs before bootmem is initialized and gets pages directly from - * the physical memory. To access them they are temporarily mapped. - */ -unsigned long __init_refok init_memory_mapping(unsigned long start, - unsigned long end) -{ - unsigned long last_map_addr = 0; - unsigned long page_size_mask = 0; - unsigned long start_pfn, end_pfn; - unsigned long pos; - - struct map_range mr[NR_RANGE_MR]; - int nr_range, i; - int use_pse, use_gbpages; - - printk(KERN_INFO "init_memory_mapping: %016lx-%016lx\n", start, end); - - /* - * Find space for the kernel direct mapping tables. - * - * Later we should allocate these tables in the local node of the - * memory mapped. Unfortunately this is done currently before the - * nodes are discovered. - */ - if (!after_bootmem) - init_gbpages(); - -#ifdef CONFIG_DEBUG_PAGEALLOC - /* - * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages. - * This will simplify cpa(), which otherwise needs to support splitting - * large pages into small in interrupt context, etc. 
- */ - use_pse = use_gbpages = 0; -#else - use_pse = cpu_has_pse; - use_gbpages = direct_gbpages; -#endif - - if (use_gbpages) - page_size_mask |= 1 << PG_LEVEL_1G; - if (use_pse) - page_size_mask |= 1 << PG_LEVEL_2M; - - memset(mr, 0, sizeof(mr)); - nr_range = 0; - - /* head if not big page alignment ?*/ - start_pfn = start >> PAGE_SHIFT; - pos = start_pfn << PAGE_SHIFT; - end_pfn = ((pos + (PMD_SIZE - 1)) >> PMD_SHIFT) - << (PMD_SHIFT - PAGE_SHIFT); - if (end_pfn > (end >> PAGE_SHIFT)) - end_pfn = end >> PAGE_SHIFT; - if (start_pfn < end_pfn) { - nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); - pos = end_pfn << PAGE_SHIFT; - } - - /* big page (2M) range*/ - start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT) - << (PMD_SHIFT - PAGE_SHIFT); - end_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT) - << (PUD_SHIFT - PAGE_SHIFT); - if (end_pfn > ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT))) - end_pfn = ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT)); - if (start_pfn < end_pfn) { - nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, - page_size_mask & (1<<PG_LEVEL_2M)); - pos = end_pfn << PAGE_SHIFT; - } - - /* big page (1G) range */ - start_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT) - << (PUD_SHIFT - PAGE_SHIFT); - end_pfn = (end >> PUD_SHIFT) << (PUD_SHIFT - PAGE_SHIFT); - if (start_pfn < end_pfn) { - nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, - page_size_mask & - ((1<<PG_LEVEL_2M)|(1<<PG_LEVEL_1G))); - pos = end_pfn << PAGE_SHIFT; - } - - /* tail is not big page (1G) alignment */ - start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT) - << (PMD_SHIFT - PAGE_SHIFT); - end_pfn = (end >> PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); - if (start_pfn < end_pfn) { - nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, - page_size_mask & (1<<PG_LEVEL_2M)); - pos = end_pfn << PAGE_SHIFT; - } - - /* tail is not big page (2M) alignment */ - start_pfn = pos>>PAGE_SHIFT; - end_pfn = end>>PAGE_SHIFT; - nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); - - /* try to merge same page size and continuous */ - for (i = 0; nr_range > 1 && i < nr_range - 1; i++) { - unsigned long old_start; - if (mr[i].end != mr[i+1].start || - mr[i].page_size_mask != mr[i+1].page_size_mask) - continue; - /* move it */ - old_start = mr[i].start; - memmove(&mr[i], &mr[i+1], - (nr_range - 1 - i) * sizeof (struct map_range)); - mr[i--].start = old_start; - nr_range--; - } - - for (i = 0; i < nr_range; i++) - printk(KERN_DEBUG " %010lx - %010lx page %s\n", - mr[i].start, mr[i].end, - (mr[i].page_size_mask & (1<<PG_LEVEL_1G))?"1G":( - (mr[i].page_size_mask & (1<<PG_LEVEL_2M))?"2M":"4k")); - - if (!after_bootmem) - find_early_table_space(end, use_pse, use_gbpages); - - for (i = 0; i < nr_range; i++) - last_map_addr = kernel_physical_mapping_init( - mr[i].start, mr[i].end, - mr[i].page_size_mask); - - if (!after_bootmem) - mmu_cr4_features = read_cr4(); - __flush_tlb_all(); - - if (!after_bootmem && table_end > table_start) - reserve_early(table_start << PAGE_SHIFT, - table_end << PAGE_SHIFT, "PGTABLE"); - - printk(KERN_INFO "last_map_addr: %lx end: %lx\n", - last_map_addr, end); - - if (!after_bootmem) - early_memtest(start, end); - - return last_map_addr >> PAGE_SHIFT; -} - #ifndef CONFIG_NUMA void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn) { @@ -910,28 +681,6 @@ EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); #endif /* CONFIG_MEMORY_HOTPLUG */ -/* - * devmem_is_allowed() checks to see if /dev/mem access to a certain address - * is valid. The argument is a physical page number. 
- * - * - * On x86, access has to be given to the first megabyte of ram because that area - * contains bios code and data regions used by X and dosemu and similar apps. - * Access has to be given to non-kernel-ram areas as well, these contain the PCI - * mmio resources as well as potential bios/acpi data regions. - */ -int devmem_is_allowed(unsigned long pagenr) -{ - if (pagenr <= 256) - return 1; - if (iomem_is_exclusive(pagenr << PAGE_SHIFT)) - return 0; - if (!page_is_ram(pagenr)) - return 1; - return 0; -} - - static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules, kcore_vsyscall; @@ -1019,13 +768,6 @@ void mark_rodata_ro(void) #endif -#ifdef CONFIG_BLK_DEV_INITRD -void free_initrd_mem(unsigned long start, unsigned long end) -{ - free_init_pages("initrd memory", start, end); -} -#endif - int __init reserve_bootmem_generic(unsigned long phys, unsigned long len, int flags) { diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c index 04102d42ff4..699c9b2895a 100644 --- a/arch/x86/mm/iomap_32.c +++ b/arch/x86/mm/iomap_32.c @@ -31,16 +31,27 @@ int is_io_mapping_possible(resource_size_t base, unsigned long size) } EXPORT_SYMBOL_GPL(is_io_mapping_possible); -/* Map 'pfn' using fixed map 'type' and protections 'prot' - */ -void * -iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot) +void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot) { enum fixed_addresses idx; unsigned long vaddr; pagefault_disable(); + idx = type + KM_TYPE_NR * smp_processor_id(); + vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); + set_pte(kmap_pte - idx, pfn_pte(pfn, prot)); + arch_flush_lazy_mmu_mode(); + + return (void *)vaddr; +} + +/* + * Map 'pfn' using fixed map 'type' and protections 'prot' + */ +void * +iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot) +{ /* * For non-PAT systems, promote PAGE_KERNEL_WC to PAGE_KERNEL_UC_MINUS. * PAGE_KERNEL_WC maps to PWT, which translates to uncached if the @@ -50,12 +61,7 @@ iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot) if (!pat_enabled && pgprot_val(prot) == pgprot_val(PAGE_KERNEL_WC)) prot = PAGE_KERNEL_UC_MINUS; - idx = type + KM_TYPE_NR*smp_processor_id(); - vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); - set_pte(kmap_pte-idx, pfn_pte(pfn, prot)); - arch_flush_lazy_mmu_mode(); - - return (void*) vaddr; + return kmap_atomic_prot_pfn(pfn, type, prot); } EXPORT_SYMBOL_GPL(iomap_atomic_prot_pfn); diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 433f7bd4648..0dfa09d69e8 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -22,13 +22,17 @@ #include <asm/pgalloc.h> #include <asm/pat.h> -#ifdef CONFIG_X86_64 - -static inline int phys_addr_valid(unsigned long addr) +static inline int phys_addr_valid(resource_size_t addr) { - return addr < (1UL << boot_cpu_data.x86_phys_bits); +#ifdef CONFIG_PHYS_ADDR_T_64BIT + return !(addr >> boot_cpu_data.x86_phys_bits); +#else + return 1; +#endif } +#ifdef CONFIG_X86_64 + unsigned long __phys_addr(unsigned long x) { if (x >= __START_KERNEL_map) { @@ -38,8 +42,7 @@ unsigned long __phys_addr(unsigned long x) } else { VIRTUAL_BUG_ON(x < PAGE_OFFSET); x -= PAGE_OFFSET; - VIRTUAL_BUG_ON(system_state == SYSTEM_BOOTING ? x > MAXMEM : - !phys_addr_valid(x)); + VIRTUAL_BUG_ON(!phys_addr_valid(x)); } return x; } @@ -56,10 +59,8 @@ bool __virt_addr_valid(unsigned long x) if (x < PAGE_OFFSET) return false; x -= PAGE_OFFSET; - if (system_state == SYSTEM_BOOTING ? 
- x > MAXMEM : !phys_addr_valid(x)) { + if (!phys_addr_valid(x)) return false; - } } return pfn_valid(x >> PAGE_SHIFT); @@ -68,18 +69,12 @@ EXPORT_SYMBOL(__virt_addr_valid); #else -static inline int phys_addr_valid(unsigned long addr) -{ - return 1; -} - #ifdef CONFIG_DEBUG_VIRTUAL unsigned long __phys_addr(unsigned long x) { - /* VMALLOC_* aren't constants; not available at the boot time */ + /* VMALLOC_* aren't constants */ VIRTUAL_BUG_ON(x < PAGE_OFFSET); - VIRTUAL_BUG_ON(system_state != SYSTEM_BOOTING && - is_vmalloc_addr((void *) x)); + VIRTUAL_BUG_ON(__vmalloc_start_set && is_vmalloc_addr((void *) x)); return x - PAGE_OFFSET; } EXPORT_SYMBOL(__phys_addr); @@ -89,7 +84,9 @@ bool __virt_addr_valid(unsigned long x) { if (x < PAGE_OFFSET) return false; - if (system_state != SYSTEM_BOOTING && is_vmalloc_addr((void *) x)) + if (__vmalloc_start_set && is_vmalloc_addr((void *) x)) + return false; + if (x >= FIXADDR_START) return false; return pfn_valid((x - PAGE_OFFSET) >> PAGE_SHIFT); } @@ -508,13 +505,19 @@ static inline pte_t * __init early_ioremap_pte(unsigned long addr) return &bm_pte[pte_index(addr)]; } +static unsigned long slot_virt[FIX_BTMAPS_SLOTS] __initdata; + void __init early_ioremap_init(void) { pmd_t *pmd; + int i; if (early_ioremap_debug) printk(KERN_INFO "early_ioremap_init()\n"); + for (i = 0; i < FIX_BTMAPS_SLOTS; i++) + slot_virt[i] = __fix_to_virt(FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*i); + pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)); memset(bm_pte, 0, sizeof(bm_pte)); pmd_populate_kernel(&init_mm, pmd, bm_pte); @@ -581,6 +584,7 @@ static inline void __init early_clear_fixmap(enum fixed_addresses idx) static void __iomem *prev_map[FIX_BTMAPS_SLOTS] __initdata; static unsigned long prev_size[FIX_BTMAPS_SLOTS] __initdata; + static int __init check_early_ioremap_leak(void) { int count = 0; @@ -602,7 +606,8 @@ static int __init check_early_ioremap_leak(void) } late_initcall(check_early_ioremap_leak); -static void __init __iomem *__early_ioremap(unsigned long phys_addr, unsigned long size, pgprot_t prot) +static void __init __iomem * +__early_ioremap(unsigned long phys_addr, unsigned long size, pgprot_t prot) { unsigned long offset, last_addr; unsigned int nrpages; @@ -668,9 +673,9 @@ static void __init __iomem *__early_ioremap(unsigned long phys_addr, unsigned lo --nrpages; } if (early_ioremap_debug) - printk(KERN_CONT "%08lx + %08lx\n", offset, fix_to_virt(idx0)); + printk(KERN_CONT "%08lx + %08lx\n", offset, slot_virt[slot]); - prev_map[slot] = (void __iomem *)(offset + fix_to_virt(idx0)); + prev_map[slot] = (void __iomem *)(offset + slot_virt[slot]); return prev_map[slot]; } @@ -738,8 +743,3 @@ void __init early_iounmap(void __iomem *addr, unsigned long size) } prev_map[slot] = NULL; } - -void __this_fixmap_does_not_exist(void) -{ - WARN_ON(1); -} diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index 6a518dd08a3..4f115e00486 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -310,7 +310,7 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx); if (!ctx->active) { - pr_warning("kmmio: spurious debug trap on CPU %d.\n", + pr_debug("kmmio: spurious debug trap on CPU %d.\n", smp_processor_id()); goto out; } diff --git a/arch/x86/mm/memtest.c b/arch/x86/mm/memtest.c index 0bcd7883d03..605c8be0621 100644 --- a/arch/x86/mm/memtest.c +++ b/arch/x86/mm/memtest.c @@ -100,6 +100,9 @@ static int __init parse_memtest(char *arg) { if (arg) memtest_pattern = simple_strtoul(arg, NULL, 0); 
+ else + memtest_pattern = ARRAY_SIZE(patterns); + return 0; } diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c index 451fe95a035..3daefa04ace 100644 --- a/arch/x86/mm/numa_32.c +++ b/arch/x86/mm/numa_32.c @@ -416,10 +416,11 @@ void __init initmem_init(unsigned long start_pfn, for_each_online_node(nid) propagate_e820_map_node(nid); - for_each_online_node(nid) + for_each_online_node(nid) { memset(NODE_DATA(nid), 0, sizeof(struct pglist_data)); + NODE_DATA(nid)->bdata = &bootmem_node_data[nid]; + } - NODE_DATA(0)->bdata = &bootmem_node_data[0]; setup_bootmem_allocator(); } diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 9c4294986af..d71e1b636ce 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -16,6 +16,7 @@ #include <asm/processor.h> #include <asm/tlbflush.h> #include <asm/sections.h> +#include <asm/setup.h> #include <asm/uaccess.h> #include <asm/pgalloc.h> #include <asm/proto.h> @@ -33,6 +34,7 @@ struct cpa_data { unsigned long pfn; unsigned force_split : 1; int curpage; + struct page **pages; }; /* @@ -45,6 +47,7 @@ static DEFINE_SPINLOCK(cpa_lock); #define CPA_FLUSHTLB 1 #define CPA_ARRAY 2 +#define CPA_PAGES_ARRAY 4 #ifdef CONFIG_PROC_FS static unsigned long direct_pages_count[PG_LEVEL_NUM]; @@ -95,7 +98,7 @@ static inline unsigned long highmap_start_pfn(void) static inline unsigned long highmap_end_pfn(void) { - return __pa(roundup((unsigned long)_end, PMD_SIZE)) >> PAGE_SHIFT; + return __pa(roundup(_brk_end, PMD_SIZE)) >> PAGE_SHIFT; } #endif @@ -201,10 +204,10 @@ static void cpa_flush_range(unsigned long start, int numpages, int cache) } } -static void cpa_flush_array(unsigned long *start, int numpages, int cache) +static void cpa_flush_array(unsigned long *start, int numpages, int cache, + int in_flags, struct page **pages) { unsigned int i, level; - unsigned long *addr; BUG_ON(irqs_disabled()); @@ -225,14 +228,22 @@ static void cpa_flush_array(unsigned long *start, int numpages, int cache) * will cause all other CPUs to flush the same * cachelines: */ - for (i = 0, addr = start; i < numpages; i++, addr++) { - pte_t *pte = lookup_address(*addr, &level); + for (i = 0; i < numpages; i++) { + unsigned long addr; + pte_t *pte; + + if (in_flags & CPA_PAGES_ARRAY) + addr = (unsigned long)page_address(pages[i]); + else + addr = start[i]; + + pte = lookup_address(addr, &level); /* * Only flush present addresses: */ if (pte && (pte_val(*pte) & _PAGE_PRESENT)) - clflush_cache_range((void *) *addr, PAGE_SIZE); + clflush_cache_range((void *)addr, PAGE_SIZE); } } @@ -584,7 +595,9 @@ static int __change_page_attr(struct cpa_data *cpa, int primary) unsigned int level; pte_t *kpte, old_pte; - if (cpa->flags & CPA_ARRAY) + if (cpa->flags & CPA_PAGES_ARRAY) + address = (unsigned long)page_address(cpa->pages[cpa->curpage]); + else if (cpa->flags & CPA_ARRAY) address = cpa->vaddr[cpa->curpage]; else address = *cpa->vaddr; @@ -687,7 +700,9 @@ static int cpa_process_alias(struct cpa_data *cpa) * No need to redo, when the primary call touched the direct * mapping already: */ - if (cpa->flags & CPA_ARRAY) + if (cpa->flags & CPA_PAGES_ARRAY) + vaddr = (unsigned long)page_address(cpa->pages[cpa->curpage]); + else if (cpa->flags & CPA_ARRAY) vaddr = cpa->vaddr[cpa->curpage]; else vaddr = *cpa->vaddr; @@ -698,7 +713,7 @@ static int cpa_process_alias(struct cpa_data *cpa) alias_cpa = *cpa; temp_cpa_vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT); alias_cpa.vaddr = &temp_cpa_vaddr; - alias_cpa.flags &= ~CPA_ARRAY; + alias_cpa.flags &= ~(CPA_PAGES_ARRAY | 
CPA_ARRAY); ret = __change_page_attr_set_clr(&alias_cpa, 0); @@ -711,7 +726,7 @@ static int cpa_process_alias(struct cpa_data *cpa) * No need to redo, when the primary call touched the high * mapping already: */ - if (within(vaddr, (unsigned long) _text, (unsigned long) _end)) + if (within(vaddr, (unsigned long) _text, _brk_end)) return 0; /* @@ -724,7 +739,7 @@ static int cpa_process_alias(struct cpa_data *cpa) alias_cpa = *cpa; temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) + __START_KERNEL_map - phys_base; alias_cpa.vaddr = &temp_cpa_vaddr; - alias_cpa.flags &= ~CPA_ARRAY; + alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY); /* * The high mapping range is imprecise, so ignore the return value. @@ -745,7 +760,7 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias) */ cpa->numpages = numpages; /* for array changes, we can't use large page */ - if (cpa->flags & CPA_ARRAY) + if (cpa->flags & (CPA_ARRAY | CPA_PAGES_ARRAY)) cpa->numpages = 1; if (!debug_pagealloc) @@ -769,7 +784,7 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias) */ BUG_ON(cpa->numpages > numpages); numpages -= cpa->numpages; - if (cpa->flags & CPA_ARRAY) + if (cpa->flags & (CPA_PAGES_ARRAY | CPA_ARRAY)) cpa->curpage++; else *cpa->vaddr += cpa->numpages * PAGE_SIZE; @@ -786,7 +801,8 @@ static inline int cache_attr(pgprot_t attr) static int change_page_attr_set_clr(unsigned long *addr, int numpages, pgprot_t mask_set, pgprot_t mask_clr, - int force_split, int array) + int force_split, int in_flag, + struct page **pages) { struct cpa_data cpa; int ret, cache, checkalias; @@ -801,15 +817,7 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, return 0; /* Ensure we are PAGE_SIZE aligned */ - if (!array) { - if (*addr & ~PAGE_MASK) { - *addr &= PAGE_MASK; - /* - * People should not be passing in unaligned addresses: - */ - WARN_ON_ONCE(1); - } - } else { + if (in_flag & CPA_ARRAY) { int i; for (i = 0; i < numpages; i++) { if (addr[i] & ~PAGE_MASK) { @@ -817,6 +825,18 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, WARN_ON_ONCE(1); } } + } else if (!(in_flag & CPA_PAGES_ARRAY)) { + /* + * in_flag of CPA_PAGES_ARRAY implies it is aligned. 
+ * No need to cehck in that case + */ + if (*addr & ~PAGE_MASK) { + *addr &= PAGE_MASK; + /* + * People should not be passing in unaligned addresses: + */ + WARN_ON_ONCE(1); + } } /* Must avoid aliasing mappings in the highmem code */ @@ -832,6 +852,7 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, arch_flush_lazy_mmu_mode(); cpa.vaddr = addr; + cpa.pages = pages; cpa.numpages = numpages; cpa.mask_set = mask_set; cpa.mask_clr = mask_clr; @@ -839,8 +860,8 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, cpa.curpage = 0; cpa.force_split = force_split; - if (array) - cpa.flags |= CPA_ARRAY; + if (in_flag & (CPA_ARRAY | CPA_PAGES_ARRAY)) + cpa.flags |= in_flag; /* No alias checking for _NX bit modifications */ checkalias = (pgprot_val(mask_set) | pgprot_val(mask_clr)) != _PAGE_NX; @@ -866,9 +887,10 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, * wbindv): */ if (!ret && cpu_has_clflush) { - if (cpa.flags & CPA_ARRAY) - cpa_flush_array(addr, numpages, cache); - else + if (cpa.flags & (CPA_PAGES_ARRAY | CPA_ARRAY)) { + cpa_flush_array(addr, numpages, cache, + cpa.flags, pages); + } else cpa_flush_range(*addr, numpages, cache); } else cpa_flush_all(cache); @@ -888,14 +910,28 @@ static inline int change_page_attr_set(unsigned long *addr, int numpages, pgprot_t mask, int array) { return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0), 0, - array); + (array ? CPA_ARRAY : 0), NULL); } static inline int change_page_attr_clear(unsigned long *addr, int numpages, pgprot_t mask, int array) { return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask, 0, - array); + (array ? CPA_ARRAY : 0), NULL); +} + +static inline int cpa_set_pages_array(struct page **pages, int numpages, + pgprot_t mask) +{ + return change_page_attr_set_clr(NULL, numpages, mask, __pgprot(0), 0, + CPA_PAGES_ARRAY, pages); +} + +static inline int cpa_clear_pages_array(struct page **pages, int numpages, + pgprot_t mask) +{ + return change_page_attr_set_clr(NULL, numpages, __pgprot(0), mask, 0, + CPA_PAGES_ARRAY, pages); } int _set_memory_uc(unsigned long addr, int numpages) @@ -1043,7 +1079,7 @@ int set_memory_np(unsigned long addr, int numpages) int set_memory_4k(unsigned long addr, int numpages) { return change_page_attr_set_clr(&addr, numpages, __pgprot(0), - __pgprot(0), 1, 0); + __pgprot(0), 1, 0, NULL); } int set_pages_uc(struct page *page, int numpages) @@ -1054,6 +1090,35 @@ int set_pages_uc(struct page *page, int numpages) } EXPORT_SYMBOL(set_pages_uc); +int set_pages_array_uc(struct page **pages, int addrinarray) +{ + unsigned long start; + unsigned long end; + int i; + int free_idx; + + for (i = 0; i < addrinarray; i++) { + start = (unsigned long)page_address(pages[i]); + end = start + PAGE_SIZE; + if (reserve_memtype(start, end, _PAGE_CACHE_UC_MINUS, NULL)) + goto err_out; + } + + if (cpa_set_pages_array(pages, addrinarray, + __pgprot(_PAGE_CACHE_UC_MINUS)) == 0) { + return 0; /* Success */ + } +err_out: + free_idx = i; + for (i = 0; i < free_idx; i++) { + start = (unsigned long)page_address(pages[i]); + end = start + PAGE_SIZE; + free_memtype(start, end); + } + return -EINVAL; +} +EXPORT_SYMBOL(set_pages_array_uc); + int set_pages_wb(struct page *page, int numpages) { unsigned long addr = (unsigned long)page_address(page); @@ -1062,6 +1127,26 @@ int set_pages_wb(struct page *page, int numpages) } EXPORT_SYMBOL(set_pages_wb); +int set_pages_array_wb(struct page **pages, int addrinarray) +{ + int retval; + unsigned long start; + 
unsigned long end; + int i; + + retval = cpa_clear_pages_array(pages, addrinarray, + __pgprot(_PAGE_CACHE_MASK)); + + for (i = 0; i < addrinarray; i++) { + start = (unsigned long)page_address(pages[i]); + end = start + PAGE_SIZE; + free_memtype(start, end); + } + + return retval; +} +EXPORT_SYMBOL(set_pages_array_wb); + int set_pages_x(struct page *page, int numpages) { unsigned long addr = (unsigned long)page_address(page); diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 2ed37158012..640339ee4fb 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -677,10 +677,11 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot, is_ram = pat_pagerange_is_ram(paddr, paddr + size); /* - * reserve_pfn_range() doesn't support RAM pages. + * reserve_pfn_range() doesn't support RAM pages. Maintain the current + * behavior with RAM pages by returning success. */ if (is_ram != 0) - return -EINVAL; + return 0; ret = reserve_memtype(paddr, paddr + size, want_flags, &flags); if (ret) diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c index f2e477c91c1..46c8834aedc 100644 --- a/arch/x86/mm/pgtable_32.c +++ b/arch/x86/mm/pgtable_32.c @@ -50,7 +50,7 @@ void set_pte_vaddr(unsigned long vaddr, pte_t pteval) } pte = pte_offset_kernel(pmd, vaddr); if (pte_val(pteval)) - set_pte_present(&init_mm, vaddr, pte, pteval); + set_pte_at(&init_mm, vaddr, pte, pteval); else pte_clear(&init_mm, vaddr, pte); diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index a654d59e448..821e97017e9 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -187,11 +187,6 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask, cpumask, cpumask_of(smp_processor_id())); /* - * Make the above memory operations globally visible before - * sending the IPI. - */ - smp_mb(); - /* * We have to send the IPI only to * CPUs affected. */ |