Diffstat (limited to 'arch/arm/mm')
43 files changed, 1154 insertions, 257 deletions
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index 35955b54944..db5c2cab8fd 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -392,11 +392,21 @@ config CPU_V7 select CPU_CACHE_V7 select CPU_CACHE_VIPT select CPU_COPY_V6 if MMU - select CPU_CP15_MMU + select CPU_CP15_MMU if MMU + select CPU_CP15_MPU if !MMU select CPU_HAS_ASID if MMU select CPU_PABRT_V7 select CPU_TLB_V7 if MMU +# ARMv7M +config CPU_V7M + bool + select CPU_32v7M + select CPU_ABRT_NOMMU + select CPU_CACHE_NOP + select CPU_PABRT_LEGACY + select CPU_THUMBONLY + config CPU_THUMBONLY bool # There are no CPUs available with MMU that don't implement an ARM ISA: @@ -411,24 +421,28 @@ config CPU_32v3 select CPU_USE_DOMAINS if MMU select NEEDS_SYSCALL_FOR_CMPXCHG if SMP select TLS_REG_EMUL if SMP || !MMU + select NEED_KUSER_HELPERS config CPU_32v4 bool select CPU_USE_DOMAINS if MMU select NEEDS_SYSCALL_FOR_CMPXCHG if SMP select TLS_REG_EMUL if SMP || !MMU + select NEED_KUSER_HELPERS config CPU_32v4T bool select CPU_USE_DOMAINS if MMU select NEEDS_SYSCALL_FOR_CMPXCHG if SMP select TLS_REG_EMUL if SMP || !MMU + select NEED_KUSER_HELPERS config CPU_32v5 bool select CPU_USE_DOMAINS if MMU select NEEDS_SYSCALL_FOR_CMPXCHG if SMP select TLS_REG_EMUL if SMP || !MMU + select NEED_KUSER_HELPERS config CPU_32v6 bool @@ -441,6 +455,9 @@ config CPU_32v6K config CPU_32v7 bool +config CPU_32v7M + bool + # The abort model config CPU_ABRT_NOMMU bool @@ -491,6 +508,9 @@ config CPU_CACHE_V6 config CPU_CACHE_V7 bool +config CPU_CACHE_NOP + bool + config CPU_CACHE_VIVT bool @@ -613,7 +633,11 @@ config ARCH_DMA_ADDR_T_64BIT config ARM_THUMB bool "Support Thumb user binaries" if !CPU_THUMBONLY - depends on CPU_ARM720T || CPU_ARM740T || CPU_ARM920T || CPU_ARM922T || CPU_ARM925T || CPU_ARM926T || CPU_ARM940T || CPU_ARM946E || CPU_ARM1020 || CPU_ARM1020E || CPU_ARM1022 || CPU_ARM1026 || CPU_XSCALE || CPU_XSC3 || CPU_MOHAWK || CPU_V6 || CPU_V6K || CPU_V7 || CPU_FEROCEON + depends on CPU_ARM720T || CPU_ARM740T || CPU_ARM920T || CPU_ARM922T || \ + CPU_ARM925T || CPU_ARM926T || CPU_ARM940T || CPU_ARM946E || \ + CPU_ARM1020 || CPU_ARM1020E || CPU_ARM1022 || CPU_ARM1026 || \ + CPU_XSCALE || CPU_XSC3 || CPU_MOHAWK || CPU_V6 || CPU_V6K || \ + CPU_V7 || CPU_FEROCEON || CPU_V7M default y help Say Y if you want to include kernel support for running user space @@ -756,6 +780,7 @@ config CPU_BPREDICT_DISABLE config TLS_REG_EMUL bool + select NEED_KUSER_HELPERS help An SMP system using a pre-ARMv6 processor (there are apparently a few prototypes like that in existence) and therefore access to @@ -763,11 +788,40 @@ config TLS_REG_EMUL config NEEDS_SYSCALL_FOR_CMPXCHG bool + select NEED_KUSER_HELPERS help SMP on a pre-ARMv6 processor? Well OK then. Forget about fast user space cmpxchg support. It is just not possible. +config NEED_KUSER_HELPERS + bool + +config KUSER_HELPERS + bool "Enable kuser helpers in vector page" if !NEED_KUSER_HELPERS + default y + help + Warning: disabling this option may break user programs. + + Provide kuser helpers in the vector page. The kernel provides + helper code to userspace in read only form at a fixed location + in the high vector page to allow userspace to be independent of + the CPU type fitted to the system. This permits binaries to be + run on ARMv4 through to ARMv7 without modification. + + However, the fixed address nature of these helpers can be used + by ROP (return orientated programming) authors when creating + exploits. 
+ + If all of the binaries and libraries which run on your platform + are built specifically for your platform, and make no use of + these helpers, then you can turn this option off. However, + when such an binary or library is run, it will receive a SIGILL + signal, which will terminate the program. + + Say N here only if you are absolutely certain that you do not + need these helpers; otherwise, the safe option is to say Y. + config DMA_CACHE_RWFO bool "Enable read/write for ownership DMA cache maintenance" depends on CPU_V6K && SMP diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile index 9e51be96f63..ecfe6e53f6e 100644 --- a/arch/arm/mm/Makefile +++ b/arch/arm/mm/Makefile @@ -16,6 +16,7 @@ obj-$(CONFIG_MODULES) += proc-syms.o obj-$(CONFIG_ALIGNMENT_TRAP) += alignment.o obj-$(CONFIG_HIGHMEM) += highmem.o +obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_CPU_ABRT_NOMMU) += abort-nommu.o obj-$(CONFIG_CPU_ABRT_EV4) += abort-ev4.o @@ -39,6 +40,7 @@ obj-$(CONFIG_CPU_CACHE_V4WB) += cache-v4wb.o obj-$(CONFIG_CPU_CACHE_V6) += cache-v6.o obj-$(CONFIG_CPU_CACHE_V7) += cache-v7.o obj-$(CONFIG_CPU_CACHE_FA) += cache-fa.o +obj-$(CONFIG_CPU_CACHE_NOP) += cache-nop.o AFLAGS_cache-v6.o :=-Wa,-march=armv6 AFLAGS_cache-v7.o :=-Wa,-march=armv7-a @@ -87,6 +89,7 @@ obj-$(CONFIG_CPU_FEROCEON) += proc-feroceon.o obj-$(CONFIG_CPU_V6) += proc-v6.o obj-$(CONFIG_CPU_V6K) += proc-v6.o obj-$(CONFIG_CPU_V7) += proc-v7.o +obj-$(CONFIG_CPU_V7M) += proc-v7m.o AFLAGS_proc-v6.o :=-Wa,-march=armv6 AFLAGS_proc-v7.o :=-Wa,-march=armv7-a diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c index c465faca51b..d70e0aba0c9 100644 --- a/arch/arm/mm/cache-l2x0.c +++ b/arch/arm/mm/cache-l2x0.c @@ -523,6 +523,147 @@ static void aurora_flush_range(unsigned long start, unsigned long end) } } +/* + * For certain Broadcom SoCs, depending on the address range, different offsets + * need to be added to the address before passing it to L2 for + * invalidation/clean/flush + * + * Section Address Range Offset EMI + * 1 0x00000000 - 0x3FFFFFFF 0x80000000 VC + * 2 0x40000000 - 0xBFFFFFFF 0x40000000 SYS + * 3 0xC0000000 - 0xFFFFFFFF 0x80000000 VC + * + * When the start and end addresses have crossed two different sections, we + * need to break the L2 operation into two, each within its own section. + * For example, if we need to invalidate addresses starts at 0xBFFF0000 and + * ends at 0xC0001000, we need do invalidate 1) 0xBFFF0000 - 0xBFFFFFFF and 2) + * 0xC0000000 - 0xC0001000 + * + * Note 1: + * By breaking a single L2 operation into two, we may potentially suffer some + * performance hit, but keep in mind the cross section case is very rare + * + * Note 2: + * We do not need to handle the case when the start address is in + * Section 1 and the end address is in Section 3, since it is not a valid use + * case + * + * Note 3: + * Section 1 in practical terms can no longer be used on rev A2. Because of + * that the code does not need to handle section 1 at all. 
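As a standalone illustration of the offset scheme described in the comment above (plain C, illustrative only and not part of the patch; the names parallel the bcm_* helpers this hunk adds, and the example range is the one given in the comment):

#include <stdint.h>
#include <inttypes.h>
#include <stdio.h>

#define SYS_EMI_START  0x40000000u      /* section 2 start */
#define SEC3_START     0xC0000000u      /* section 3 start */
#define SYS_EMI_OFFSET 0x40000000u      /* added for section 2 (SYS EMI) */
#define VC_EMI_OFFSET  0x80000000u      /* added for sections 1 and 3 (VC EMI) */

static int is_sys_emi(uint32_t addr)
{
    return addr >= SYS_EMI_START && addr < SEC3_START;
}

static uint32_t l2_phys(uint32_t addr)
{
    /* 32-bit arithmetic, as the kernel helper uses on a 32-bit SoC */
    return addr + (is_sys_emi(addr) ? SYS_EMI_OFFSET : VC_EMI_OFFSET);
}

int main(void)
{
    uint32_t start = 0xBFFF0000u, end = 0xC0001000u;

    if (is_sys_emi(end) || !is_sys_emi(start)) {
        /* start and end use the same offset: a single L2 operation */
        printf("single op: 0x%08" PRIx32 "..0x%08" PRIx32 "\n",
               l2_phys(start), l2_phys(end));
    } else {
        /* the range crosses the section 2/3 boundary: split it in two */
        printf("op 1: 0x%08" PRIx32 "..0x%08" PRIx32 "\n",
               l2_phys(start), l2_phys(SEC3_START - 1));
        printf("op 2: 0x%08" PRIx32 "..0x%08" PRIx32 "\n",
               l2_phys(SEC3_START), l2_phys(end));
    }
    return 0;
}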
+ * + */ +#define BCM_SYS_EMI_START_ADDR 0x40000000UL +#define BCM_VC_EMI_SEC3_START_ADDR 0xC0000000UL + +#define BCM_SYS_EMI_OFFSET 0x40000000UL +#define BCM_VC_EMI_OFFSET 0x80000000UL + +static inline int bcm_addr_is_sys_emi(unsigned long addr) +{ + return (addr >= BCM_SYS_EMI_START_ADDR) && + (addr < BCM_VC_EMI_SEC3_START_ADDR); +} + +static inline unsigned long bcm_l2_phys_addr(unsigned long addr) +{ + if (bcm_addr_is_sys_emi(addr)) + return addr + BCM_SYS_EMI_OFFSET; + else + return addr + BCM_VC_EMI_OFFSET; +} + +static void bcm_inv_range(unsigned long start, unsigned long end) +{ + unsigned long new_start, new_end; + + BUG_ON(start < BCM_SYS_EMI_START_ADDR); + + if (unlikely(end <= start)) + return; + + new_start = bcm_l2_phys_addr(start); + new_end = bcm_l2_phys_addr(end); + + /* normal case, no cross section between start and end */ + if (likely(bcm_addr_is_sys_emi(end) || !bcm_addr_is_sys_emi(start))) { + l2x0_inv_range(new_start, new_end); + return; + } + + /* They cross sections, so it can only be a cross from section + * 2 to section 3 + */ + l2x0_inv_range(new_start, + bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR-1)); + l2x0_inv_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR), + new_end); +} + +static void bcm_clean_range(unsigned long start, unsigned long end) +{ + unsigned long new_start, new_end; + + BUG_ON(start < BCM_SYS_EMI_START_ADDR); + + if (unlikely(end <= start)) + return; + + if ((end - start) >= l2x0_size) { + l2x0_clean_all(); + return; + } + + new_start = bcm_l2_phys_addr(start); + new_end = bcm_l2_phys_addr(end); + + /* normal case, no cross section between start and end */ + if (likely(bcm_addr_is_sys_emi(end) || !bcm_addr_is_sys_emi(start))) { + l2x0_clean_range(new_start, new_end); + return; + } + + /* They cross sections, so it can only be a cross from section + * 2 to section 3 + */ + l2x0_clean_range(new_start, + bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR-1)); + l2x0_clean_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR), + new_end); +} + +static void bcm_flush_range(unsigned long start, unsigned long end) +{ + unsigned long new_start, new_end; + + BUG_ON(start < BCM_SYS_EMI_START_ADDR); + + if (unlikely(end <= start)) + return; + + if ((end - start) >= l2x0_size) { + l2x0_flush_all(); + return; + } + + new_start = bcm_l2_phys_addr(start); + new_end = bcm_l2_phys_addr(end); + + /* normal case, no cross section between start and end */ + if (likely(bcm_addr_is_sys_emi(end) || !bcm_addr_is_sys_emi(start))) { + l2x0_flush_range(new_start, new_end); + return; + } + + /* They cross sections, so it can only be a cross from section + * 2 to section 3 + */ + l2x0_flush_range(new_start, + bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR-1)); + l2x0_flush_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR), + new_end); +} + static void __init l2x0_of_setup(const struct device_node *np, u32 *aux_val, u32 *aux_mask) { @@ -765,6 +906,21 @@ static const struct l2x0_of_data aurora_no_outer_data = { }, }; +static const struct l2x0_of_data bcm_l2x0_data = { + .setup = pl310_of_setup, + .save = pl310_save, + .outer_cache = { + .resume = pl310_resume, + .inv_range = bcm_inv_range, + .clean_range = bcm_clean_range, + .flush_range = bcm_flush_range, + .sync = l2x0_cache_sync, + .flush_all = l2x0_flush_all, + .inv_all = l2x0_inv_all, + .disable = l2x0_disable, + }, +}; + static const struct of_device_id l2x0_ids[] __initconst = { { .compatible = "arm,pl310-cache", .data = (void *)&pl310_data }, { .compatible = "arm,l220-cache", .data = (void *)&l2x0_data }, @@ -773,6 +929,8 @@ 
static const struct of_device_id l2x0_ids[] __initconst = { .data = (void *)&aurora_no_outer_data}, { .compatible = "marvell,aurora-outer-cache", .data = (void *)&aurora_with_outer_data}, + { .compatible = "bcm,bcm11351-a2-pl310-cache", + .data = (void *)&bcm_l2x0_data}, {} }; diff --git a/arch/arm/mm/cache-nop.S b/arch/arm/mm/cache-nop.S new file mode 100644 index 00000000000..8e12ddca003 --- /dev/null +++ b/arch/arm/mm/cache-nop.S @@ -0,0 +1,50 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/linkage.h> +#include <linux/init.h> + +#include "proc-macros.S" + +ENTRY(nop_flush_icache_all) + mov pc, lr +ENDPROC(nop_flush_icache_all) + + .globl nop_flush_kern_cache_all + .equ nop_flush_kern_cache_all, nop_flush_icache_all + + .globl nop_flush_kern_cache_louis + .equ nop_flush_kern_cache_louis, nop_flush_icache_all + + .globl nop_flush_user_cache_all + .equ nop_flush_user_cache_all, nop_flush_icache_all + + .globl nop_flush_user_cache_range + .equ nop_flush_user_cache_range, nop_flush_icache_all + + .globl nop_coherent_kern_range + .equ nop_coherent_kern_range, nop_flush_icache_all + +ENTRY(nop_coherent_user_range) + mov r0, 0 + mov pc, lr +ENDPROC(nop_coherent_user_range) + + .globl nop_flush_kern_dcache_area + .equ nop_flush_kern_dcache_area, nop_flush_icache_all + + .globl nop_dma_flush_range + .equ nop_dma_flush_range, nop_flush_icache_all + + .globl nop_dma_map_area + .equ nop_dma_map_area, nop_flush_icache_all + + .globl nop_dma_unmap_area + .equ nop_dma_unmap_area, nop_flush_icache_all + + __INITDATA + + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions nop diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S index 15451ee4acc..515b00064da 100644 --- a/arch/arm/mm/cache-v7.S +++ b/arch/arm/mm/cache-v7.S @@ -92,6 +92,14 @@ ENTRY(v7_flush_dcache_louis) mrc p15, 1, r0, c0, c0, 1 @ read clidr, r0 = clidr ALT_SMP(ands r3, r0, #(7 << 21)) @ extract LoUIS from clidr ALT_UP(ands r3, r0, #(7 << 27)) @ extract LoUU from clidr +#ifdef CONFIG_ARM_ERRATA_643719 + ALT_SMP(mrceq p15, 0, r2, c0, c0, 0) @ read main ID register + ALT_UP(moveq pc, lr) @ LoUU is zero, so nothing to do + ldreq r1, =0x410fc090 @ ID of ARM Cortex A9 r0p? + biceq r2, r2, #0x0000000f @ clear minor revision number + teqeq r2, r1 @ test for errata affected core and if so... + orreqs r3, #(1 << 21) @ fix LoUIS value (and set flags state to 'ne') +#endif ALT_SMP(mov r3, r3, lsr #20) @ r3 = LoUIS * 2 ALT_UP(mov r3, r3, lsr #26) @ r3 = LoUU * 2 moveq pc, lr @ return if level == 0 diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c index 2ac37372ef5..4a0544492f1 100644 --- a/arch/arm/mm/context.c +++ b/arch/arm/mm/context.c @@ -20,6 +20,7 @@ #include <asm/smp_plat.h> #include <asm/thread_notify.h> #include <asm/tlbflush.h> +#include <asm/proc-fns.h> /* * On ARMv6, we have the following structure in the Context ID: @@ -39,33 +40,51 @@ * non 64-bit operations. 
*/ #define ASID_FIRST_VERSION (1ULL << ASID_BITS) -#define NUM_USER_ASIDS (ASID_FIRST_VERSION - 1) - -#define ASID_TO_IDX(asid) ((asid & ~ASID_MASK) - 1) -#define IDX_TO_ASID(idx) ((idx + 1) & ~ASID_MASK) +#define NUM_USER_ASIDS ASID_FIRST_VERSION static DEFINE_RAW_SPINLOCK(cpu_asid_lock); static atomic64_t asid_generation = ATOMIC64_INIT(ASID_FIRST_VERSION); static DECLARE_BITMAP(asid_map, NUM_USER_ASIDS); -DEFINE_PER_CPU(atomic64_t, active_asids); +static DEFINE_PER_CPU(atomic64_t, active_asids); static DEFINE_PER_CPU(u64, reserved_asids); static cpumask_t tlb_flush_pending; +#ifdef CONFIG_ARM_ERRATA_798181 +void a15_erratum_get_cpumask(int this_cpu, struct mm_struct *mm, + cpumask_t *mask) +{ + int cpu; + unsigned long flags; + u64 context_id, asid; + + raw_spin_lock_irqsave(&cpu_asid_lock, flags); + context_id = mm->context.id.counter; + for_each_online_cpu(cpu) { + if (cpu == this_cpu) + continue; + /* + * We only need to send an IPI if the other CPUs are + * running the same ASID as the one being invalidated. + */ + asid = per_cpu(active_asids, cpu).counter; + if (asid == 0) + asid = per_cpu(reserved_asids, cpu); + if (context_id == asid) + cpumask_set_cpu(cpu, mask); + } + raw_spin_unlock_irqrestore(&cpu_asid_lock, flags); +} +#endif + #ifdef CONFIG_ARM_LPAE static void cpu_set_reserved_ttbr0(void) { - unsigned long ttbl = __pa(swapper_pg_dir); - unsigned long ttbh = 0; - /* * Set TTBR0 to swapper_pg_dir which contains only global entries. The * ASID is set to 0. */ - asm volatile( - " mcrr p15, 0, %0, %1, c2 @ set TTBR0\n" - : - : "r" (ttbl), "r" (ttbh)); + cpu_set_ttbr(0, __pa(swapper_pg_dir)); isb(); } #else @@ -128,7 +147,16 @@ static void flush_context(unsigned int cpu) asid = 0; } else { asid = atomic64_xchg(&per_cpu(active_asids, i), 0); - __set_bit(ASID_TO_IDX(asid), asid_map); + /* + * If this CPU has already been through a + * rollover, but hasn't run another task in + * the meantime, we must preserve its reserved + * ASID, as this is the only trace we have of + * the process it is still running. + */ + if (asid == 0) + asid = per_cpu(reserved_asids, i); + __set_bit(asid & ~ASID_MASK, asid_map); } per_cpu(reserved_asids, i) = asid; } @@ -167,17 +195,19 @@ static u64 new_context(struct mm_struct *mm, unsigned int cpu) /* * Allocate a free ASID. If we can't find one, take a * note of the currently active ASIDs and mark the TLBs - * as requiring flushes. + * as requiring flushes. We always count from ASID #1, + * as we reserve ASID #0 to switch via TTBR0 and indicate + * rollover events. 
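A minimal userspace sketch of the allocation scheme described above (illustrative only, not part of the patch; it assumes an 8-bit ASID field for brevity): ASIDs are handed out starting from 1 so that 0 stays reserved, and the current generation is OR'd into the upper bits, as new_context() does with find_next_zero_bit():

#include <stdint.h>
#include <stdio.h>

#define ASID_BITS      8                    /* assumed width for the example */
#define NUM_USER_ASIDS (1u << ASID_BITS)    /* plays the role of ASID_FIRST_VERSION */

static unsigned char asid_map[NUM_USER_ASIDS];  /* stand-in for the kernel bitmap */

static unsigned int alloc_asid(void)
{
    /* search from 1: ASID 0 is kept for TTBR0 switching and rollover */
    for (unsigned int i = 1; i < NUM_USER_ASIDS; i++) {
        if (!asid_map[i]) {
            asid_map[i] = 1;
            return i;
        }
    }
    return NUM_USER_ASIDS;  /* map full: caller bumps the generation and flushes */
}

int main(void)
{
    uint64_t generation = 1ull << ASID_BITS;
    uint64_t context_id = generation | alloc_asid();

    printf("context id: 0x%llx\n", (unsigned long long)context_id); /* 0x101 */
    return 0;
}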
*/ - asid = find_first_zero_bit(asid_map, NUM_USER_ASIDS); + asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, 1); if (asid == NUM_USER_ASIDS) { generation = atomic64_add_return(ASID_FIRST_VERSION, &asid_generation); flush_context(cpu); - asid = find_first_zero_bit(asid_map, NUM_USER_ASIDS); + asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, 1); } __set_bit(asid, asid_map); - asid = generation | IDX_TO_ASID(asid); + asid |= generation; cpumask_clear(mm_cpumask(mm)); } @@ -215,7 +245,8 @@ void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk) if (cpumask_test_and_clear_cpu(cpu, &tlb_flush_pending)) { local_flush_bp_all(); local_flush_tlb_all(); - dummy_flush_tlb_a15_erratum(); + if (erratum_a15_798181()) + dummy_flush_tlb_a15_erratum(); } atomic64_set(&per_cpu(active_asids, cpu), asid); diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index ef3e0f3aac9..7f9b1798c6c 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -250,7 +250,7 @@ static void __dma_free_buffer(struct page *page, size_t size) #ifdef CONFIG_MMU #ifdef CONFIG_HUGETLB_PAGE -#error ARM Coherent DMA allocator does not (yet) support huge TLB +#warning ARM Coherent DMA allocator does not (yet) support huge TLB #endif static void *__alloc_from_contiguous(struct device *dev, size_t size, @@ -880,10 +880,24 @@ static void __dma_page_dev_to_cpu(struct page *page, unsigned long off, dma_cache_maint_page(page, off, size, dir, dmac_unmap_area); /* - * Mark the D-cache clean for this page to avoid extra flushing. + * Mark the D-cache clean for these pages to avoid extra flushing. */ - if (dir != DMA_TO_DEVICE && off == 0 && size >= PAGE_SIZE) - set_bit(PG_dcache_clean, &page->flags); + if (dir != DMA_TO_DEVICE && size >= PAGE_SIZE) { + unsigned long pfn; + size_t left = size; + + pfn = page_to_pfn(page) + off / PAGE_SIZE; + off %= PAGE_SIZE; + if (off) { + pfn++; + left -= PAGE_SIZE - off; + } + while (left >= PAGE_SIZE) { + page = pfn_to_page(pfn++); + set_bit(PG_dcache_clean, &page->flags); + left -= PAGE_SIZE; + } + } } /** @@ -1314,6 +1328,15 @@ static void *arm_iommu_alloc_attrs(struct device *dev, size_t size, if (gfp & GFP_ATOMIC) return __iommu_alloc_atomic(dev, size, handle); + /* + * Following is a work-around (a.k.a. hack) to prevent pages + * with __GFP_COMP being passed to split_page() which cannot + * handle them. The real problem is that this flag probably + * should be 0 on ARM as it is not supported on this + * platform; see CONFIG_HUGETLBFS. 
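Relating to the __dma_page_dev_to_cpu() hunk above, a small standalone sketch (made-up values, not part of the patch) of the arithmetic that decides which pages get PG_dcache_clean set: only pages fully covered by the transfer are marked, so a partially covered first or last page is skipped:

#include <stdio.h>

#define PAGE_SIZE 4096ul

int main(void)
{
    unsigned long first_pfn = 100, off = 512, size = 3 * PAGE_SIZE;
    unsigned long pfn = first_pfn + off / PAGE_SIZE;
    unsigned long left = size;

    off %= PAGE_SIZE;
    if (off) {                      /* first page only partially covered */
        pfn++;
        left -= PAGE_SIZE - off;
    }
    while (left >= PAGE_SIZE) {     /* each fully covered page is marked clean */
        printf("mark pfn %lu clean\n", pfn++);
        left -= PAGE_SIZE;
    }
    return 0;                       /* marks pfns 101 and 102, skips 100 and 103 */
}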
+ */ + gfp &= ~(__GFP_COMP); + pages = __iommu_alloc_buffer(dev, size, gfp, attrs); if (!pages) return NULL; @@ -1372,16 +1395,17 @@ static int arm_iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma, void arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr, dma_addr_t handle, struct dma_attrs *attrs) { - struct page **pages = __iommu_get_pages(cpu_addr, attrs); + struct page **pages; size = PAGE_ALIGN(size); - if (!pages) { - WARN(1, "trying to free invalid coherent area: %p\n", cpu_addr); + if (__in_atomic_pool(cpu_addr, size)) { + __iommu_free_atomic(dev, cpu_addr, handle, size); return; } - if (__in_atomic_pool(cpu_addr, size)) { - __iommu_free_atomic(dev, cpu_addr, handle, size); + pages = __iommu_get_pages(cpu_addr, attrs); + if (!pages) { + WARN(1, "trying to free invalid coherent area: %p\n", cpu_addr); return; } @@ -1636,13 +1660,27 @@ static dma_addr_t arm_coherent_iommu_map_page(struct device *dev, struct page *p { struct dma_iommu_mapping *mapping = dev->archdata.mapping; dma_addr_t dma_addr; - int ret, len = PAGE_ALIGN(size + offset); + int ret, prot, len = PAGE_ALIGN(size + offset); dma_addr = __alloc_iova(mapping, len); if (dma_addr == DMA_ERROR_CODE) return dma_addr; - ret = iommu_map(mapping->domain, dma_addr, page_to_phys(page), len, 0); + switch (dir) { + case DMA_BIDIRECTIONAL: + prot = IOMMU_READ | IOMMU_WRITE; + break; + case DMA_TO_DEVICE: + prot = IOMMU_READ; + break; + case DMA_FROM_DEVICE: + prot = IOMMU_WRITE; + break; + default: + prot = 0; + } + + ret = iommu_map(mapping->domain, dma_addr, page_to_phys(page), len, prot); if (ret < 0) goto fail; @@ -1907,7 +1945,7 @@ void arm_iommu_detach_device(struct device *dev) iommu_detach_device(mapping->domain, dev); kref_put(&mapping->kref, release_iommu_mapping); - mapping = NULL; + dev->archdata.mapping = NULL; set_dma_ops(dev, NULL); pr_debug("Detached IOMMU controller from %s device.\n", dev_name(dev)); diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index 5dbf13f954f..c97f7940cb9 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -491,12 +491,14 @@ do_translation_fault(unsigned long addr, unsigned int fsr, * Some section permission faults need to be handled gracefully. * They can happen due to a __{get,put}_user during an oops. */ +#ifndef CONFIG_ARM_LPAE static int do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { do_bad_area(addr, fsr, regs); return 0; } +#endif /* CONFIG_ARM_LPAE */ /* * This abort handler always returns "fault". diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c index 0d473cce501..6d5ba9afb16 100644 --- a/arch/arm/mm/flush.c +++ b/arch/arm/mm/flush.c @@ -17,6 +17,7 @@ #include <asm/highmem.h> #include <asm/smp_plat.h> #include <asm/tlbflush.h> +#include <linux/hugetlb.h> #include "mm.h" @@ -168,19 +169,23 @@ void __flush_dcache_page(struct address_space *mapping, struct page *page) * coherent with the kernels mapping. 
*/ if (!PageHighMem(page)) { - __cpuc_flush_dcache_area(page_address(page), PAGE_SIZE); + size_t page_size = PAGE_SIZE << compound_order(page); + __cpuc_flush_dcache_area(page_address(page), page_size); } else { - void *addr; - + unsigned long i; if (cache_is_vipt_nonaliasing()) { - addr = kmap_atomic(page); - __cpuc_flush_dcache_area(addr, PAGE_SIZE); - kunmap_atomic(addr); - } else { - addr = kmap_high_get(page); - if (addr) { + for (i = 0; i < (1 << compound_order(page)); i++) { + void *addr = kmap_atomic(page); __cpuc_flush_dcache_area(addr, PAGE_SIZE); - kunmap_high(page); + kunmap_atomic(addr); + } + } else { + for (i = 0; i < (1 << compound_order(page)); i++) { + void *addr = kmap_high_get(page); + if (addr) { + __cpuc_flush_dcache_area(addr, PAGE_SIZE); + kunmap_high(page); + } } } } @@ -287,7 +292,7 @@ void flush_dcache_page(struct page *page) mapping = page_mapping(page); if (!cache_ops_need_broadcast() && - mapping && !mapping_mapped(mapping)) + mapping && !page_mapped(page)) clear_bit(PG_dcache_clean, &page->flags); else { __flush_dcache_page(mapping, page); @@ -301,6 +306,39 @@ void flush_dcache_page(struct page *page) EXPORT_SYMBOL(flush_dcache_page); /* + * Ensure cache coherency for the kernel mapping of this page. We can + * assume that the page is pinned via kmap. + * + * If the page only exists in the page cache and there are no user + * space mappings, this is a no-op since the page was already marked + * dirty at creation. Otherwise, we need to flush the dirty kernel + * cache lines directly. + */ +void flush_kernel_dcache_page(struct page *page) +{ + if (cache_is_vivt() || cache_is_vipt_aliasing()) { + struct address_space *mapping; + + mapping = page_mapping(page); + + if (!mapping || mapping_mapped(mapping)) { + void *addr; + + addr = page_address(page); + /* + * kmap_atomic() doesn't set the page virtual + * address for highmem pages, and + * kunmap_atomic() takes care of cache + * flushing already. + */ + if (!IS_ENABLED(CONFIG_HIGHMEM) || addr) + __cpuc_flush_dcache_area(addr, PAGE_SIZE); + } + } +} +EXPORT_SYMBOL(flush_kernel_dcache_page); + +/* * Flush an anonymous page so that users of get_user_pages() * can safely access the data. The expected sequence is: * diff --git a/arch/arm/mm/fsr-3level.c b/arch/arm/mm/fsr-3level.c index 05a4e943183..ab4409a2307 100644 --- a/arch/arm/mm/fsr-3level.c +++ b/arch/arm/mm/fsr-3level.c @@ -9,11 +9,11 @@ static struct fsr_info fsr_info[] = { { do_page_fault, SIGSEGV, SEGV_MAPERR, "level 3 translation fault" }, { do_bad, SIGBUS, 0, "reserved access flag fault" }, { do_bad, SIGSEGV, SEGV_ACCERR, "level 1 access flag fault" }, - { do_bad, SIGSEGV, SEGV_ACCERR, "level 2 access flag fault" }, + { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 access flag fault" }, { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 access flag fault" }, { do_bad, SIGBUS, 0, "reserved permission fault" }, { do_bad, SIGSEGV, SEGV_ACCERR, "level 1 permission fault" }, - { do_sect_fault, SIGSEGV, SEGV_ACCERR, "level 2 permission fault" }, + { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 permission fault" }, { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 permission fault" }, { do_bad, SIGBUS, 0, "synchronous external abort" }, { do_bad, SIGBUS, 0, "asynchronous external abort" }, diff --git a/arch/arm/mm/hugetlbpage.c b/arch/arm/mm/hugetlbpage.c new file mode 100644 index 00000000000..3d1e4a205b0 --- /dev/null +++ b/arch/arm/mm/hugetlbpage.c @@ -0,0 +1,101 @@ +/* + * arch/arm/mm/hugetlbpage.c + * + * Copyright (C) 2012 ARM Ltd. 
+ * + * Based on arch/x86/include/asm/hugetlb.h and Bill Carson's patches + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/init.h> +#include <linux/fs.h> +#include <linux/mm.h> +#include <linux/hugetlb.h> +#include <linux/pagemap.h> +#include <linux/err.h> +#include <linux/sysctl.h> +#include <asm/mman.h> +#include <asm/tlb.h> +#include <asm/tlbflush.h> +#include <asm/pgalloc.h> + +/* + * On ARM, huge pages are backed by pmd's rather than pte's, so we do a lot + * of type casting from pmd_t * to pte_t *. + */ + +pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd = NULL; + + pgd = pgd_offset(mm, addr); + if (pgd_present(*pgd)) { + pud = pud_offset(pgd, addr); + if (pud_present(*pud)) + pmd = pmd_offset(pud, addr); + } + + return (pte_t *)pmd; +} + +struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, + int write) +{ + return ERR_PTR(-EINVAL); +} + +int pud_huge(pud_t pud) +{ + return 0; +} + +int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) +{ + return 0; +} + +pte_t *huge_pte_alloc(struct mm_struct *mm, + unsigned long addr, unsigned long sz) +{ + pgd_t *pgd; + pud_t *pud; + pte_t *pte = NULL; + + pgd = pgd_offset(mm, addr); + pud = pud_alloc(mm, pgd, addr); + if (pud) + pte = (pte_t *)pmd_alloc(mm, pud, addr); + + return pte; +} + +struct page * +follow_huge_pmd(struct mm_struct *mm, unsigned long address, + pmd_t *pmd, int write) +{ + struct page *page; + + page = pte_page(*(pte_t *)pmd); + if (page) + page += ((address & ~PMD_MASK) >> PAGE_SHIFT); + return page; +} + +int pmd_huge(pmd_t pmd) +{ + return pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT); +} diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 9a5cdc01fcd..15225d829d7 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -36,12 +36,13 @@ #include "mm.h" -static unsigned long phys_initrd_start __initdata = 0; +static phys_addr_t phys_initrd_start __initdata = 0; static unsigned long phys_initrd_size __initdata = 0; static int __init early_initrd(char *p) { - unsigned long start, size; + phys_addr_t start; + unsigned long size; char *endp; start = memparse(p, &endp); @@ -350,14 +351,14 @@ void __init arm_memblock_init(struct meminfo *mi, struct machine_desc *mdesc) #ifdef CONFIG_BLK_DEV_INITRD if (phys_initrd_size && !memblock_is_region_memory(phys_initrd_start, phys_initrd_size)) { - pr_err("INITRD: 0x%08lx+0x%08lx is not a memory region - disabling initrd\n", - phys_initrd_start, phys_initrd_size); + pr_err("INITRD: 0x%08llx+0x%08lx is not a memory region - disabling initrd\n", + (u64)phys_initrd_start, phys_initrd_size); phys_initrd_start = phys_initrd_size = 0; } if (phys_initrd_size && memblock_is_region_reserved(phys_initrd_start, phys_initrd_size)) { - pr_err("INITRD: 0x%08lx+0x%08lx overlaps in-use memory region - disabling initrd\n", - phys_initrd_start, phys_initrd_size); + 
pr_err("INITRD: 0x%08llx+0x%08lx overlaps in-use memory region - disabling initrd\n", + (u64)phys_initrd_start, phys_initrd_size); phys_initrd_start = phys_initrd_size = 0; } if (phys_initrd_size) { @@ -442,7 +443,7 @@ static inline void free_memmap(unsigned long start_pfn, unsigned long end_pfn) { struct page *start_pg, *end_pg; - unsigned long pg, pgend; + phys_addr_t pg, pgend; /* * Convert start_pfn/end_pfn to a struct page pointer. @@ -454,8 +455,8 @@ free_memmap(unsigned long start_pfn, unsigned long end_pfn) * Convert to physical addresses, and * round start upwards and end downwards. */ - pg = (unsigned long)PAGE_ALIGN(__pa(start_pg)); - pgend = (unsigned long)__pa(end_pg) & PAGE_MASK; + pg = PAGE_ALIGN(__pa(start_pg)); + pgend = __pa(end_pg) & PAGE_MASK; /* * If there are free pages between these, @@ -582,9 +583,6 @@ static void __init free_highpages(void) */ void __init mem_init(void) { - unsigned long reserved_pages, free_pages; - struct memblock_region *reg; - int i; #ifdef CONFIG_HAVE_TCM /* These pointers are filled in on TCM detection */ extern u32 dtcm_end; @@ -595,57 +593,16 @@ void __init mem_init(void) /* this will put all unused low memory onto the freelists */ free_unused_memmap(&meminfo); - - totalram_pages += free_all_bootmem(); + free_all_bootmem(); #ifdef CONFIG_SA1111 /* now that our DMA memory is actually so designated, we can free it */ - free_reserved_area(__va(PHYS_PFN_OFFSET), swapper_pg_dir, 0, NULL); + free_reserved_area(__va(PHYS_OFFSET), swapper_pg_dir, -1, NULL); #endif free_highpages(); - reserved_pages = free_pages = 0; - - for_each_bank(i, &meminfo) { - struct membank *bank = &meminfo.bank[i]; - unsigned int pfn1, pfn2; - struct page *page, *end; - - pfn1 = bank_pfn_start(bank); - pfn2 = bank_pfn_end(bank); - - page = pfn_to_page(pfn1); - end = pfn_to_page(pfn2 - 1) + 1; - - do { - if (PageReserved(page)) - reserved_pages++; - else if (!page_count(page)) - free_pages++; - page++; - } while (page < end); - } - - /* - * Since our memory may not be contiguous, calculate the - * real number of pages we have in this system - */ - printk(KERN_INFO "Memory:"); - num_physpages = 0; - for_each_memblock(memory, reg) { - unsigned long pages = memblock_region_memory_end_pfn(reg) - - memblock_region_memory_base_pfn(reg); - num_physpages += pages; - printk(" %ldMB", pages >> (20 - PAGE_SHIFT)); - } - printk(" = %luMB total\n", num_physpages >> (20 - PAGE_SHIFT)); - - printk(KERN_NOTICE "Memory: %luk/%luk available, %luk reserved, %luK highmem\n", - nr_free_pages() << (PAGE_SHIFT-10), - free_pages << (PAGE_SHIFT-10), - reserved_pages << (PAGE_SHIFT-10), - totalhigh_pages << (PAGE_SHIFT-10)); + mem_init_print_info(NULL); #define MLK(b, t) b, t, ((t) - (b)) >> 10 #define MLM(b, t) b, t, ((t) - (b)) >> 20 @@ -711,7 +668,7 @@ void __init mem_init(void) BUG_ON(PKMAP_BASE + LAST_PKMAP * PAGE_SIZE > PAGE_OFFSET); #endif - if (PAGE_SIZE >= 16384 && num_physpages <= 128) { + if (PAGE_SIZE >= 16384 && get_num_physpages() <= 128) { extern int sysctl_overcommit_memory; /* * On a machine this small we won't get @@ -728,12 +685,12 @@ void free_initmem(void) extern char __tcm_start, __tcm_end; poison_init_mem(&__tcm_start, &__tcm_end - &__tcm_start); - free_reserved_area(&__tcm_start, &__tcm_end, 0, "TCM link"); + free_reserved_area(&__tcm_start, &__tcm_end, -1, "TCM link"); #endif poison_init_mem(__init_begin, __init_end - __init_begin); if (!machine_is_integrator() && !machine_is_cintegrator()) - free_initmem_default(0); + free_initmem_default(-1); } #ifdef CONFIG_BLK_DEV_INITRD 
@@ -744,7 +701,7 @@ void free_initrd_mem(unsigned long start, unsigned long end) { if (!keep_initrd) { poison_init_mem((void *)start, PAGE_ALIGN(end) - start); - free_reserved_area(start, end, 0, "initrd"); + free_reserved_area((void *)start, (void *)end, -1, "initrd"); } } diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c index 04d9006eab1..f123d6eb074 100644 --- a/arch/arm/mm/ioremap.c +++ b/arch/arm/mm/ioremap.c @@ -331,10 +331,10 @@ void __iomem * __arm_ioremap_pfn_caller(unsigned long pfn, return (void __iomem *) (offset + addr); } -void __iomem *__arm_ioremap_caller(unsigned long phys_addr, size_t size, +void __iomem *__arm_ioremap_caller(phys_addr_t phys_addr, size_t size, unsigned int mtype, void *caller) { - unsigned long last_addr; + phys_addr_t last_addr; unsigned long offset = phys_addr & ~PAGE_MASK; unsigned long pfn = __phys_to_pfn(phys_addr); @@ -367,12 +367,12 @@ __arm_ioremap_pfn(unsigned long pfn, unsigned long offset, size_t size, } EXPORT_SYMBOL(__arm_ioremap_pfn); -void __iomem * (*arch_ioremap_caller)(unsigned long, size_t, +void __iomem * (*arch_ioremap_caller)(phys_addr_t, size_t, unsigned int, void *) = __arm_ioremap_caller; void __iomem * -__arm_ioremap(unsigned long phys_addr, size_t size, unsigned int mtype) +__arm_ioremap(phys_addr_t phys_addr, size_t size, unsigned int mtype) { return arch_ioremap_caller(phys_addr, size, mtype, __builtin_return_address(0)); @@ -387,7 +387,7 @@ EXPORT_SYMBOL(__arm_ioremap); * CONFIG_GENERIC_ALLOCATOR for allocating external memory. */ void __iomem * -__arm_ioremap_exec(unsigned long phys_addr, size_t size, bool cached) +__arm_ioremap_exec(phys_addr_t phys_addr, size_t size, bool cached) { unsigned int mtype; diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c index 10062ceadd1..0c6356255fe 100644 --- a/arch/arm/mm/mmap.c +++ b/arch/arm/mm/mmap.c @@ -181,11 +181,9 @@ void arch_pick_mmap_layout(struct mm_struct *mm) if (mmap_is_legacy()) { mm->mmap_base = TASK_UNMAPPED_BASE + random_factor; mm->get_unmapped_area = arch_get_unmapped_area; - mm->unmap_area = arch_unmap_area; } else { mm->mmap_base = mmap_base(random_factor); mm->get_unmapped_area = arch_get_unmapped_area_topdown; - mm->unmap_area = arch_unmap_area_topdown; } } diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index e0d8565671a..53cdbd39ec8 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -616,10 +616,12 @@ static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr, } while (pte++, addr += PAGE_SIZE, addr != end); } -static void __init map_init_section(pmd_t *pmd, unsigned long addr, +static void __init __map_init_section(pmd_t *pmd, unsigned long addr, unsigned long end, phys_addr_t phys, const struct mem_type *type) { + pmd_t *p = pmd; + #ifndef CONFIG_ARM_LPAE /* * In classic MMU format, puds and pmds are folded in to @@ -638,7 +640,7 @@ static void __init map_init_section(pmd_t *pmd, unsigned long addr, phys += SECTION_SIZE; } while (pmd++, addr += SECTION_SIZE, addr != end); - flush_pmd_entry(pmd); + flush_pmd_entry(p); } static void __init alloc_init_pmd(pud_t *pud, unsigned long addr, @@ -661,7 +663,7 @@ static void __init alloc_init_pmd(pud_t *pud, unsigned long addr, */ if (type->prot_sect && ((addr | next | phys) & ~SECTION_MASK) == 0) { - map_init_section(pmd, addr, next, phys, type); + __map_init_section(pmd, addr, next, phys, type); } else { alloc_init_pte(pmd, addr, next, __phys_to_pfn(phys), type); @@ -673,7 +675,8 @@ static void __init alloc_init_pmd(pud_t *pud, unsigned long addr, } static void __init alloc_init_pud(pgd_t 
*pgd, unsigned long addr, - unsigned long end, unsigned long phys, const struct mem_type *type) + unsigned long end, phys_addr_t phys, + const struct mem_type *type) { pud_t *pud = pud_offset(pgd, addr); unsigned long next; @@ -947,7 +950,7 @@ void __init debug_ll_io_init(void) map.virtual &= PAGE_MASK; map.length = PAGE_SIZE; map.type = MT_DEVICE; - create_mapping(&map); + iotable_init(&map, 1); } #endif @@ -986,28 +989,30 @@ phys_addr_t arm_lowmem_limit __initdata = 0; void __init sanity_check_meminfo(void) { + phys_addr_t memblock_limit = 0; int i, j, highmem = 0; + phys_addr_t vmalloc_limit = __pa(vmalloc_min - 1) + 1; for (i = 0, j = 0; i < meminfo.nr_banks; i++) { struct membank *bank = &meminfo.bank[j]; - *bank = meminfo.bank[i]; + phys_addr_t size_limit; - if (bank->start > ULONG_MAX) - highmem = 1; + *bank = meminfo.bank[i]; + size_limit = bank->size; -#ifdef CONFIG_HIGHMEM - if (__va(bank->start) >= vmalloc_min || - __va(bank->start) < (void *)PAGE_OFFSET) + if (bank->start >= vmalloc_limit) highmem = 1; + else + size_limit = vmalloc_limit - bank->start; bank->highmem = highmem; +#ifdef CONFIG_HIGHMEM /* * Split those memory banks which are partially overlapping * the vmalloc area greatly simplifying things later. */ - if (!highmem && __va(bank->start) < vmalloc_min && - bank->size > vmalloc_min - __va(bank->start)) { + if (!highmem && bank->size > size_limit) { if (meminfo.nr_banks >= NR_BANKS) { printk(KERN_CRIT "NR_BANKS too low, " "ignoring high memory\n"); @@ -1016,16 +1021,14 @@ void __init sanity_check_meminfo(void) (meminfo.nr_banks - i) * sizeof(*bank)); meminfo.nr_banks++; i++; - bank[1].size -= vmalloc_min - __va(bank->start); - bank[1].start = __pa(vmalloc_min - 1) + 1; + bank[1].size -= size_limit; + bank[1].start = vmalloc_limit; bank[1].highmem = highmem = 1; j++; } - bank->size = vmalloc_min - __va(bank->start); + bank->size = size_limit; } #else - bank->highmem = highmem; - /* * Highmem banks not allowed with !CONFIG_HIGHMEM. */ @@ -1038,36 +1041,44 @@ void __init sanity_check_meminfo(void) } /* - * Check whether this memory bank would entirely overlap - * the vmalloc area. - */ - if (__va(bank->start) >= vmalloc_min || - __va(bank->start) < (void *)PAGE_OFFSET) { - printk(KERN_NOTICE "Ignoring RAM at %.8llx-%.8llx " - "(vmalloc region overlap).\n", - (unsigned long long)bank->start, - (unsigned long long)bank->start + bank->size - 1); - continue; - } - - /* * Check whether this memory bank would partially overlap * the vmalloc area. */ - if (__va(bank->start + bank->size - 1) >= vmalloc_min || - __va(bank->start + bank->size - 1) <= __va(bank->start)) { - unsigned long newsize = vmalloc_min - __va(bank->start); + if (bank->size > size_limit) { printk(KERN_NOTICE "Truncating RAM at %.8llx-%.8llx " "to -%.8llx (vmalloc region overlap).\n", (unsigned long long)bank->start, (unsigned long long)bank->start + bank->size - 1, - (unsigned long long)bank->start + newsize - 1); - bank->size = newsize; + (unsigned long long)bank->start + size_limit - 1); + bank->size = size_limit; } #endif - if (!bank->highmem && bank->start + bank->size > arm_lowmem_limit) - arm_lowmem_limit = bank->start + bank->size; + if (!bank->highmem) { + phys_addr_t bank_end = bank->start + bank->size; + + if (bank_end > arm_lowmem_limit) + arm_lowmem_limit = bank_end; + /* + * Find the first non-section-aligned page, and point + * memblock_limit at it. This relies on rounding the + * limit down to be section-aligned, which happens at + * the end of this function. 
+ * + * With this algorithm, the start or end of almost any + * bank can be non-section-aligned. The only exception + * is that the start of the bank 0 must be section- + * aligned, since otherwise memory would need to be + * allocated when mapping the start of bank 0, which + * occurs before any free memory is mapped. + */ + if (!memblock_limit) { + if (!IS_ALIGNED(bank->start, SECTION_SIZE)) + memblock_limit = bank->start; + else if (!IS_ALIGNED(bank_end, SECTION_SIZE)) + memblock_limit = bank_end; + } + } j++; } #ifdef CONFIG_HIGHMEM @@ -1092,7 +1103,18 @@ void __init sanity_check_meminfo(void) #endif meminfo.nr_banks = j; high_memory = __va(arm_lowmem_limit - 1) + 1; - memblock_set_current_limit(arm_lowmem_limit); + + /* + * Round the memblock limit down to a section size. This + * helps to ensure that we will allocate memory from the + * last full section, which should be mapped. + */ + if (memblock_limit) + memblock_limit = round_down(memblock_limit, SECTION_SIZE); + if (!memblock_limit) + memblock_limit = arm_lowmem_limit; + + memblock_set_current_limit(memblock_limit); } static inline void prepare_page_table(void) @@ -1173,7 +1195,7 @@ static void __init devicemaps_init(struct machine_desc *mdesc) /* * Allocate the vector page early. */ - vectors = early_alloc(PAGE_SIZE); + vectors = early_alloc(PAGE_SIZE * 2); early_trap_init(vectors); @@ -1218,20 +1240,34 @@ static void __init devicemaps_init(struct machine_desc *mdesc) map.pfn = __phys_to_pfn(virt_to_phys(vectors)); map.virtual = 0xffff0000; map.length = PAGE_SIZE; +#ifdef CONFIG_KUSER_HELPERS map.type = MT_HIGH_VECTORS; +#else + map.type = MT_LOW_VECTORS; +#endif create_mapping(&map); if (!vectors_high()) { map.virtual = 0; + map.length = PAGE_SIZE * 2; map.type = MT_LOW_VECTORS; create_mapping(&map); } + /* Now create a kernel read-only mapping */ + map.pfn += 1; + map.virtual = 0xffff0000 + PAGE_SIZE; + map.length = PAGE_SIZE; + map.type = MT_LOW_VECTORS; + create_mapping(&map); + /* * Ask the machine support to map in the statically mapped devices. 
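A minimal sketch (example values only, not part of the patch) of the memblock_limit rounding added to sanity_check_meminfo() above, assuming 1MB sections: the first non-section-aligned bank boundary is rounded down so early memblock allocations come from a section that is already fully mapped:

#include <stdio.h>

#define SECTION_SIZE 0x100000ul                 /* assumed: 1MB sections */
#define round_down(x, y) ((x) & ~((y) - 1))     /* y must be a power of two */

int main(void)
{
    unsigned long bank_end = 0x2fef8000ul;      /* example: not section aligned */
    unsigned long memblock_limit = bank_end;

    if (memblock_limit)
        memblock_limit = round_down(memblock_limit, SECTION_SIZE);
    printf("memblock limit: 0x%08lx\n", memblock_limit);   /* 0x2fe00000 */
    return 0;
}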
*/ if (mdesc->map_io) mdesc->map_io(); + else + debug_ll_io_init(); fill_pmd_gaps(); /* Reserve fixed i/o space in VMALLOC region */ @@ -1287,8 +1323,6 @@ void __init paging_init(struct machine_desc *mdesc) { void *zero_page; - memblock_set_current_limit(arm_lowmem_limit); - build_mem_type_table(); prepare_page_table(); map_lowmem(); diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c index d51225f90ae..1fa50100ab6 100644 --- a/arch/arm/mm/nommu.c +++ b/arch/arm/mm/nommu.c @@ -8,6 +8,7 @@ #include <linux/pagemap.h> #include <linux/io.h> #include <linux/memblock.h> +#include <linux/kernel.h> #include <asm/cacheflush.h> #include <asm/sections.h> @@ -15,22 +16,282 @@ #include <asm/setup.h> #include <asm/traps.h> #include <asm/mach/arch.h> +#include <asm/cputype.h> +#include <asm/mpu.h> #include "mm.h" +#ifdef CONFIG_ARM_MPU +struct mpu_rgn_info mpu_rgn_info; + +/* Region number */ +static void rgnr_write(u32 v) +{ + asm("mcr p15, 0, %0, c6, c2, 0" : : "r" (v)); +} + +/* Data-side / unified region attributes */ + +/* Region access control register */ +static void dracr_write(u32 v) +{ + asm("mcr p15, 0, %0, c6, c1, 4" : : "r" (v)); +} + +/* Region size register */ +static void drsr_write(u32 v) +{ + asm("mcr p15, 0, %0, c6, c1, 2" : : "r" (v)); +} + +/* Region base address register */ +static void drbar_write(u32 v) +{ + asm("mcr p15, 0, %0, c6, c1, 0" : : "r" (v)); +} + +static u32 drbar_read(void) +{ + u32 v; + asm("mrc p15, 0, %0, c6, c1, 0" : "=r" (v)); + return v; +} +/* Optional instruction-side region attributes */ + +/* I-side Region access control register */ +static void iracr_write(u32 v) +{ + asm("mcr p15, 0, %0, c6, c1, 5" : : "r" (v)); +} + +/* I-side Region size register */ +static void irsr_write(u32 v) +{ + asm("mcr p15, 0, %0, c6, c1, 3" : : "r" (v)); +} + +/* I-side Region base address register */ +static void irbar_write(u32 v) +{ + asm("mcr p15, 0, %0, c6, c1, 1" : : "r" (v)); +} + +static unsigned long irbar_read(void) +{ + unsigned long v; + asm("mrc p15, 0, %0, c6, c1, 1" : "=r" (v)); + return v; +} + +/* MPU initialisation functions */ +void __init sanity_check_meminfo_mpu(void) +{ + int i; + struct membank *bank = meminfo.bank; + phys_addr_t phys_offset = PHYS_OFFSET; + phys_addr_t aligned_region_size, specified_mem_size, rounded_mem_size; + + /* Initially only use memory continuous from PHYS_OFFSET */ + if (bank_phys_start(&bank[0]) != phys_offset) + panic("First memory bank must be contiguous from PHYS_OFFSET"); + + /* Banks have already been sorted by start address */ + for (i = 1; i < meminfo.nr_banks; i++) { + if (bank[i].start <= bank_phys_end(&bank[0]) && + bank_phys_end(&bank[i]) > bank_phys_end(&bank[0])) { + bank[0].size = bank_phys_end(&bank[i]) - bank[0].start; + } else { + pr_notice("Ignoring RAM after 0x%.8lx. " + "First non-contiguous (ignored) bank start: 0x%.8lx\n", + (unsigned long)bank_phys_end(&bank[0]), + (unsigned long)bank_phys_start(&bank[i])); + break; + } + } + /* All contiguous banks are now merged in to the first bank */ + meminfo.nr_banks = 1; + specified_mem_size = bank[0].size; + + /* + * MPU has curious alignment requirements: Size must be power of 2, and + * region start must be aligned to the region size + */ + if (phys_offset != 0) + pr_info("PHYS_OFFSET != 0 => MPU Region size constrained by alignment requirements\n"); + + /* + * Maximum aligned region might overflow phys_addr_t if phys_offset is + * 0. 
Hence we keep everything below 4G until we take the smaller of + * the aligned_region_size and rounded_mem_size, one of which is + * guaranteed to be smaller than the maximum physical address. + */ + aligned_region_size = (phys_offset - 1) ^ (phys_offset); + /* Find the max power-of-two sized region that fits inside our bank */ + rounded_mem_size = (1 << __fls(bank[0].size)) - 1; + + /* The actual region size is the smaller of the two */ + aligned_region_size = aligned_region_size < rounded_mem_size + ? aligned_region_size + 1 + : rounded_mem_size + 1; + + if (aligned_region_size != specified_mem_size) + pr_warn("Truncating memory from 0x%.8lx to 0x%.8lx (MPU region constraints)", + (unsigned long)specified_mem_size, + (unsigned long)aligned_region_size); + + meminfo.bank[0].size = aligned_region_size; + pr_debug("MPU Region from 0x%.8lx size 0x%.8lx (end 0x%.8lx))\n", + (unsigned long)phys_offset, + (unsigned long)aligned_region_size, + (unsigned long)bank_phys_end(&bank[0])); + +} + +static int mpu_present(void) +{ + return ((read_cpuid_ext(CPUID_EXT_MMFR0) & MMFR0_PMSA) == MMFR0_PMSAv7); +} + +static int mpu_max_regions(void) +{ + /* + * We don't support a different number of I/D side regions so if we + * have separate instruction and data memory maps then return + * whichever side has a smaller number of supported regions. + */ + u32 dregions, iregions, mpuir; + mpuir = read_cpuid(CPUID_MPUIR); + + dregions = iregions = (mpuir & MPUIR_DREGION_SZMASK) >> MPUIR_DREGION; + + /* Check for separate d-side and i-side memory maps */ + if (mpuir & MPUIR_nU) + iregions = (mpuir & MPUIR_IREGION_SZMASK) >> MPUIR_IREGION; + + /* Use the smallest of the two maxima */ + return min(dregions, iregions); +} + +static int mpu_iside_independent(void) +{ + /* MPUIR.nU specifies whether there is *not* a unified memory map */ + return read_cpuid(CPUID_MPUIR) & MPUIR_nU; +} + +static int mpu_min_region_order(void) +{ + u32 drbar_result, irbar_result; + /* We've kept a region free for this probing */ + rgnr_write(MPU_PROBE_REGION); + isb(); + /* + * As per ARM ARM, write 0xFFFFFFFC to DRBAR to find the minimum + * region order + */ + drbar_write(0xFFFFFFFC); + drbar_result = irbar_result = drbar_read(); + drbar_write(0x0); + /* If the MPU is non-unified, we use the larger of the two minima*/ + if (mpu_iside_independent()) { + irbar_write(0xFFFFFFFC); + irbar_result = irbar_read(); + irbar_write(0x0); + } + isb(); /* Ensure that MPU region operations have completed */ + /* Return whichever result is larger */ + return __ffs(max(drbar_result, irbar_result)); +} + +static int mpu_setup_region(unsigned int number, phys_addr_t start, + unsigned int size_order, unsigned int properties) +{ + u32 size_data; + + /* We kept a region free for probing resolution of MPU regions*/ + if (number > mpu_max_regions() || number == MPU_PROBE_REGION) + return -ENOENT; + + if (size_order > 32) + return -ENOMEM; + + if (size_order < mpu_min_region_order()) + return -ENOMEM; + + /* Writing N to bits 5:1 (RSR_SZ) specifies region size 2^N+1 */ + size_data = ((size_order - 1) << MPU_RSR_SZ) | 1 << MPU_RSR_EN; + + dsb(); /* Ensure all previous data accesses occur with old mappings */ + rgnr_write(number); + isb(); + drbar_write(start); + dracr_write(properties); + isb(); /* Propagate properties before enabling region */ + drsr_write(size_data); + + /* Check for independent I-side registers */ + if (mpu_iside_independent()) { + irbar_write(start); + iracr_write(properties); + isb(); + irsr_write(size_data); + } + isb(); + + /* Store 
region info (we treat i/d side the same, so only store d) */ + mpu_rgn_info.rgns[number].dracr = properties; + mpu_rgn_info.rgns[number].drbar = start; + mpu_rgn_info.rgns[number].drsr = size_data; + return 0; +} + +/* +* Set up default MPU regions, doing nothing if there is no MPU +*/ +void __init mpu_setup(void) +{ + int region_err; + if (!mpu_present()) + return; + + region_err = mpu_setup_region(MPU_RAM_REGION, PHYS_OFFSET, + ilog2(meminfo.bank[0].size), + MPU_AP_PL1RW_PL0RW | MPU_RGN_NORMAL); + if (region_err) { + panic("MPU region initialization failure! %d", region_err); + } else { + pr_info("Using ARMv7 PMSA Compliant MPU. " + "Region independence: %s, Max regions: %d\n", + mpu_iside_independent() ? "Yes" : "No", + mpu_max_regions()); + } +} +#else +static void sanity_check_meminfo_mpu(void) {} +static void __init mpu_setup(void) {} +#endif /* CONFIG_ARM_MPU */ + void __init arm_mm_memblock_reserve(void) { +#ifndef CONFIG_CPU_V7M /* * Register the exception vector page. * some architectures which the DRAM is the exception vector to trap, * alloc_page breaks with error, although it is not NULL, but "0." */ memblock_reserve(CONFIG_VECTORS_BASE, PAGE_SIZE); +#else /* ifndef CONFIG_CPU_V7M */ + /* + * There is no dedicated vector page on V7-M. So nothing needs to be + * reserved here. + */ +#endif } void __init sanity_check_meminfo(void) { - phys_addr_t end = bank_phys_end(&meminfo.bank[meminfo.nr_banks - 1]); + phys_addr_t end; + sanity_check_meminfo_mpu(); + end = bank_phys_end(&meminfo.bank[meminfo.nr_banks - 1]); high_memory = __va(end - 1) + 1; } @@ -41,6 +302,7 @@ void __init sanity_check_meminfo(void) void __init paging_init(struct machine_desc *mdesc) { early_trap_init((void *)CONFIG_VECTORS_BASE); + mpu_setup(); bootmem_init(); } @@ -57,6 +319,12 @@ void flush_dcache_page(struct page *page) } EXPORT_SYMBOL(flush_dcache_page); +void flush_kernel_dcache_page(struct page *page) +{ + __cpuc_flush_dcache_area(page_address(page), PAGE_SIZE); +} +EXPORT_SYMBOL(flush_kernel_dcache_page); + void copy_to_user_page(struct vm_area_struct *vma, struct page *page, unsigned long uaddr, void *dst, const void *src, unsigned long len) @@ -81,16 +349,16 @@ void __iomem *__arm_ioremap_pfn_caller(unsigned long pfn, unsigned long offset, return __arm_ioremap_pfn(pfn, offset, size, mtype); } -void __iomem *__arm_ioremap(unsigned long phys_addr, size_t size, +void __iomem *__arm_ioremap(phys_addr_t phys_addr, size_t size, unsigned int mtype) { return (void __iomem *)phys_addr; } EXPORT_SYMBOL(__arm_ioremap); -void __iomem * (*arch_ioremap_caller)(unsigned long, size_t, unsigned int, void *); +void __iomem * (*arch_ioremap_caller)(phys_addr_t, size_t, unsigned int, void *); -void __iomem *__arm_ioremap_caller(unsigned long phys_addr, size_t size, +void __iomem *__arm_ioremap_caller(phys_addr_t phys_addr, size_t size, unsigned int mtype, void *caller) { return __arm_ioremap(phys_addr, size, mtype); diff --git a/arch/arm/mm/proc-arm1020.S b/arch/arm/mm/proc-arm1020.S index 2bb61e703d6..d1a2d05971e 100644 --- a/arch/arm/mm/proc-arm1020.S +++ b/arch/arm/mm/proc-arm1020.S @@ -443,8 +443,6 @@ ENTRY(cpu_arm1020_set_pte_ext) #endif /* CONFIG_MMU */ mov pc, lr - __CPUINIT - .type __arm1020_setup, #function __arm1020_setup: mov r0, #0 diff --git a/arch/arm/mm/proc-arm1020e.S b/arch/arm/mm/proc-arm1020e.S index 8f96aa40f51..9d89405c3d0 100644 --- a/arch/arm/mm/proc-arm1020e.S +++ b/arch/arm/mm/proc-arm1020e.S @@ -425,8 +425,6 @@ ENTRY(cpu_arm1020e_set_pte_ext) #endif /* CONFIG_MMU */ mov pc, lr - __CPUINIT - 
.type __arm1020e_setup, #function __arm1020e_setup: mov r0, #0 diff --git a/arch/arm/mm/proc-arm1022.S b/arch/arm/mm/proc-arm1022.S index 8ebe4a469a2..6f01a0ae3b3 100644 --- a/arch/arm/mm/proc-arm1022.S +++ b/arch/arm/mm/proc-arm1022.S @@ -407,8 +407,6 @@ ENTRY(cpu_arm1022_set_pte_ext) #endif /* CONFIG_MMU */ mov pc, lr - __CPUINIT - .type __arm1022_setup, #function __arm1022_setup: mov r0, #0 diff --git a/arch/arm/mm/proc-arm1026.S b/arch/arm/mm/proc-arm1026.S index 093fc7e520c..4799a24b43e 100644 --- a/arch/arm/mm/proc-arm1026.S +++ b/arch/arm/mm/proc-arm1026.S @@ -396,9 +396,6 @@ ENTRY(cpu_arm1026_set_pte_ext) #endif /* CONFIG_MMU */ mov pc, lr - - __CPUINIT - .type __arm1026_setup, #function __arm1026_setup: mov r0, #0 diff --git a/arch/arm/mm/proc-arm720.S b/arch/arm/mm/proc-arm720.S index 0ac908c7ade..d42c37f9f5b 100644 --- a/arch/arm/mm/proc-arm720.S +++ b/arch/arm/mm/proc-arm720.S @@ -116,8 +116,6 @@ ENTRY(cpu_arm720_reset) ENDPROC(cpu_arm720_reset) .popsection - __CPUINIT - .type __arm710_setup, #function __arm710_setup: mov r0, #0 diff --git a/arch/arm/mm/proc-arm740.S b/arch/arm/mm/proc-arm740.S index fde2d2a794c..9b0ae90cbf1 100644 --- a/arch/arm/mm/proc-arm740.S +++ b/arch/arm/mm/proc-arm740.S @@ -60,8 +60,6 @@ ENTRY(cpu_arm740_reset) ENDPROC(cpu_arm740_reset) .popsection - __CPUINIT - .type __arm740_setup, #function __arm740_setup: mov r0, #0 diff --git a/arch/arm/mm/proc-arm7tdmi.S b/arch/arm/mm/proc-arm7tdmi.S index 6ddea3e464b..f6cc3f63ce3 100644 --- a/arch/arm/mm/proc-arm7tdmi.S +++ b/arch/arm/mm/proc-arm7tdmi.S @@ -51,8 +51,6 @@ ENTRY(cpu_arm7tdmi_reset) ENDPROC(cpu_arm7tdmi_reset) .popsection - __CPUINIT - .type __arm7tdmi_setup, #function __arm7tdmi_setup: mov pc, lr diff --git a/arch/arm/mm/proc-arm920.S b/arch/arm/mm/proc-arm920.S index 2556cf1c2da..549557df6d5 100644 --- a/arch/arm/mm/proc-arm920.S +++ b/arch/arm/mm/proc-arm920.S @@ -410,8 +410,6 @@ ENTRY(cpu_arm920_do_resume) ENDPROC(cpu_arm920_do_resume) #endif - __CPUINIT - .type __arm920_setup, #function __arm920_setup: mov r0, #0 diff --git a/arch/arm/mm/proc-arm922.S b/arch/arm/mm/proc-arm922.S index 4464c49d744..2a758b06c6f 100644 --- a/arch/arm/mm/proc-arm922.S +++ b/arch/arm/mm/proc-arm922.S @@ -388,8 +388,6 @@ ENTRY(cpu_arm922_set_pte_ext) #endif /* CONFIG_MMU */ mov pc, lr - __CPUINIT - .type __arm922_setup, #function __arm922_setup: mov r0, #0 diff --git a/arch/arm/mm/proc-arm925.S b/arch/arm/mm/proc-arm925.S index 281eb9b9c1d..97448c3acf3 100644 --- a/arch/arm/mm/proc-arm925.S +++ b/arch/arm/mm/proc-arm925.S @@ -438,8 +438,6 @@ ENTRY(cpu_arm925_set_pte_ext) #endif /* CONFIG_MMU */ mov pc, lr - __CPUINIT - .type __arm925_setup, #function __arm925_setup: mov r0, #0 diff --git a/arch/arm/mm/proc-arm926.S b/arch/arm/mm/proc-arm926.S index 344c8a548cc..0f098f407c9 100644 --- a/arch/arm/mm/proc-arm926.S +++ b/arch/arm/mm/proc-arm926.S @@ -425,8 +425,6 @@ ENTRY(cpu_arm926_do_resume) ENDPROC(cpu_arm926_do_resume) #endif - __CPUINIT - .type __arm926_setup, #function __arm926_setup: mov r0, #0 diff --git a/arch/arm/mm/proc-arm940.S b/arch/arm/mm/proc-arm940.S index 8da189d4a40..1c39a704ff6 100644 --- a/arch/arm/mm/proc-arm940.S +++ b/arch/arm/mm/proc-arm940.S @@ -273,8 +273,6 @@ ENDPROC(arm940_dma_unmap_area) @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) define_cache_functions arm940 - __CPUINIT - .type __arm940_setup, #function __arm940_setup: mov r0, #0 diff --git a/arch/arm/mm/proc-arm946.S b/arch/arm/mm/proc-arm946.S index f666cf34075..0289cd905e7 100644 --- 
a/arch/arm/mm/proc-arm946.S +++ b/arch/arm/mm/proc-arm946.S @@ -326,8 +326,6 @@ ENTRY(cpu_arm946_dcache_clean_area) mcr p15, 0, r0, c7, c10, 4 @ drain WB mov pc, lr - __CPUINIT - .type __arm946_setup, #function __arm946_setup: mov r0, #0 diff --git a/arch/arm/mm/proc-arm9tdmi.S b/arch/arm/mm/proc-arm9tdmi.S index 8881391dfb9..f51197ba754 100644 --- a/arch/arm/mm/proc-arm9tdmi.S +++ b/arch/arm/mm/proc-arm9tdmi.S @@ -51,8 +51,6 @@ ENTRY(cpu_arm9tdmi_reset) ENDPROC(cpu_arm9tdmi_reset) .popsection - __CPUINIT - .type __arm9tdmi_setup, #function __arm9tdmi_setup: mov pc, lr diff --git a/arch/arm/mm/proc-fa526.S b/arch/arm/mm/proc-fa526.S index d217e9795d7..2dfc0f1d3bf 100644 --- a/arch/arm/mm/proc-fa526.S +++ b/arch/arm/mm/proc-fa526.S @@ -81,7 +81,6 @@ ENDPROC(cpu_fa526_reset) */ .align 4 ENTRY(cpu_fa526_do_idle) - mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt mov pc, lr @@ -136,8 +135,6 @@ ENTRY(cpu_fa526_set_pte_ext) #endif mov pc, lr - __CPUINIT - .type __fa526_setup, #function __fa526_setup: /* On return of this routine, r0 must carry correct flags for CFG register */ diff --git a/arch/arm/mm/proc-feroceon.S b/arch/arm/mm/proc-feroceon.S index 4106b09e0c2..d5146b98c8d 100644 --- a/arch/arm/mm/proc-feroceon.S +++ b/arch/arm/mm/proc-feroceon.S @@ -514,8 +514,6 @@ ENTRY(cpu_feroceon_set_pte_ext) #endif mov pc, lr - __CPUINIT - .type __feroceon_setup, #function __feroceon_setup: mov r0, #0 diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S index f9a0aa725ea..e3c48a3fe06 100644 --- a/arch/arm/mm/proc-macros.S +++ b/arch/arm/mm/proc-macros.S @@ -333,3 +333,8 @@ ENTRY(\name\()_tlb_fns) .endif .size \name\()_tlb_fns, . - \name\()_tlb_fns .endm + +.macro globl_equ x, y + .globl \x + .equ \x, \y +.endm diff --git a/arch/arm/mm/proc-mohawk.S b/arch/arm/mm/proc-mohawk.S index 0b60dd3d742..40acba59573 100644 --- a/arch/arm/mm/proc-mohawk.S +++ b/arch/arm/mm/proc-mohawk.S @@ -383,8 +383,6 @@ ENTRY(cpu_mohawk_do_resume) ENDPROC(cpu_mohawk_do_resume) #endif - __CPUINIT - .type __mohawk_setup, #function __mohawk_setup: mov r0, #0 diff --git a/arch/arm/mm/proc-sa110.S b/arch/arm/mm/proc-sa110.S index 775d70fba93..c45319c8f1d 100644 --- a/arch/arm/mm/proc-sa110.S +++ b/arch/arm/mm/proc-sa110.S @@ -159,8 +159,6 @@ ENTRY(cpu_sa110_set_pte_ext) #endif mov pc, lr - __CPUINIT - .type __sa110_setup, #function __sa110_setup: mov r10, #0 diff --git a/arch/arm/mm/proc-sa1100.S b/arch/arm/mm/proc-sa1100.S index d92dfd08142..09d241ae2db 100644 --- a/arch/arm/mm/proc-sa1100.S +++ b/arch/arm/mm/proc-sa1100.S @@ -198,8 +198,6 @@ ENTRY(cpu_sa1100_do_resume) ENDPROC(cpu_sa1100_do_resume) #endif - __CPUINIT - .type __sa1100_setup, #function __sa1100_setup: mov r0, #0 diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S index 919405e20b8..1128064fddc 100644 --- a/arch/arm/mm/proc-v6.S +++ b/arch/arm/mm/proc-v6.S @@ -140,8 +140,10 @@ ENTRY(cpu_v6_set_pte_ext) ENTRY(cpu_v6_do_suspend) stmfd sp!, {r4 - r9, lr} mrc p15, 0, r4, c13, c0, 0 @ FCSE/PID +#ifdef CONFIG_MMU mrc p15, 0, r5, c3, c0, 0 @ Domain ID mrc p15, 0, r6, c2, c0, 1 @ Translation table base 1 +#endif mrc p15, 0, r7, c1, c0, 1 @ auxiliary control register mrc p15, 0, r8, c1, c0, 2 @ co-processor access control mrc p15, 0, r9, c1, c0, 0 @ control register @@ -158,14 +160,16 @@ ENTRY(cpu_v6_do_resume) mcr p15, 0, ip, c13, c0, 1 @ set reserved context ID ldmia r0, {r4 - r9} mcr p15, 0, r4, c13, c0, 0 @ FCSE/PID +#ifdef CONFIG_MMU mcr p15, 0, r5, c3, c0, 0 @ Domain ID ALT_SMP(orr r1, r1, #TTB_FLAGS_SMP) ALT_UP(orr r1, r1, #TTB_FLAGS_UP) mcr p15, 0, 
r1, c2, c0, 0 @ Translation table base 0 mcr p15, 0, r6, c2, c0, 1 @ Translation table base 1 + mcr p15, 0, ip, c2, c0, 2 @ TTB control register +#endif mcr p15, 0, r7, c1, c0, 1 @ auxiliary control register mcr p15, 0, r8, c1, c0, 2 @ co-processor access control - mcr p15, 0, ip, c2, c0, 2 @ TTB control register mcr p15, 0, ip, c7, c5, 4 @ ISB mov r0, r9 @ control register b cpu_resume_mmu @@ -176,8 +180,6 @@ ENDPROC(cpu_v6_do_resume) .align - __CPUINIT - /* * __v6_setup * diff --git a/arch/arm/mm/proc-v7-2level.S b/arch/arm/mm/proc-v7-2level.S index 9704097c450..bdd3be4be77 100644 --- a/arch/arm/mm/proc-v7-2level.S +++ b/arch/arm/mm/proc-v7-2level.S @@ -110,7 +110,7 @@ ENTRY(cpu_v7_set_pte_ext) ARM( str r3, [r0, #2048]! ) THUMB( add r0, r0, #2048 ) THUMB( str r3, [r0] ) - ALT_SMP(mov pc,lr) + ALT_SMP(W(nop)) ALT_UP (mcr p15, 0, r0, c7, c10, 1) @ flush_pte #endif mov pc, lr @@ -160,8 +160,6 @@ ENDPROC(cpu_v7_set_pte_ext) mcr p15, 0, \ttbr1, c2, c0, 1 @ load TTB1 .endm - __CPUINIT - /* AT * TFR EV X F I D LR S * .EEE ..EE PUI. .T.T 4RVI ZWRS BLDP WCAM @@ -172,5 +170,3 @@ ENDPROC(cpu_v7_set_pte_ext) .type v7_crval, #object v7_crval: crval clear=0x2120c302, mmuset=0x10c03c7d, ucset=0x00c01c7c - - .previous diff --git a/arch/arm/mm/proc-v7-3level.S b/arch/arm/mm/proc-v7-3level.S index 363027e811d..01a719e18bb 100644 --- a/arch/arm/mm/proc-v7-3level.S +++ b/arch/arm/mm/proc-v7-3level.S @@ -39,6 +39,14 @@ #define TTB_FLAGS_SMP (TTB_IRGN_WBWA|TTB_S|TTB_RGN_OC_WBWA) #define PMD_FLAGS_SMP (PMD_SECT_WBWA|PMD_SECT_S) +#ifndef __ARMEB__ +# define rpgdl r0 +# define rpgdh r1 +#else +# define rpgdl r1 +# define rpgdh r0 +#endif + /* * cpu_v7_switch_mm(pgd_phys, tsk) * @@ -47,10 +55,10 @@ */ ENTRY(cpu_v7_switch_mm) #ifdef CONFIG_MMU - mmid r1, r1 @ get mm->context.id - asid r3, r1 - mov r3, r3, lsl #(48 - 32) @ ASID - mcrr p15, 0, r0, r3, c2 @ set TTB 0 + mmid r2, r2 + asid r2, r2 + orr rpgdh, rpgdh, r2, lsl #(48 - 32) @ upper 32-bits of pgd + mcrr p15, 0, rpgdl, rpgdh, c2 @ set TTB 0 isb #endif mov pc, lr @@ -73,7 +81,7 @@ ENTRY(cpu_v7_set_pte_ext) tst r3, #1 << (55 - 32) @ L_PTE_DIRTY orreq r2, #L_PTE_RDONLY 1: strd r2, r3, [r0] - ALT_SMP(mov pc, lr) + ALT_SMP(W(nop)) ALT_UP (mcr p15, 0, r0, c7, c10, 1) @ flush_pte #endif mov pc, lr @@ -106,7 +114,8 @@ ENDPROC(cpu_v7_set_pte_ext) */ .macro v7_ttb_setup, zero, ttbr0, ttbr1, tmp ldr \tmp, =swapper_pg_dir @ swapper_pg_dir virtual address - cmp \ttbr1, \tmp @ PHYS_OFFSET > PAGE_OFFSET? (branch below) + mov \tmp, \tmp, lsr #ARCH_PGD_SHIFT + cmp \ttbr1, \tmp @ PHYS_OFFSET > PAGE_OFFSET? mrc p15, 0, \tmp, c2, c0, 2 @ TTB control register orr \tmp, \tmp, #TTB_EAE ALT_SMP(orr \tmp, \tmp, #TTB_FLAGS_SMP) @@ -114,31 +123,23 @@ ENDPROC(cpu_v7_set_pte_ext) ALT_SMP(orr \tmp, \tmp, #TTB_FLAGS_SMP << 16) ALT_UP(orr \tmp, \tmp, #TTB_FLAGS_UP << 16) /* - * TTBR0/TTBR1 split (PAGE_OFFSET): - * 0x40000000: T0SZ = 2, T1SZ = 0 (not used) - * 0x80000000: T0SZ = 0, T1SZ = 1 - * 0xc0000000: T0SZ = 0, T1SZ = 2 - * - * Only use this feature if PHYS_OFFSET <= PAGE_OFFSET, otherwise - * booting secondary CPUs would end up using TTBR1 for the identity - * mapping set up in TTBR0. + * Only use split TTBRs if PHYS_OFFSET <= PAGE_OFFSET (cmp above), + * otherwise booting secondary CPUs would end up using TTBR1 for the + * identity mapping set up in TTBR0. */ - bhi 9001f @ PHYS_OFFSET > PAGE_OFFSET? 
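[Editor's note] The cpu_v7_switch_mm hunk above reflects the LPAE change where pgd_phys is now handed over as a 64-bit value in r0/r1 (so the second argument moves to r2), and the 8-bit ASID is inserted into TTBR0 bits 55:48 before a single 64-bit mcrr write. A minimal annotated sketch of the same idea with the macro register names resolved, assuming a little-endian kernel (so r0 is the low word, per the rpgdl/rpgdh defines above) and a made-up ASID value purely for illustration:

	mmid	r2, r2			@ r2 := mm->context.id (second argument now in r2)
	asid	r2, r2			@ keep the low 8 bits, e.g. ASID 0x05
	orr	r1, r1, r2, lsl #16	@ place ASID in TTBR0[55:48] = high-word bits [23:16]
	mcrr	p15, 0, r0, r1, c2	@ single 64-bit write: TTBR0 = {high=r1, low=r0}
	isb				@ make the new translation base visible
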
- orr \tmp, \tmp, #(((PAGE_OFFSET >> 30) - 1) << 16) @ TTBCR.T1SZ -#if defined CONFIG_VMSPLIT_2G - /* PAGE_OFFSET == 0x80000000, T1SZ == 1 */ - add \ttbr1, \ttbr1, #1 << 4 @ skip two L1 entries -#elif defined CONFIG_VMSPLIT_3G - /* PAGE_OFFSET == 0xc0000000, T1SZ == 2 */ - add \ttbr1, \ttbr1, #4096 * (1 + 3) @ only L2 used, skip pgd+3*pmd -#endif - /* CONFIG_VMSPLIT_1G does not need TTBR1 adjustment */ -9001: mcr p15, 0, \tmp, c2, c0, 2 @ TTB control register - mcrr p15, 1, \ttbr1, \zero, c2 @ load TTBR1 + orrls \tmp, \tmp, #TTBR1_SIZE @ TTBCR.T1SZ + mcr p15, 0, \tmp, c2, c0, 2 @ TTBCR + mov \tmp, \ttbr1, lsr #(32 - ARCH_PGD_SHIFT) @ upper bits + mov \ttbr1, \ttbr1, lsl #ARCH_PGD_SHIFT @ lower bits + addls \ttbr1, \ttbr1, #TTBR1_OFFSET + mcrr p15, 1, \ttbr1, \zero, c2 @ load TTBR1 + mov \tmp, \ttbr0, lsr #(32 - ARCH_PGD_SHIFT) @ upper bits + mov \ttbr0, \ttbr0, lsl #ARCH_PGD_SHIFT @ lower bits + mcrr p15, 0, \ttbr0, \zero, c2 @ load TTBR0 + mcrr p15, 1, \ttbr1, \zero, c2 @ load TTBR1 + mcrr p15, 0, \ttbr0, \zero, c2 @ load TTBR0 .endm - __CPUINIT - /* * AT * TFR EV X F IHD LR S @@ -150,5 +151,3 @@ ENDPROC(cpu_v7_set_pte_ext) .type v7_crval, #object v7_crval: crval clear=0x0120c302, mmuset=0x30c23c7d, ucset=0x00c01c7c - - .previous diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index 2c73a7301ff..73398bcf9bd 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -75,13 +75,14 @@ ENTRY(cpu_v7_do_idle) ENDPROC(cpu_v7_do_idle) ENTRY(cpu_v7_dcache_clean_area) - ALT_SMP(mov pc, lr) @ MP extensions imply L1 PTW - ALT_UP(W(nop)) - dcache_line_size r2, r3 -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + ALT_SMP(W(nop)) @ MP extensions imply L1 PTW + ALT_UP_B(1f) + mov pc, lr +1: dcache_line_size r2, r3 +2: mcr p15, 0, r0, c7, c10, 1 @ clean D entry add r0, r0, r2 subs r1, r1, r2 - bhi 1b + bhi 2b dsb mov pc, lr ENDPROC(cpu_v7_dcache_clean_area) @@ -98,9 +99,11 @@ ENTRY(cpu_v7_do_suspend) mrc p15, 0, r4, c13, c0, 0 @ FCSE/PID mrc p15, 0, r5, c13, c0, 3 @ User r/o thread ID stmia r0!, {r4 - r5} +#ifdef CONFIG_MMU mrc p15, 0, r6, c3, c0, 0 @ Domain ID mrc p15, 0, r7, c2, c0, 1 @ TTB 1 mrc p15, 0, r11, c2, c0, 2 @ TTB control register +#endif mrc p15, 0, r8, c1, c0, 0 @ Control register mrc p15, 0, r9, c1, c0, 1 @ Auxiliary control register mrc p15, 0, r10, c1, c0, 2 @ Co-processor access control @@ -110,13 +113,14 @@ ENDPROC(cpu_v7_do_suspend) ENTRY(cpu_v7_do_resume) mov ip, #0 - mcr p15, 0, ip, c8, c7, 0 @ invalidate TLBs mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache mcr p15, 0, ip, c13, c0, 1 @ set reserved context ID ldmia r0!, {r4 - r5} mcr p15, 0, r4, c13, c0, 0 @ FCSE/PID mcr p15, 0, r5, c13, c0, 3 @ User r/o thread ID ldmia r0, {r6 - r11} +#ifdef CONFIG_MMU + mcr p15, 0, ip, c8, c7, 0 @ invalidate TLBs mcr p15, 0, r6, c3, c0, 0 @ Domain ID #ifndef CONFIG_ARM_LPAE ALT_SMP(orr r1, r1, #TTB_FLAGS_SMP) @@ -125,14 +129,15 @@ ENTRY(cpu_v7_do_resume) mcr p15, 0, r1, c2, c0, 0 @ TTB 0 mcr p15, 0, r7, c2, c0, 1 @ TTB 1 mcr p15, 0, r11, c2, c0, 2 @ TTB control register - mrc p15, 0, r4, c1, c0, 1 @ Read Auxiliary control register - teq r4, r9 @ Is it already set? - mcrne p15, 0, r9, c1, c0, 1 @ No, so write it - mcr p15, 0, r10, c1, c0, 2 @ Co-processor access control ldr r4, =PRRR @ PRRR ldr r5, =NMRR @ NMRR mcr p15, 0, r4, c10, c2, 0 @ write PRRR mcr p15, 0, r5, c10, c2, 1 @ write NMRR +#endif /* CONFIG_MMU */ + mrc p15, 0, r4, c1, c0, 1 @ Read Auxiliary control register + teq r4, r9 @ Is it already set? 
+ mcrne p15, 0, r9, c1, c0, 1 @ No, so write it + mcr p15, 0, r10, c1, c0, 2 @ Co-processor access control isb dsb mov r0, r8 @ control register @@ -140,7 +145,28 @@ ENTRY(cpu_v7_do_resume) ENDPROC(cpu_v7_do_resume) #endif - __CPUINIT +#ifdef CONFIG_CPU_PJ4B + globl_equ cpu_pj4b_switch_mm, cpu_v7_switch_mm + globl_equ cpu_pj4b_set_pte_ext, cpu_v7_set_pte_ext + globl_equ cpu_pj4b_proc_init, cpu_v7_proc_init + globl_equ cpu_pj4b_proc_fin, cpu_v7_proc_fin + globl_equ cpu_pj4b_reset, cpu_v7_reset +#ifdef CONFIG_PJ4B_ERRATA_4742 +ENTRY(cpu_pj4b_do_idle) + dsb @ WFI may enter a low-power mode + wfi + dsb @barrier + mov pc, lr +ENDPROC(cpu_pj4b_do_idle) +#else + globl_equ cpu_pj4b_do_idle, cpu_v7_do_idle +#endif + globl_equ cpu_pj4b_dcache_clean_area, cpu_v7_dcache_clean_area + globl_equ cpu_pj4b_do_suspend, cpu_v7_do_suspend + globl_equ cpu_pj4b_do_resume, cpu_v7_do_resume + globl_equ cpu_pj4b_suspend_size, cpu_v7_suspend_size + +#endif /* * __v7_setup @@ -155,7 +181,8 @@ ENDPROC(cpu_v7_do_resume) */ __v7_ca5mp_setup: __v7_ca9mp_setup: - mov r10, #(1 << 0) @ TLB ops broadcasting +__v7_cr7mp_setup: + mov r10, #(1 << 0) @ Cache/TLB ops broadcasting b 1f __v7_ca7mp_setup: __v7_ca15mp_setup: @@ -350,6 +377,9 @@ __v7_setup_stack: @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) define_processor_functions v7, dabort=v7_early_abort, pabort=v7_pabort, suspend=1 +#ifdef CONFIG_CPU_PJ4B + define_processor_functions pj4b, dabort=v7_early_abort, pabort=v7_pabort, suspend=1 +#endif .section ".rodata" @@ -362,7 +392,7 @@ __v7_setup_stack: /* * Standard v7 proc info content */ -.macro __v7_proc initfunc, mm_mmuflags = 0, io_mmuflags = 0, hwcaps = 0 +.macro __v7_proc initfunc, mm_mmuflags = 0, io_mmuflags = 0, hwcaps = 0, proc_fns = v7_processor_functions ALT_SMP(.long PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_AP_READ | \ PMD_SECT_AF | PMD_FLAGS_SMP | \mm_mmuflags) ALT_UP(.long PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_AP_READ | \ @@ -375,7 +405,7 @@ __v7_setup_stack: .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB | HWCAP_FAST_MULT | \ HWCAP_EDSP | HWCAP_TLS | \hwcaps .long cpu_v7_name - .long v7_processor_functions + .long \proc_fns .long v7wbi_tlb_fns .long v6_user_fns .long v7_cache_fns @@ -407,12 +437,24 @@ __v7_ca9mp_proc_info: /* * Marvell PJ4B processor. */ +#ifdef CONFIG_CPU_PJ4B .type __v7_pj4b_proc_info, #object __v7_pj4b_proc_info: - .long 0x562f5840 - .long 0xfffffff0 - __v7_proc __v7_pj4b_setup + .long 0x560f5800 + .long 0xff0fff00 + __v7_proc __v7_pj4b_setup, proc_fns = pj4b_processor_functions .size __v7_pj4b_proc_info, . - __v7_pj4b_proc_info +#endif + + /* + * ARM Ltd. Cortex R7 processor. + */ + .type __v7_cr7mp_proc_info, #object +__v7_cr7mp_proc_info: + .long 0x410fc170 + .long 0xff0ffff0 + __v7_proc __v7_cr7mp_setup + .size __v7_cr7mp_proc_info, . - __v7_cr7mp_proc_info /* * ARM Ltd. Cortex A7 processor. diff --git a/arch/arm/mm/proc-v7m.S b/arch/arm/mm/proc-v7m.S new file mode 100644 index 00000000000..0c93588fcb9 --- /dev/null +++ b/arch/arm/mm/proc-v7m.S @@ -0,0 +1,157 @@ +/* + * linux/arch/arm/mm/proc-v7m.S + * + * Copyright (C) 2008 ARM Ltd. + * Copyright (C) 2001 Deep Blue Solutions Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This is the "shell" of the ARMv7-M processor support. 
+ */ +#include <linux/linkage.h> +#include <asm/assembler.h> +#include <asm/v7m.h> +#include "proc-macros.S" + +ENTRY(cpu_v7m_proc_init) + mov pc, lr +ENDPROC(cpu_v7m_proc_init) + +ENTRY(cpu_v7m_proc_fin) + mov pc, lr +ENDPROC(cpu_v7m_proc_fin) + +/* + * cpu_v7m_reset(loc) + * + * Perform a soft reset of the system. Put the CPU into the + * same state as it would be if it had been reset, and branch + * to what would be the reset vector. + * + * - loc - location to jump to for soft reset + */ + .align 5 +ENTRY(cpu_v7m_reset) + mov pc, r0 +ENDPROC(cpu_v7m_reset) + +/* + * cpu_v7m_do_idle() + * + * Idle the processor (eg, wait for interrupt). + * + * IRQs are already disabled. + */ +ENTRY(cpu_v7m_do_idle) + wfi + mov pc, lr +ENDPROC(cpu_v7m_do_idle) + +ENTRY(cpu_v7m_dcache_clean_area) + mov pc, lr +ENDPROC(cpu_v7m_dcache_clean_area) + +/* + * There is no MMU, so here is nothing to do. + */ +ENTRY(cpu_v7m_switch_mm) + mov pc, lr +ENDPROC(cpu_v7m_switch_mm) + +.globl cpu_v7m_suspend_size +.equ cpu_v7m_suspend_size, 0 + +#ifdef CONFIG_ARM_CPU_SUSPEND +ENTRY(cpu_v7m_do_suspend) + mov pc, lr +ENDPROC(cpu_v7m_do_suspend) + +ENTRY(cpu_v7m_do_resume) + mov pc, lr +ENDPROC(cpu_v7m_do_resume) +#endif + + .section ".text.init", #alloc, #execinstr + +/* + * __v7m_setup + * + * This should be able to cover all ARMv7-M cores. + */ +__v7m_setup: + @ Configure the vector table base address + ldr r0, =BASEADDR_V7M_SCB + ldr r12, =vector_table + str r12, [r0, V7M_SCB_VTOR] + + @ enable UsageFault, BusFault and MemManage fault. + ldr r5, [r0, #V7M_SCB_SHCSR] + orr r5, #(V7M_SCB_SHCSR_USGFAULTENA | V7M_SCB_SHCSR_BUSFAULTENA | V7M_SCB_SHCSR_MEMFAULTENA) + str r5, [r0, #V7M_SCB_SHCSR] + + @ Lower the priority of the SVC and PendSV exceptions + mov r5, #0x80000000 + str r5, [r0, V7M_SCB_SHPR2] @ set SVC priority + mov r5, #0x00800000 + str r5, [r0, V7M_SCB_SHPR3] @ set PendSV priority + + @ SVC to run the kernel in this mode + adr r1, BSYM(1f) + ldr r5, [r12, #11 * 4] @ read the SVC vector entry + str r1, [r12, #11 * 4] @ write the temporary SVC vector entry + mov r6, lr @ save LR + mov r7, sp @ save SP + ldr sp, =__v7m_setup_stack_top + cpsie i + svc #0 +1: cpsid i + str r5, [r12, #11 * 4] @ restore the original SVC vector entry + mov lr, r6 @ restore LR + mov sp, r7 @ restore SP + + @ Special-purpose control register + mov r1, #1 + msr control, r1 @ Thread mode has unpriviledged access + + @ Configure the System Control Register to ensure 8-byte stack alignment + @ Note the STKALIGN bit is either RW or RAO. + ldr r12, [r0, V7M_SCB_CCR] @ system control register + orr r12, #V7M_SCB_CCR_STKALIGN + str r12, [r0, V7M_SCB_CCR] + mov pc, lr +ENDPROC(__v7m_setup) + + define_processor_functions v7m, dabort=nommu_early_abort, pabort=legacy_pabort, nommu=1 + + .section ".rodata" + string cpu_arch_name, "armv7m" + string cpu_elf_name "v7m" + string cpu_v7m_name "ARMv7-M" + + .section ".proc.info.init", #alloc, #execinstr + + /* + * Match any ARMv7-M processor core. + */ + .type __v7m_proc_info, #object +__v7m_proc_info: + .long 0x000f0000 @ Required ID value + .long 0x000f0000 @ Mask for ID + .long 0 @ proc_info_list.__cpu_mm_mmu_flags + .long 0 @ proc_info_list.__cpu_io_mmu_flags + b __v7m_setup @ proc_info_list.__cpu_flush + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT + .long cpu_v7m_name + .long v7m_processor_functions @ proc_info_list.proc + .long 0 @ proc_info_list.tlb + .long 0 @ proc_info_list.user + .long nop_cache_fns @ proc_info_list.cache + .size __v7m_proc_info, . 
- __v7m_proc_info + +__v7m_setup_stack: + .space 4 * 8 @ 8 registers +__v7m_setup_stack_top: diff --git a/arch/arm/mm/proc-xsc3.S b/arch/arm/mm/proc-xsc3.S index e8efd83b6f2..dc164589004 100644 --- a/arch/arm/mm/proc-xsc3.S +++ b/arch/arm/mm/proc-xsc3.S @@ -446,8 +446,6 @@ ENTRY(cpu_xsc3_do_resume) ENDPROC(cpu_xsc3_do_resume) #endif - __CPUINIT - .type __xsc3_setup, #function __xsc3_setup: mov r0, #PSR_F_BIT|PSR_I_BIT|SVC_MODE diff --git a/arch/arm/mm/proc-xscale.S b/arch/arm/mm/proc-xscale.S index e766f889bfd..d19b1cfcad9 100644 --- a/arch/arm/mm/proc-xscale.S +++ b/arch/arm/mm/proc-xscale.S @@ -558,8 +558,6 @@ ENTRY(cpu_xscale_do_resume) ENDPROC(cpu_xscale_do_resume) #endif - __CPUINIT - .type __xscale_setup, #function __xscale_setup: mcr p15, 0, ip, c7, c7, 0 @ invalidate I, D caches & BTB |
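[Editor's note] The globl_equ macro added to proc-macros.S earlier in this diff is what lets the CONFIG_CPU_PJ4B block in proc-v7.S reuse the generic v7 entry points without duplicating any code: each alias becomes a global symbol defined equal to the corresponding v7 routine. A short sketch of what one such line expands to (using the pj4b names the patch itself defines):

	@ writing
	@	globl_equ cpu_pj4b_reset, cpu_v7_reset
	@ is equivalent to writing
	.globl	cpu_pj4b_reset
	.equ	cpu_pj4b_reset, cpu_v7_reset
	@ so the pj4b_processor_functions table emitted by
	@ define_processor_functions can reference cpu_pj4b_reset while the
	@ assembler resolves it to the existing cpu_v7_reset code; only
	@ cpu_pj4b_do_idle gets a real body, and only when errata 4742 is enabled.
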