diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2013-09-05 18:07:32 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-09-05 18:07:32 -0700 |
commit | 2e032852245b3dcfe5461d7353e34eb6da095ccf (patch) | |
tree | 69f9fdf03b54d76bb539096e0ec96e91ea8216b1 /arch/arm/include/asm | |
parent | 356f9e74ffaafd11741589a9aa21d6c9d2721417 (diff) | |
parent | 141b97433d77e39ac3ac111a7b3852192035259c (diff) |
Merge branch 'for-linus' of git://git.linaro.org/people/rmk/linux-arm
Pull ARM updates from Russell King:
"This set includes adding support for Neon acceleration of RAID6 XOR
code from Ard Biesheuvel, cache flushing and barrier updates from Will
Deacon, and a cleanup to the ARM debug code which reduces the amount
of code by about 500 lines.
A few other cleanups, such as constifying the machine descriptors
which already shouldn't be written to, cleaning up the printing of the
L2 cache size"
* 'for-linus' of git://git.linaro.org/people/rmk/linux-arm: (55 commits)
ARM: 7826/1: debug: support debug ll on hisilicon soc
ARM: 7830/1: delay: don't bother reporting bogomips in /proc/cpuinfo
ARM: 7829/1: Add ".text.unlikely" and ".text.hot" to arm unwind tables
ARM: 7828/1: ARMv7-M: implement restart routine common to all v7-M machines
ARM: 7827/1: highbank: fix debug uart virtual address for LPAE
ARM: 7823/1: errata: workaround Cortex-A15 erratum 773022
ARM: 7806/1: allow DEBUG_UNCOMPRESS for Tegra
ARM: 7793/1: debug: use generic option for ep93xx PL10x debug port
ARM: debug: move SPEAr debug to generic PL01x code
ARM: debug: move davinci debug to generic 8250 code
ARM: debug: move keystone debug to generic 8250 code
ARM: debug: remove DEBUG_ROCKCHIP_UART
ARM: debug: provide generic option choices for 8250 and PL01x ports
ARM: debug: move PL01X debug include into arch/arm/include/debug/
ARM: debug: provide PL01x debug uart phys/virt address configuration options
ARM: debug: add support for word accesses to debug/8250.S
ARM: debug: move 8250 debug include into arch/arm/include/debug/
ARM: debug: provide 8250 debug uart phys/virt address configuration options
ARM: debug: provide 8250 debug uart register shift configuration option
ARM: debug: provide 8250 debug uart flow control configuration option
...
Diffstat (limited to 'arch/arm/include/asm')
-rw-r--r-- | arch/arm/include/asm/assembler.h | 4 | ||||
-rw-r--r-- | arch/arm/include/asm/barrier.h | 32 | ||||
-rw-r--r-- | arch/arm/include/asm/cacheflush.h | 5 | ||||
-rw-r--r-- | arch/arm/include/asm/hardware/debug-8250.S | 29 | ||||
-rw-r--r-- | arch/arm/include/asm/hardware/debug-pl01x.S | 29 | ||||
-rw-r--r-- | arch/arm/include/asm/mach/arch.h | 4 | ||||
-rw-r--r-- | arch/arm/include/asm/memblock.h | 3 | ||||
-rw-r--r-- | arch/arm/include/asm/module.h | 2 | ||||
-rw-r--r-- | arch/arm/include/asm/neon.h | 36 | ||||
-rw-r--r-- | arch/arm/include/asm/pgtable.h | 2 | ||||
-rw-r--r-- | arch/arm/include/asm/prom.h | 4 | ||||
-rw-r--r-- | arch/arm/include/asm/spinlock.h | 2 | ||||
-rw-r--r-- | arch/arm/include/asm/switch_to.h | 10 | ||||
-rw-r--r-- | arch/arm/include/asm/thread_info.h | 11 | ||||
-rw-r--r-- | arch/arm/include/asm/tlbflush.h | 181 | ||||
-rw-r--r-- | arch/arm/include/asm/types.h | 40 | ||||
-rw-r--r-- | arch/arm/include/asm/v7m.h | 12 | ||||
-rw-r--r-- | arch/arm/include/asm/xor.h | 73 |
18 files changed, 360 insertions, 119 deletions
diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index a5fef710af3..fcc1b5bf697 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -220,9 +220,9 @@ #ifdef CONFIG_SMP #if __LINUX_ARM_ARCH__ >= 7 .ifeqs "\mode","arm" - ALT_SMP(dmb) + ALT_SMP(dmb ish) .else - ALT_SMP(W(dmb)) + ALT_SMP(W(dmb) ish) .endif #elif __LINUX_ARM_ARCH__ == 6 ALT_SMP(mcr p15, 0, r0, c7, c10, 5) @ dmb diff --git a/arch/arm/include/asm/barrier.h b/arch/arm/include/asm/barrier.h index 8dcd9c702d9..60f15e274e6 100644 --- a/arch/arm/include/asm/barrier.h +++ b/arch/arm/include/asm/barrier.h @@ -14,27 +14,27 @@ #endif #if __LINUX_ARM_ARCH__ >= 7 -#define isb() __asm__ __volatile__ ("isb" : : : "memory") -#define dsb() __asm__ __volatile__ ("dsb" : : : "memory") -#define dmb() __asm__ __volatile__ ("dmb" : : : "memory") +#define isb(option) __asm__ __volatile__ ("isb " #option : : : "memory") +#define dsb(option) __asm__ __volatile__ ("dsb " #option : : : "memory") +#define dmb(option) __asm__ __volatile__ ("dmb " #option : : : "memory") #elif defined(CONFIG_CPU_XSC3) || __LINUX_ARM_ARCH__ == 6 -#define isb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c5, 4" \ +#define isb(x) __asm__ __volatile__ ("mcr p15, 0, %0, c7, c5, 4" \ : : "r" (0) : "memory") -#define dsb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 4" \ +#define dsb(x) __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 4" \ : : "r" (0) : "memory") -#define dmb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 5" \ +#define dmb(x) __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 5" \ : : "r" (0) : "memory") #elif defined(CONFIG_CPU_FA526) -#define isb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c5, 4" \ +#define isb(x) __asm__ __volatile__ ("mcr p15, 0, %0, c7, c5, 4" \ : : "r" (0) : "memory") -#define dsb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 4" \ +#define dsb(x) __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 4" \ : : "r" (0) : "memory") -#define dmb() __asm__ __volatile__ ("" : : : "memory") +#define dmb(x) __asm__ __volatile__ ("" : : : "memory") #else -#define isb() __asm__ __volatile__ ("" : : : "memory") -#define dsb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 4" \ +#define isb(x) __asm__ __volatile__ ("" : : : "memory") +#define dsb(x) __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 4" \ : : "r" (0) : "memory") -#define dmb() __asm__ __volatile__ ("" : : : "memory") +#define dmb(x) __asm__ __volatile__ ("" : : : "memory") #endif #ifdef CONFIG_ARCH_HAS_BARRIERS @@ -42,7 +42,7 @@ #elif defined(CONFIG_ARM_DMA_MEM_BUFFERABLE) || defined(CONFIG_SMP) #define mb() do { dsb(); outer_sync(); } while (0) #define rmb() dsb() -#define wmb() mb() +#define wmb() do { dsb(st); outer_sync(); } while (0) #else #define mb() barrier() #define rmb() barrier() @@ -54,9 +54,9 @@ #define smp_rmb() barrier() #define smp_wmb() barrier() #else -#define smp_mb() dmb() -#define smp_rmb() dmb() -#define smp_wmb() dmb() +#define smp_mb() dmb(ish) +#define smp_rmb() smp_mb() +#define smp_wmb() dmb(ishst) #endif #define read_barrier_depends() do { } while(0) diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h index 17d0ae8672f..15f2d5bf887 100644 --- a/arch/arm/include/asm/cacheflush.h +++ b/arch/arm/include/asm/cacheflush.h @@ -268,8 +268,7 @@ extern void flush_cache_page(struct vm_area_struct *vma, unsigned long user_addr * Harvard caches are synchronised for the user space address range. * This is used for the ARM private sys_cacheflush system call. */ -#define flush_cache_user_range(start,end) \ - __cpuc_coherent_user_range((start) & PAGE_MASK, PAGE_ALIGN(end)) +#define flush_cache_user_range(s,e) __cpuc_coherent_user_range(s,e) /* * Perform necessary cache operations to ensure that data previously @@ -352,7 +351,7 @@ static inline void flush_cache_vmap(unsigned long start, unsigned long end) * set_pte_at() called from vmap_pte_range() does not * have a DSB after cleaning the cache line. */ - dsb(); + dsb(ishst); } static inline void flush_cache_vunmap(unsigned long start, unsigned long end) diff --git a/arch/arm/include/asm/hardware/debug-8250.S b/arch/arm/include/asm/hardware/debug-8250.S deleted file mode 100644 index 22c689255e6..00000000000 --- a/arch/arm/include/asm/hardware/debug-8250.S +++ /dev/null @@ -1,29 +0,0 @@ -/* - * arch/arm/include/asm/hardware/debug-8250.S - * - * Copyright (C) 1994-1999 Russell King - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#include <linux/serial_reg.h> - - .macro senduart,rd,rx - strb \rd, [\rx, #UART_TX << UART_SHIFT] - .endm - - .macro busyuart,rd,rx -1002: ldrb \rd, [\rx, #UART_LSR << UART_SHIFT] - and \rd, \rd, #UART_LSR_TEMT | UART_LSR_THRE - teq \rd, #UART_LSR_TEMT | UART_LSR_THRE - bne 1002b - .endm - - .macro waituart,rd,rx -#ifdef FLOW_CONTROL -1001: ldrb \rd, [\rx, #UART_MSR << UART_SHIFT] - tst \rd, #UART_MSR_CTS - beq 1001b -#endif - .endm diff --git a/arch/arm/include/asm/hardware/debug-pl01x.S b/arch/arm/include/asm/hardware/debug-pl01x.S deleted file mode 100644 index f9fd083eff6..00000000000 --- a/arch/arm/include/asm/hardware/debug-pl01x.S +++ /dev/null @@ -1,29 +0,0 @@ -/* arch/arm/include/asm/hardware/debug-pl01x.S - * - * Debugging macro include header - * - * Copyright (C) 1994-1999 Russell King - * Moved from linux/arch/arm/kernel/debug.S by Ben Dooks - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * -*/ -#include <linux/amba/serial.h> - - .macro senduart,rd,rx - strb \rd, [\rx, #UART01x_DR] - .endm - - .macro waituart,rd,rx -1001: ldr \rd, [\rx, #UART01x_FR] - tst \rd, #UART01x_FR_TXFF - bne 1001b - .endm - - .macro busyuart,rd,rx -1001: ldr \rd, [\rx, #UART01x_FR] - tst \rd, #UART01x_FR_BUSY - bne 1001b - .endm diff --git a/arch/arm/include/asm/mach/arch.h b/arch/arm/include/asm/mach/arch.h index 441efc491b5..69b879ac028 100644 --- a/arch/arm/include/asm/mach/arch.h +++ b/arch/arm/include/asm/mach/arch.h @@ -65,12 +65,12 @@ struct machine_desc { /* * Current machine - only accessible during boot. */ -extern struct machine_desc *machine_desc; +extern const struct machine_desc *machine_desc; /* * Machine type table - also only accessible during boot */ -extern struct machine_desc __arch_info_begin[], __arch_info_end[]; +extern const struct machine_desc __arch_info_begin[], __arch_info_end[]; #define for_each_machine_desc(p) \ for (p = __arch_info_begin; p < __arch_info_end; p++) diff --git a/arch/arm/include/asm/memblock.h b/arch/arm/include/asm/memblock.h index 00ca5f92648..c2f5102ae65 100644 --- a/arch/arm/include/asm/memblock.h +++ b/arch/arm/include/asm/memblock.h @@ -4,8 +4,7 @@ struct meminfo; struct machine_desc; -extern void arm_memblock_init(struct meminfo *, struct machine_desc *); - +void arm_memblock_init(struct meminfo *, const struct machine_desc *); phys_addr_t arm_memblock_steal(phys_addr_t size, phys_addr_t align); #endif diff --git a/arch/arm/include/asm/module.h b/arch/arm/include/asm/module.h index 0d3a28dbc8e..ed690c49ef9 100644 --- a/arch/arm/include/asm/module.h +++ b/arch/arm/include/asm/module.h @@ -12,6 +12,8 @@ enum { ARM_SEC_CORE, ARM_SEC_EXIT, ARM_SEC_DEVEXIT, + ARM_SEC_HOT, + ARM_SEC_UNLIKELY, ARM_SEC_MAX, }; diff --git a/arch/arm/include/asm/neon.h b/arch/arm/include/asm/neon.h new file mode 100644 index 00000000000..8f730fe7009 --- /dev/null +++ b/arch/arm/include/asm/neon.h @@ -0,0 +1,36 @@ +/* + * linux/arch/arm/include/asm/neon.h + * + * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <asm/hwcap.h> + +#define cpu_has_neon() (!!(elf_hwcap & HWCAP_NEON)) + +#ifdef __ARM_NEON__ + +/* + * If you are affected by the BUILD_BUG below, it probably means that you are + * using NEON code /and/ calling the kernel_neon_begin() function from the same + * compilation unit. To prevent issues that may arise from GCC reordering or + * generating(1) NEON instructions outside of these begin/end functions, the + * only supported way of using NEON code in the kernel is by isolating it in a + * separate compilation unit, and calling it from another unit from inside a + * kernel_neon_begin/kernel_neon_end pair. + * + * (1) Current GCC (4.7) might generate NEON instructions at O3 level if + * -mpfu=neon is set. + */ + +#define kernel_neon_begin() \ + BUILD_BUG_ON_MSG(1, "kernel_neon_begin() called from NEON code") + +#else +void kernel_neon_begin(void); +#endif +void kernel_neon_end(void); diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h index 04aeb02d2e1..be956dbf6ba 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h @@ -100,7 +100,7 @@ extern pgprot_t pgprot_s2_device; #define PAGE_HYP _MOD_PROT(pgprot_kernel, L_PTE_HYP) #define PAGE_HYP_DEVICE _MOD_PROT(pgprot_hyp_device, L_PTE_HYP) #define PAGE_S2 _MOD_PROT(pgprot_s2, L_PTE_S2_RDONLY) -#define PAGE_S2_DEVICE _MOD_PROT(pgprot_s2_device, L_PTE_USER | L_PTE_S2_RDONLY) +#define PAGE_S2_DEVICE _MOD_PROT(pgprot_s2_device, L_PTE_S2_RDWR) #define __PAGE_NONE __pgprot(_L_PTE_DEFAULT | L_PTE_RDONLY | L_PTE_XN | L_PTE_NONE) #define __PAGE_SHARED __pgprot(_L_PTE_DEFAULT | L_PTE_USER | L_PTE_XN) diff --git a/arch/arm/include/asm/prom.h b/arch/arm/include/asm/prom.h index a219227c3e4..4a2985e2196 100644 --- a/arch/arm/include/asm/prom.h +++ b/arch/arm/include/asm/prom.h @@ -15,13 +15,13 @@ #ifdef CONFIG_OF -extern struct machine_desc *setup_machine_fdt(unsigned int dt_phys); +extern const struct machine_desc *setup_machine_fdt(unsigned int dt_phys); extern void arm_dt_memblock_reserve(void); extern void __init arm_dt_init_cpu_maps(void); #else /* CONFIG_OF */ -static inline struct machine_desc *setup_machine_fdt(unsigned int dt_phys) +static inline const struct machine_desc *setup_machine_fdt(unsigned int dt_phys) { return NULL; } diff --git a/arch/arm/include/asm/spinlock.h b/arch/arm/include/asm/spinlock.h index b07c09e5a0a..4f2c28060c9 100644 --- a/arch/arm/include/asm/spinlock.h +++ b/arch/arm/include/asm/spinlock.h @@ -46,7 +46,7 @@ static inline void dsb_sev(void) { #if __LINUX_ARM_ARCH__ >= 7 __asm__ __volatile__ ( - "dsb\n" + "dsb ishst\n" SEV ); #else diff --git a/arch/arm/include/asm/switch_to.h b/arch/arm/include/asm/switch_to.h index fa09e6b49bf..c99e259469f 100644 --- a/arch/arm/include/asm/switch_to.h +++ b/arch/arm/include/asm/switch_to.h @@ -4,6 +4,16 @@ #include <linux/thread_info.h> /* + * For v7 SMP cores running a preemptible kernel we may be pre-empted + * during a TLB maintenance operation, so execute an inner-shareable dsb + * to ensure that the maintenance completes in case we migrate to another + * CPU. + */ +#if defined(CONFIG_PREEMPT) && defined(CONFIG_SMP) && defined(CONFIG_CPU_V7) +#define finish_arch_switch(prev) dsb(ish) +#endif + +/* * switch_to(prev, next) should switch from task `prev' to `next' * `prev' will never be the same as `next'. schedule() itself * contains the memory barrier to tell GCC not to cache `current'. diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h index 2b8114fcba0..df5e13d64f2 100644 --- a/arch/arm/include/asm/thread_info.h +++ b/arch/arm/include/asm/thread_info.h @@ -43,6 +43,16 @@ struct cpu_context_save { __u32 extra[2]; /* Xscale 'acc' register, etc */ }; +struct arm_restart_block { + union { + /* For user cache flushing */ + struct { + unsigned long start; + unsigned long end; + } cache; + }; +}; + /* * low level task data that entry.S needs immediate access to. * __switch_to() assumes cpu_context follows immediately after cpu_domain. @@ -68,6 +78,7 @@ struct thread_info { unsigned long thumbee_state; /* ThumbEE Handler Base register */ #endif struct restart_block restart_block; + struct arm_restart_block arm_restart_block; }; #define INIT_THREAD_INFO(tsk) \ diff --git a/arch/arm/include/asm/tlbflush.h b/arch/arm/include/asm/tlbflush.h index f467e9b3f8d..38960264040 100644 --- a/arch/arm/include/asm/tlbflush.h +++ b/arch/arm/include/asm/tlbflush.h @@ -319,67 +319,110 @@ extern struct cpu_tlb_fns cpu_tlb; #define tlb_op(f, regs, arg) __tlb_op(f, "p15, 0, %0, " regs, arg) #define tlb_l2_op(f, regs, arg) __tlb_op(f, "p15, 1, %0, " regs, arg) -static inline void local_flush_tlb_all(void) +static inline void __local_flush_tlb_all(void) { const int zero = 0; const unsigned int __tlb_flag = __cpu_tlb_flags; - if (tlb_flag(TLB_WB)) - dsb(); - tlb_op(TLB_V4_U_FULL | TLB_V6_U_FULL, "c8, c7, 0", zero); tlb_op(TLB_V4_D_FULL | TLB_V6_D_FULL, "c8, c6, 0", zero); tlb_op(TLB_V4_I_FULL | TLB_V6_I_FULL, "c8, c5, 0", zero); - tlb_op(TLB_V7_UIS_FULL, "c8, c3, 0", zero); +} + +static inline void local_flush_tlb_all(void) +{ + const int zero = 0; + const unsigned int __tlb_flag = __cpu_tlb_flags; + + if (tlb_flag(TLB_WB)) + dsb(nshst); + + __local_flush_tlb_all(); + tlb_op(TLB_V7_UIS_FULL, "c8, c7, 0", zero); if (tlb_flag(TLB_BARRIER)) { - dsb(); + dsb(nsh); isb(); } } -static inline void local_flush_tlb_mm(struct mm_struct *mm) +static inline void __flush_tlb_all(void) { const int zero = 0; - const int asid = ASID(mm); const unsigned int __tlb_flag = __cpu_tlb_flags; if (tlb_flag(TLB_WB)) - dsb(); + dsb(ishst); + + __local_flush_tlb_all(); + tlb_op(TLB_V7_UIS_FULL, "c8, c3, 0", zero); + + if (tlb_flag(TLB_BARRIER)) { + dsb(ish); + isb(); + } +} + +static inline void __local_flush_tlb_mm(struct mm_struct *mm) +{ + const int zero = 0; + const int asid = ASID(mm); + const unsigned int __tlb_flag = __cpu_tlb_flags; if (possible_tlb_flags & (TLB_V4_U_FULL|TLB_V4_D_FULL|TLB_V4_I_FULL)) { - if (cpumask_test_cpu(get_cpu(), mm_cpumask(mm))) { + if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(mm))) { tlb_op(TLB_V4_U_FULL, "c8, c7, 0", zero); tlb_op(TLB_V4_D_FULL, "c8, c6, 0", zero); tlb_op(TLB_V4_I_FULL, "c8, c5, 0", zero); } - put_cpu(); } tlb_op(TLB_V6_U_ASID, "c8, c7, 2", asid); tlb_op(TLB_V6_D_ASID, "c8, c6, 2", asid); tlb_op(TLB_V6_I_ASID, "c8, c5, 2", asid); +} + +static inline void local_flush_tlb_mm(struct mm_struct *mm) +{ + const int asid = ASID(mm); + const unsigned int __tlb_flag = __cpu_tlb_flags; + + if (tlb_flag(TLB_WB)) + dsb(nshst); + + __local_flush_tlb_mm(mm); + tlb_op(TLB_V7_UIS_ASID, "c8, c7, 2", asid); + + if (tlb_flag(TLB_BARRIER)) + dsb(nsh); +} + +static inline void __flush_tlb_mm(struct mm_struct *mm) +{ + const unsigned int __tlb_flag = __cpu_tlb_flags; + + if (tlb_flag(TLB_WB)) + dsb(ishst); + + __local_flush_tlb_mm(mm); #ifdef CONFIG_ARM_ERRATA_720789 - tlb_op(TLB_V7_UIS_ASID, "c8, c3, 0", zero); + tlb_op(TLB_V7_UIS_ASID, "c8, c3, 0", 0); #else - tlb_op(TLB_V7_UIS_ASID, "c8, c3, 2", asid); + tlb_op(TLB_V7_UIS_ASID, "c8, c3, 2", ASID(mm)); #endif if (tlb_flag(TLB_BARRIER)) - dsb(); + dsb(ish); } static inline void -local_flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr) +__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr) { const int zero = 0; const unsigned int __tlb_flag = __cpu_tlb_flags; uaddr = (uaddr & PAGE_MASK) | ASID(vma->vm_mm); - if (tlb_flag(TLB_WB)) - dsb(); - if (possible_tlb_flags & (TLB_V4_U_PAGE|TLB_V4_D_PAGE|TLB_V4_I_PAGE|TLB_V4_I_FULL) && cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm))) { tlb_op(TLB_V4_U_PAGE, "c8, c7, 1", uaddr); @@ -392,6 +435,36 @@ local_flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr) tlb_op(TLB_V6_U_PAGE, "c8, c7, 1", uaddr); tlb_op(TLB_V6_D_PAGE, "c8, c6, 1", uaddr); tlb_op(TLB_V6_I_PAGE, "c8, c5, 1", uaddr); +} + +static inline void +local_flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr) +{ + const unsigned int __tlb_flag = __cpu_tlb_flags; + + uaddr = (uaddr & PAGE_MASK) | ASID(vma->vm_mm); + + if (tlb_flag(TLB_WB)) + dsb(nshst); + + __local_flush_tlb_page(vma, uaddr); + tlb_op(TLB_V7_UIS_PAGE, "c8, c7, 1", uaddr); + + if (tlb_flag(TLB_BARRIER)) + dsb(nsh); +} + +static inline void +__flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr) +{ + const unsigned int __tlb_flag = __cpu_tlb_flags; + + uaddr = (uaddr & PAGE_MASK) | ASID(vma->vm_mm); + + if (tlb_flag(TLB_WB)) + dsb(ishst); + + __local_flush_tlb_page(vma, uaddr); #ifdef CONFIG_ARM_ERRATA_720789 tlb_op(TLB_V7_UIS_PAGE, "c8, c3, 3", uaddr & PAGE_MASK); #else @@ -399,19 +472,14 @@ local_flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr) #endif if (tlb_flag(TLB_BARRIER)) - dsb(); + dsb(ish); } -static inline void local_flush_tlb_kernel_page(unsigned long kaddr) +static inline void __local_flush_tlb_kernel_page(unsigned long kaddr) { const int zero = 0; const unsigned int __tlb_flag = __cpu_tlb_flags; - kaddr &= PAGE_MASK; - - if (tlb_flag(TLB_WB)) - dsb(); - tlb_op(TLB_V4_U_PAGE, "c8, c7, 1", kaddr); tlb_op(TLB_V4_D_PAGE, "c8, c6, 1", kaddr); tlb_op(TLB_V4_I_PAGE, "c8, c5, 1", kaddr); @@ -421,26 +489,75 @@ static inline void local_flush_tlb_kernel_page(unsigned long kaddr) tlb_op(TLB_V6_U_PAGE, "c8, c7, 1", kaddr); tlb_op(TLB_V6_D_PAGE, "c8, c6, 1", kaddr); tlb_op(TLB_V6_I_PAGE, "c8, c5, 1", kaddr); +} + +static inline void local_flush_tlb_kernel_page(unsigned long kaddr) +{ + const unsigned int __tlb_flag = __cpu_tlb_flags; + + kaddr &= PAGE_MASK; + + if (tlb_flag(TLB_WB)) + dsb(nshst); + + __local_flush_tlb_kernel_page(kaddr); + tlb_op(TLB_V7_UIS_PAGE, "c8, c7, 1", kaddr); + + if (tlb_flag(TLB_BARRIER)) { + dsb(nsh); + isb(); + } +} + +static inline void __flush_tlb_kernel_page(unsigned long kaddr) +{ + const unsigned int __tlb_flag = __cpu_tlb_flags; + + kaddr &= PAGE_MASK; + + if (tlb_flag(TLB_WB)) + dsb(ishst); + + __local_flush_tlb_kernel_page(kaddr); tlb_op(TLB_V7_UIS_PAGE, "c8, c3, 1", kaddr); if (tlb_flag(TLB_BARRIER)) { - dsb(); + dsb(ish); isb(); } } +/* + * Branch predictor maintenance is paired with full TLB invalidation, so + * there is no need for any barriers here. + */ +static inline void __local_flush_bp_all(void) +{ + const int zero = 0; + const unsigned int __tlb_flag = __cpu_tlb_flags; + + if (tlb_flag(TLB_V6_BP)) + asm("mcr p15, 0, %0, c7, c5, 6" : : "r" (zero)); +} + static inline void local_flush_bp_all(void) { const int zero = 0; const unsigned int __tlb_flag = __cpu_tlb_flags; + __local_flush_bp_all(); if (tlb_flag(TLB_V7_UIS_BP)) - asm("mcr p15, 0, %0, c7, c1, 6" : : "r" (zero)); - else if (tlb_flag(TLB_V6_BP)) asm("mcr p15, 0, %0, c7, c5, 6" : : "r" (zero)); +} - if (tlb_flag(TLB_BARRIER)) - isb(); +static inline void __flush_bp_all(void) +{ + const int zero = 0; + const unsigned int __tlb_flag = __cpu_tlb_flags; + + __local_flush_bp_all(); + if (tlb_flag(TLB_V7_UIS_BP)) + asm("mcr p15, 0, %0, c7, c1, 6" : : "r" (zero)); } #include <asm/cputype.h> @@ -461,7 +578,7 @@ static inline void dummy_flush_tlb_a15_erratum(void) * Dummy TLBIMVAIS. Using the unmapped address 0 and ASID 0. */ asm("mcr p15, 0, %0, c8, c3, 1" : : "r" (0)); - dsb(); + dsb(ish); } #else static inline int erratum_a15_798181(void) @@ -495,7 +612,7 @@ static inline void flush_pmd_entry(void *pmd) tlb_l2_op(TLB_L2CLEAN_FR, "c15, c9, 1 @ L2 flush_pmd", pmd); if (tlb_flag(TLB_WB)) - dsb(); + dsb(ishst); } static inline void clean_pmd_entry(void *pmd) diff --git a/arch/arm/include/asm/types.h b/arch/arm/include/asm/types.h new file mode 100644 index 00000000000..a53cdb8f068 --- /dev/null +++ b/arch/arm/include/asm/types.h @@ -0,0 +1,40 @@ +#ifndef _ASM_TYPES_H +#define _ASM_TYPES_H + +#include <asm-generic/int-ll64.h> + +/* + * The C99 types uintXX_t that are usually defined in 'stdint.h' are not as + * unambiguous on ARM as you would expect. For the types below, there is a + * difference on ARM between GCC built for bare metal ARM, GCC built for glibc + * and the kernel itself, which results in build errors if you try to build with + * -ffreestanding and include 'stdint.h' (such as when you include 'arm_neon.h' + * in order to use NEON intrinsics) + * + * As the typedefs for these types in 'stdint.h' are based on builtin defines + * supplied by GCC, we can tweak these to align with the kernel's idea of those + * types, so 'linux/types.h' and 'stdint.h' can be safely included from the same + * source file (provided that -ffreestanding is used). + * + * int32_t uint32_t uintptr_t + * bare metal GCC long unsigned long unsigned int + * glibc GCC int unsigned int unsigned int + * kernel int unsigned int unsigned long + */ + +#ifdef __INT32_TYPE__ +#undef __INT32_TYPE__ +#define __INT32_TYPE__ int +#endif + +#ifdef __UINT32_TYPE__ +#undef __UINT32_TYPE__ +#define __UINT32_TYPE__ unsigned int +#endif + +#ifdef __UINTPTR_TYPE__ +#undef __UINTPTR_TYPE__ +#define __UINTPTR_TYPE__ unsigned long +#endif + +#endif /* _ASM_TYPES_H */ diff --git a/arch/arm/include/asm/v7m.h b/arch/arm/include/asm/v7m.h index fa88d09fa3d..615781c6162 100644 --- a/arch/arm/include/asm/v7m.h +++ b/arch/arm/include/asm/v7m.h @@ -15,6 +15,10 @@ #define V7M_SCB_VTOR 0x08 +#define V7M_SCB_AIRCR 0x0c +#define V7M_SCB_AIRCR_VECTKEY (0x05fa << 16) +#define V7M_SCB_AIRCR_SYSRESETREQ (1 << 2) + #define V7M_SCB_SCR 0x10 #define V7M_SCB_SCR_SLEEPDEEP (1 << 2) @@ -42,3 +46,11 @@ */ #define EXC_RET_STACK_MASK 0x00000004 #define EXC_RET_THREADMODE_PROCESSSTACK 0xfffffffd + +#ifndef __ASSEMBLY__ + +enum reboot_mode; + +void armv7m_restart(enum reboot_mode mode, const char *cmd); + +#endif /* __ASSEMBLY__ */ diff --git a/arch/arm/include/asm/xor.h b/arch/arm/include/asm/xor.h index 7604673dc42..4ffb26d4cad 100644 --- a/arch/arm/include/asm/xor.h +++ b/arch/arm/include/asm/xor.h @@ -7,7 +7,10 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ +#include <linux/hardirq.h> #include <asm-generic/xor.h> +#include <asm/hwcap.h> +#include <asm/neon.h> #define __XOR(a1, a2) a1 ^= a2 @@ -138,4 +141,74 @@ static struct xor_block_template xor_block_arm4regs = { xor_speed(&xor_block_arm4regs); \ xor_speed(&xor_block_8regs); \ xor_speed(&xor_block_32regs); \ + NEON_TEMPLATES; \ } while (0) + +#ifdef CONFIG_KERNEL_MODE_NEON + +extern struct xor_block_template const xor_block_neon_inner; + +static void +xor_neon_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) +{ + if (in_interrupt()) { + xor_arm4regs_2(bytes, p1, p2); + } else { + kernel_neon_begin(); + xor_block_neon_inner.do_2(bytes, p1, p2); + kernel_neon_end(); + } +} + +static void +xor_neon_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, + unsigned long *p3) +{ + if (in_interrupt()) { + xor_arm4regs_3(bytes, p1, p2, p3); + } else { + kernel_neon_begin(); + xor_block_neon_inner.do_3(bytes, p1, p2, p3); + kernel_neon_end(); + } +} + +static void +xor_neon_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, + unsigned long *p3, unsigned long *p4) +{ + if (in_interrupt()) { + xor_arm4regs_4(bytes, p1, p2, p3, p4); + } else { + kernel_neon_begin(); + xor_block_neon_inner.do_4(bytes, p1, p2, p3, p4); + kernel_neon_end(); + } +} + +static void +xor_neon_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, + unsigned long *p3, unsigned long *p4, unsigned long *p5) +{ + if (in_interrupt()) { + xor_arm4regs_5(bytes, p1, p2, p3, p4, p5); + } else { + kernel_neon_begin(); + xor_block_neon_inner.do_5(bytes, p1, p2, p3, p4, p5); + kernel_neon_end(); + } +} + +static struct xor_block_template xor_block_neon = { + .name = "neon", + .do_2 = xor_neon_2, + .do_3 = xor_neon_3, + .do_4 = xor_neon_4, + .do_5 = xor_neon_5 +}; + +#define NEON_TEMPLATES \ + do { if (cpu_has_neon()) xor_speed(&xor_block_neon); } while (0) +#else +#define NEON_TEMPLATES +#endif |