summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--arch/arm/Kconfig17
-rw-r--r--arch/arm/Makefile3
-rw-r--r--arch/arm/boot/compressed/Makefile5
-rw-r--r--arch/arm/boot/compressed/head.S8
-rw-r--r--arch/arm/boot/compressed/vmlinux.lds.S (renamed from arch/arm/boot/compressed/vmlinux.lds.in)17
-rw-r--r--arch/arm/common/mcpm_entry.c52
-rw-r--r--arch/arm/crypto/Makefile4
-rw-r--r--arch/arm/crypto/aes-armv4.S3
-rw-r--r--arch/arm/crypto/sha1-armv7-neon.S634
-rw-r--r--arch/arm/crypto/sha1_glue.c58
-rw-r--r--arch/arm/crypto/sha1_neon_glue.c197
-rw-r--r--arch/arm/crypto/sha512-armv7-neon.S455
-rw-r--r--arch/arm/crypto/sha512_neon_glue.c305
-rw-r--r--arch/arm/include/asm/assembler.h29
-rw-r--r--arch/arm/include/asm/cputype.h37
-rw-r--r--arch/arm/include/asm/crypto/sha1.h10
-rw-r--r--arch/arm/include/asm/entry-macro-multi.S2
-rw-r--r--arch/arm/include/asm/glue-proc.h18
-rw-r--r--arch/arm/include/asm/mcpm.h16
-rw-r--r--arch/arm/include/asm/mcs_spinlock.h23
-rw-r--r--arch/arm/include/asm/memory.h10
-rw-r--r--arch/arm/include/asm/perf_event.h9
-rw-r--r--arch/arm/include/asm/pgtable-3level-hwdef.h3
-rw-r--r--arch/arm/include/asm/pgtable-3level.h49
-rw-r--r--arch/arm/include/asm/pgtable.h18
-rw-r--r--arch/arm/include/asm/pmu.h19
-rw-r--r--arch/arm/include/asm/ptrace.h6
-rw-r--r--arch/arm/include/asm/smp_scu.h2
-rw-r--r--arch/arm/include/asm/stacktrace.h15
-rw-r--r--arch/arm/include/asm/thread_info.h3
-rw-r--r--arch/arm/include/asm/uaccess.h22
-rw-r--r--arch/arm/include/asm/unistd.h10
-rw-r--r--arch/arm/include/uapi/asm/unistd.h11
-rw-r--r--arch/arm/kernel/debug.S10
-rw-r--r--arch/arm/kernel/entry-armv.S42
-rw-r--r--arch/arm/kernel/entry-common.S13
-rw-r--r--arch/arm/kernel/entry-header.S14
-rw-r--r--arch/arm/kernel/fiqasm.S4
-rw-r--r--arch/arm/kernel/head-common.S7
-rw-r--r--arch/arm/kernel/head-nommu.S8
-rw-r--r--arch/arm/kernel/head.S18
-rw-r--r--arch/arm/kernel/hyp-stub.S6
-rw-r--r--arch/arm/kernel/iwmmxt.S16
-rw-r--r--arch/arm/kernel/perf_event.c18
-rw-r--r--arch/arm/kernel/perf_event_cpu.c66
-rw-r--r--arch/arm/kernel/perf_event_v6.c307
-rw-r--r--arch/arm/kernel/perf_event_v7.c967
-rw-r--r--arch/arm/kernel/perf_event_xscale.c121
-rw-r--r--arch/arm/kernel/relocate_kernel.S3
-rw-r--r--arch/arm/kernel/setup.c29
-rw-r--r--arch/arm/kernel/sleep.S2
-rw-r--r--arch/arm/kernel/smp_scu.c12
-rw-r--r--arch/arm/kernel/smp_tlb.c20
-rw-r--r--arch/arm/kernel/swp_emulate.c4
-rw-r--r--arch/arm/kernel/time.c5
-rw-r--r--arch/arm/kernel/traps.c6
-rw-r--r--arch/arm/kernel/unwind.c8
-rw-r--r--arch/arm/kernel/vmlinux.lds.S1
-rw-r--r--arch/arm/kvm/guest.c8
-rw-r--r--arch/arm/kvm/init.S3
-rw-r--r--arch/arm/lib/ashldi3.S3
-rw-r--r--arch/arm/lib/ashrdi3.S3
-rw-r--r--arch/arm/lib/backtrace.S2
-rw-r--r--arch/arm/lib/bitops.h5
-rw-r--r--arch/arm/lib/bswapsdi2.S5
-rw-r--r--arch/arm/lib/call_with_stack.S4
-rw-r--r--arch/arm/lib/csumpartial.S2
-rw-r--r--arch/arm/lib/csumpartialcopygeneric.S5
-rw-r--r--arch/arm/lib/delay-loop.S18
-rw-r--r--arch/arm/lib/div64.S13
-rw-r--r--arch/arm/lib/findbit.S10
-rw-r--r--arch/arm/lib/getuser.S45
-rw-r--r--arch/arm/lib/io-readsb.S2
-rw-r--r--arch/arm/lib/io-readsl.S6
-rw-r--r--arch/arm/lib/io-readsw-armv3.S4
-rw-r--r--arch/arm/lib/io-readsw-armv4.S2
-rw-r--r--arch/arm/lib/io-writesb.S2
-rw-r--r--arch/arm/lib/io-writesl.S10
-rw-r--r--arch/arm/lib/io-writesw-armv3.S4
-rw-r--r--arch/arm/lib/io-writesw-armv4.S4
-rw-r--r--arch/arm/lib/lib1funcs.S26
-rw-r--r--arch/arm/lib/lshrdi3.S3
-rw-r--r--arch/arm/lib/memchr.S2
-rw-r--r--arch/arm/lib/memset.S2
-rw-r--r--arch/arm/lib/memzero.S2
-rw-r--r--arch/arm/lib/muldi3.S3
-rw-r--r--arch/arm/lib/putuser.S10
-rw-r--r--arch/arm/lib/strchr.S2
-rw-r--r--arch/arm/lib/strrchr.S2
-rw-r--r--arch/arm/lib/ucmpdi2.S5
-rw-r--r--arch/arm/mach-davinci/sleep.S2
-rw-r--r--arch/arm/mach-ebsa110/include/mach/memory.h5
-rw-r--r--arch/arm/mach-ep93xx/crunch-bits.S6
-rw-r--r--arch/arm/mach-ep93xx/include/mach/memory.h22
-rw-r--r--arch/arm/mach-exynos/Kconfig1
-rw-r--r--arch/arm/mach-exynos/mcpm-exynos.c17
-rw-r--r--arch/arm/mach-exynos/platsmp.c4
-rw-r--r--arch/arm/mach-exynos/pm.c11
-rw-r--r--arch/arm/mach-footbridge/include/mach/memory.h5
-rw-r--r--arch/arm/mach-imx/suspend-imx6.S5
-rw-r--r--arch/arm/mach-integrator/include/mach/memory.h5
-rw-r--r--arch/arm/mach-iop13xx/include/mach/iop13xx.h2
-rw-r--r--arch/arm/mach-iop13xx/include/mach/memory.h5
-rw-r--r--arch/arm/mach-iop13xx/setup.c1
-rw-r--r--arch/arm/mach-ks8695/include/mach/memory.h5
-rw-r--r--arch/arm/mach-mvebu/coherency_ll.S10
-rw-r--r--arch/arm/mach-mvebu/headsmp-a9.S2
-rw-r--r--arch/arm/mach-omap1/include/mach/memory.h5
-rw-r--r--arch/arm/mach-omap2/sleep44xx.S3
-rw-r--r--arch/arm/mach-omap2/sram242x.S6
-rw-r--r--arch/arm/mach-omap2/sram243x.S6
-rw-r--r--arch/arm/mach-pxa/mioa701_bootresume.S2
-rw-r--r--arch/arm/mach-pxa/standby.S4
-rw-r--r--arch/arm/mach-realview/include/mach/memory.h9
-rw-r--r--arch/arm/mach-rpc/include/mach/memory.h5
-rw-r--r--arch/arm/mach-s3c24xx/sleep-s3c2410.S2
-rw-r--r--arch/arm/mach-s3c24xx/sleep-s3c2412.S2
-rw-r--r--arch/arm/mach-s5pv210/include/mach/memory.h2
-rw-r--r--arch/arm/mach-sa1100/include/mach/memory.h5
-rw-r--r--arch/arm/mach-shmobile/headsmp.S3
-rw-r--r--arch/arm/mach-tegra/sleep-tegra20.S24
-rw-r--r--arch/arm/mach-tegra/sleep-tegra30.S14
-rw-r--r--arch/arm/mach-tegra/sleep.S8
-rw-r--r--arch/arm/mach-vexpress/tc2_pm.c19
-rw-r--r--arch/arm/mm/Kconfig9
-rw-r--r--arch/arm/mm/alignment.c4
-rw-r--r--arch/arm/mm/cache-fa.S19
-rw-r--r--arch/arm/mm/cache-l2x0.c2
-rw-r--r--arch/arm/mm/cache-nop.S5
-rw-r--r--arch/arm/mm/cache-v4.S13
-rw-r--r--arch/arm/mm/cache-v4wb.S15
-rw-r--r--arch/arm/mm/cache-v4wt.S13
-rw-r--r--arch/arm/mm/cache-v6.S20
-rw-r--r--arch/arm/mm/cache-v7.S30
-rw-r--r--arch/arm/mm/dump.c4
-rw-r--r--arch/arm/mm/l2c-l2x0-resume.S7
-rw-r--r--arch/arm/mm/mmu.c45
-rw-r--r--arch/arm/mm/proc-arm1020.S34
-rw-r--r--arch/arm/mm/proc-arm1020e.S34
-rw-r--r--arch/arm/mm/proc-arm1022.S34
-rw-r--r--arch/arm/mm/proc-arm1026.S34
-rw-r--r--arch/arm/mm/proc-arm720.S16
-rw-r--r--arch/arm/mm/proc-arm740.S8
-rw-r--r--arch/arm/mm/proc-arm7tdmi.S8
-rw-r--r--arch/arm/mm/proc-arm920.S34
-rw-r--r--arch/arm/mm/proc-arm922.S34
-rw-r--r--arch/arm/mm/proc-arm925.S34
-rw-r--r--arch/arm/mm/proc-arm926.S34
-rw-r--r--arch/arm/mm/proc-arm940.S24
-rw-r--r--arch/arm/mm/proc-arm946.S30
-rw-r--r--arch/arm/mm/proc-arm9tdmi.S8
-rw-r--r--arch/arm/mm/proc-fa526.S16
-rw-r--r--arch/arm/mm/proc-feroceon.S44
-rw-r--r--arch/arm/mm/proc-mohawk.S34
-rw-r--r--arch/arm/mm/proc-sa110.S16
-rw-r--r--arch/arm/mm/proc-sa1100.S16
-rw-r--r--arch/arm/mm/proc-v6.S16
-rw-r--r--arch/arm/mm/proc-v7-2level.S4
-rw-r--r--arch/arm/mm/proc-v7-3level.S14
-rw-r--r--arch/arm/mm/proc-v7.S74
-rw-r--r--arch/arm/mm/proc-v7m.S18
-rw-r--r--arch/arm/mm/proc-xsc3.S32
-rw-r--r--arch/arm/mm/proc-xscale.S34
-rw-r--r--arch/arm/mm/tlb-fa.S7
-rw-r--r--arch/arm/mm/tlb-v4.S5
-rw-r--r--arch/arm/mm/tlb-v4wb.S7
-rw-r--r--arch/arm/mm/tlb-v4wbi.S7
-rw-r--r--arch/arm/mm/tlb-v6.S5
-rw-r--r--arch/arm/mm/tlb-v7.S4
-rw-r--r--arch/arm/nwfpe/entry.S8
-rw-r--r--arch/arm/oprofile/common.c19
-rw-r--r--arch/arm/plat-omap/dma.c2
-rw-r--r--arch/arm/vfp/entry.S4
-rw-r--r--arch/arm/vfp/vfphw.S26
-rw-r--r--arch/arm/xen/hypercall.S6
-rw-r--r--crypto/Kconfig26
-rw-r--r--drivers/clocksource/arm_global_timer.c2
177 files changed, 3213 insertions, 2006 deletions
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 290f02ee015..1e14b9068a3 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -263,8 +263,22 @@ config NEED_MACH_MEMORY_H
config PHYS_OFFSET
hex "Physical address of main memory" if MMU
- depends on !ARM_PATCH_PHYS_VIRT && !NEED_MACH_MEMORY_H
+ depends on !ARM_PATCH_PHYS_VIRT
default DRAM_BASE if !MMU
+ default 0x00000000 if ARCH_EBSA110 || \
+ EP93XX_SDCE3_SYNC_PHYS_OFFSET || \
+ ARCH_FOOTBRIDGE || \
+ ARCH_INTEGRATOR || \
+ ARCH_IOP13XX || \
+ ARCH_KS8695 || \
+ (ARCH_REALVIEW && !REALVIEW_HIGH_PHYS_OFFSET)
+ default 0x10000000 if ARCH_OMAP1 || ARCH_RPC
+ default 0x20000000 if ARCH_S5PV210
+ default 0x70000000 if REALVIEW_HIGH_PHYS_OFFSET
+ default 0xc0000000 if EP93XX_SDCE0_PHYS_OFFSET || ARCH_SA1100
+ default 0xd0000000 if EP93XX_SDCE1_PHYS_OFFSET
+ default 0xe0000000 if EP93XX_SDCE2_PHYS_OFFSET
+ default 0xf0000000 if EP93XX_SDCE3_ASYNC_PHYS_OFFSET
help
Please provide the physical address corresponding to the
location of main memory in your system.
@@ -436,7 +450,6 @@ config ARCH_EP93XX
select ARM_VIC
select CLKDEV_LOOKUP
select CPU_ARM920T
- select NEED_MACH_MEMORY_H
help
This enables support for the Cirrus EP93xx series of CPUs.
diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index 6721fab1373..718913dfe81 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -127,6 +127,9 @@ CHECKFLAGS += -D__arm__
#Default value
head-y := arch/arm/kernel/head$(MMUEXT).o
+
+# Text offset. This list is sorted numerically by address in order to
+# provide a means to avoid/resolve conflicts in multi-arch kernels.
textofs-y := 0x00008000
textofs-$(CONFIG_ARCH_CLPS711X) := 0x00028000
# We don't want the htc bootloader to corrupt kernel during resume
diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile
index 68c918362b7..76a50ecae1c 100644
--- a/arch/arm/boot/compressed/Makefile
+++ b/arch/arm/boot/compressed/Makefile
@@ -81,7 +81,7 @@ ZTEXTADDR := 0
ZBSSADDR := ALIGN(8)
endif
-SEDFLAGS = s/TEXT_START/$(ZTEXTADDR)/;s/BSS_START/$(ZBSSADDR)/
+CPPFLAGS_vmlinux.lds := -DTEXT_START="$(ZTEXTADDR)" -DBSS_START="$(ZBSSADDR)"
suffix_$(CONFIG_KERNEL_GZIP) = gzip
suffix_$(CONFIG_KERNEL_LZO) = lzo
@@ -199,8 +199,5 @@ CFLAGS_font.o := -Dstatic=
$(obj)/font.c: $(FONTC)
$(call cmd,shipped)
-$(obj)/vmlinux.lds: $(obj)/vmlinux.lds.in arch/arm/boot/Makefile $(KCONFIG_CONFIG)
- @sed "$(SEDFLAGS)" < $< > $@
-
$(obj)/hyp-stub.S: $(srctree)/arch/$(SRCARCH)/kernel/hyp-stub.S
$(call cmd,shipped)
diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S
index 3a8b32df6b3..413fd94b530 100644
--- a/arch/arm/boot/compressed/head.S
+++ b/arch/arm/boot/compressed/head.S
@@ -125,9 +125,11 @@ start:
THUMB( adr r12, BSYM(1f) )
THUMB( bx r12 )
- .word 0x016f2818 @ Magic numbers to help the loader
- .word start @ absolute load/run zImage address
- .word _edata @ zImage end address
+ .word _magic_sig @ Magic numbers to help the loader
+ .word _magic_start @ absolute load/run zImage address
+ .word _magic_end @ zImage end address
+ .word 0x04030201 @ endianness flag
+
THUMB( .thumb )
1:
ARM_BE8( setend be ) @ go BE8 if compiled for BE8
diff --git a/arch/arm/boot/compressed/vmlinux.lds.in b/arch/arm/boot/compressed/vmlinux.lds.S
index 4919f2ac8b8..2b60b843ac5 100644
--- a/arch/arm/boot/compressed/vmlinux.lds.in
+++ b/arch/arm/boot/compressed/vmlinux.lds.S
@@ -1,12 +1,20 @@
/*
- * linux/arch/arm/boot/compressed/vmlinux.lds.in
- *
* Copyright (C) 2000 Russell King
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
+
+#ifdef CONFIG_CPU_ENDIAN_BE8
+#define ZIMAGE_MAGIC(x) ( (((x) >> 24) & 0x000000ff) | \
+ (((x) >> 8) & 0x0000ff00) | \
+ (((x) << 8) & 0x00ff0000) | \
+ (((x) << 24) & 0xff000000) )
+#else
+#define ZIMAGE_MAGIC(x) (x)
+#endif
+
OUTPUT_ARCH(arm)
ENTRY(_start)
SECTIONS
@@ -57,6 +65,10 @@ SECTIONS
.pad : { BYTE(0); . = ALIGN(8); }
_edata = .;
+ _magic_sig = ZIMAGE_MAGIC(0x016f2818);
+ _magic_start = ZIMAGE_MAGIC(_start);
+ _magic_end = ZIMAGE_MAGIC(_edata);
+
. = BSS_START;
__bss_start = .;
.bss : { *(.bss) }
@@ -73,4 +85,3 @@ SECTIONS
.stab.indexstr 0 : { *(.stab.indexstr) }
.comment 0 : { *(.comment) }
}
-
diff --git a/arch/arm/common/mcpm_entry.c b/arch/arm/common/mcpm_entry.c
index f91136ab447..3c165fc2dce 100644
--- a/arch/arm/common/mcpm_entry.c
+++ b/arch/arm/common/mcpm_entry.c
@@ -12,11 +12,13 @@
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/irqflags.h>
+#include <linux/cpu_pm.h>
#include <asm/mcpm.h>
#include <asm/cacheflush.h>
#include <asm/idmap.h>
#include <asm/cputype.h>
+#include <asm/suspend.h>
extern unsigned long mcpm_entry_vectors[MAX_NR_CLUSTERS][MAX_CPUS_PER_CLUSTER];
@@ -146,6 +148,56 @@ int mcpm_cpu_powered_up(void)
return 0;
}
+#ifdef CONFIG_ARM_CPU_SUSPEND
+
+static int __init nocache_trampoline(unsigned long _arg)
+{
+ void (*cache_disable)(void) = (void *)_arg;
+ unsigned int mpidr = read_cpuid_mpidr();
+ unsigned int cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
+ unsigned int cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+ phys_reset_t phys_reset;
+
+ mcpm_set_entry_vector(cpu, cluster, cpu_resume);
+ setup_mm_for_reboot();
+
+ __mcpm_cpu_going_down(cpu, cluster);
+ BUG_ON(!__mcpm_outbound_enter_critical(cpu, cluster));
+ cache_disable();
+ __mcpm_outbound_leave_critical(cluster, CLUSTER_DOWN);
+ __mcpm_cpu_down(cpu, cluster);
+
+ phys_reset = (phys_reset_t)(unsigned long)virt_to_phys(cpu_reset);
+ phys_reset(virt_to_phys(mcpm_entry_point));
+ BUG();
+}
+
+int __init mcpm_loopback(void (*cache_disable)(void))
+{
+ int ret;
+
+ /*
+ * We're going to soft-restart the current CPU through the
+ * low-level MCPM code by leveraging the suspend/resume
+ * infrastructure. Let's play it safe by using cpu_pm_enter()
+ * in case the CPU init code path resets the VFP or similar.
+ */
+ local_irq_disable();
+ local_fiq_disable();
+ ret = cpu_pm_enter();
+ if (!ret) {
+ ret = cpu_suspend((unsigned long)cache_disable, nocache_trampoline);
+ cpu_pm_exit();
+ }
+ local_fiq_enable();
+ local_irq_enable();
+ if (ret)
+ pr_err("%s returned %d\n", __func__, ret);
+ return ret;
+}
+
+#endif
+
struct sync_struct mcpm_sync;
/*
diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile
index 81cda39860c..b48fa341648 100644
--- a/arch/arm/crypto/Makefile
+++ b/arch/arm/crypto/Makefile
@@ -5,10 +5,14 @@
obj-$(CONFIG_CRYPTO_AES_ARM) += aes-arm.o
obj-$(CONFIG_CRYPTO_AES_ARM_BS) += aes-arm-bs.o
obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o
+obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o
+obj-$(CONFIG_CRYPTO_SHA512_ARM_NEON) += sha512-arm-neon.o
aes-arm-y := aes-armv4.o aes_glue.o
aes-arm-bs-y := aesbs-core.o aesbs-glue.o
sha1-arm-y := sha1-armv4-large.o sha1_glue.o
+sha1-arm-neon-y := sha1-armv7-neon.o sha1_neon_glue.o
+sha512-arm-neon-y := sha512-armv7-neon.o sha512_neon_glue.o
quiet_cmd_perl = PERL $@
cmd_perl = $(PERL) $(<) > $(@)
diff --git a/arch/arm/crypto/aes-armv4.S b/arch/arm/crypto/aes-armv4.S
index 3a14ea8fe97..ebb9761fb57 100644
--- a/arch/arm/crypto/aes-armv4.S
+++ b/arch/arm/crypto/aes-armv4.S
@@ -35,6 +35,7 @@
@ that is being targetted.
#include <linux/linkage.h>
+#include <asm/assembler.h>
.text
@@ -648,7 +649,7 @@ _armv4_AES_set_encrypt_key:
.Ldone: mov r0,#0
ldmia sp!,{r4-r12,lr}
-.Labrt: mov pc,lr
+.Labrt: ret lr
ENDPROC(private_AES_set_encrypt_key)
.align 5
diff --git a/arch/arm/crypto/sha1-armv7-neon.S b/arch/arm/crypto/sha1-armv7-neon.S
new file mode 100644
index 00000000000..50013c0e286
--- /dev/null
+++ b/arch/arm/crypto/sha1-armv7-neon.S
@@ -0,0 +1,634 @@
+/* sha1-armv7-neon.S - ARM/NEON accelerated SHA-1 transform function
+ *
+ * Copyright © 2013-2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#include <linux/linkage.h>
+
+
+.syntax unified
+.code 32
+.fpu neon
+
+.text
+
+
+/* Context structure */
+
+#define state_h0 0
+#define state_h1 4
+#define state_h2 8
+#define state_h3 12
+#define state_h4 16
+
+
+/* Constants */
+
+#define K1 0x5A827999
+#define K2 0x6ED9EBA1
+#define K3 0x8F1BBCDC
+#define K4 0xCA62C1D6
+.align 4
+.LK_VEC:
+.LK1: .long K1, K1, K1, K1
+.LK2: .long K2, K2, K2, K2
+.LK3: .long K3, K3, K3, K3
+.LK4: .long K4, K4, K4, K4
+
+
+/* Register macros */
+
+#define RSTATE r0
+#define RDATA r1
+#define RNBLKS r2
+#define ROLDSTACK r3
+#define RWK lr
+
+#define _a r4
+#define _b r5
+#define _c r6
+#define _d r7
+#define _e r8
+
+#define RT0 r9
+#define RT1 r10
+#define RT2 r11
+#define RT3 r12
+
+#define W0 q0
+#define W1 q1
+#define W2 q2
+#define W3 q3
+#define W4 q4
+#define W5 q5
+#define W6 q6
+#define W7 q7
+
+#define tmp0 q8
+#define tmp1 q9
+#define tmp2 q10
+#define tmp3 q11
+
+#define qK1 q12
+#define qK2 q13
+#define qK3 q14
+#define qK4 q15
+
+
+/* Round function macros. */
+
+#define WK_offs(i) (((i) & 15) * 4)
+
+#define _R_F1(a,b,c,d,e,i,pre1,pre2,pre3,i16,\
+ W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ ldr RT3, [sp, WK_offs(i)]; \
+ pre1(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
+ bic RT0, d, b; \
+ add e, e, a, ror #(32 - 5); \
+ and RT1, c, b; \
+ pre2(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
+ add RT0, RT0, RT3; \
+ add e, e, RT1; \
+ ror b, #(32 - 30); \
+ pre3(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
+ add e, e, RT0;
+
+#define _R_F2(a,b,c,d,e,i,pre1,pre2,pre3,i16,\
+ W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ ldr RT3, [sp, WK_offs(i)]; \
+ pre1(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
+ eor RT0, d, b; \
+ add e, e, a, ror #(32 - 5); \
+ eor RT0, RT0, c; \
+ pre2(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
+ add e, e, RT3; \
+ ror b, #(32 - 30); \
+ pre3(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
+ add e, e, RT0; \
+
+#define _R_F3(a,b,c,d,e,i,pre1,pre2,pre3,i16,\
+ W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ ldr RT3, [sp, WK_offs(i)]; \
+ pre1(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
+ eor RT0, b, c; \
+ and RT1, b, c; \
+ add e, e, a, ror #(32 - 5); \
+ pre2(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
+ and RT0, RT0, d; \
+ add RT1, RT1, RT3; \
+ add e, e, RT0; \
+ ror b, #(32 - 30); \
+ pre3(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
+ add e, e, RT1;
+
+#define _R_F4(a,b,c,d,e,i,pre1,pre2,pre3,i16,\
+ W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ _R_F2(a,b,c,d,e,i,pre1,pre2,pre3,i16,\
+ W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28)
+
+#define _R(a,b,c,d,e,f,i,pre1,pre2,pre3,i16,\
+ W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ _R_##f(a,b,c,d,e,i,pre1,pre2,pre3,i16,\
+ W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28)
+
+#define R(a,b,c,d,e,f,i) \
+ _R_##f(a,b,c,d,e,i,dummy,dummy,dummy,i16,\
+ W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28)
+
+#define dummy(...)
+
+
+/* Input expansion macros. */
+
+/********* Precalc macros for rounds 0-15 *************************************/
+
+#define W_PRECALC_00_15() \
+ add RWK, sp, #(WK_offs(0)); \
+ \
+ vld1.32 {tmp0, tmp1}, [RDATA]!; \
+ vrev32.8 W0, tmp0; /* big => little */ \
+ vld1.32 {tmp2, tmp3}, [RDATA]!; \
+ vadd.u32 tmp0, W0, curK; \
+ vrev32.8 W7, tmp1; /* big => little */ \
+ vrev32.8 W6, tmp2; /* big => little */ \
+ vadd.u32 tmp1, W7, curK; \
+ vrev32.8 W5, tmp3; /* big => little */ \
+ vadd.u32 tmp2, W6, curK; \
+ vst1.32 {tmp0, tmp1}, [RWK]!; \
+ vadd.u32 tmp3, W5, curK; \
+ vst1.32 {tmp2, tmp3}, [RWK]; \
+
+#define WPRECALC_00_15_0(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ vld1.32 {tmp0, tmp1}, [RDATA]!; \
+
+#define WPRECALC_00_15_1(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ add RWK, sp, #(WK_offs(0)); \
+
+#define WPRECALC_00_15_2(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ vrev32.8 W0, tmp0; /* big => little */ \
+
+#define WPRECALC_00_15_3(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ vld1.32 {tmp2, tmp3}, [RDATA]!; \
+
+#define WPRECALC_00_15_4(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ vadd.u32 tmp0, W0, curK; \
+
+#define WPRECALC_00_15_5(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ vrev32.8 W7, tmp1; /* big => little */ \
+
+#define WPRECALC_00_15_6(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ vrev32.8 W6, tmp2; /* big => little */ \
+
+#define WPRECALC_00_15_7(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ vadd.u32 tmp1, W7, curK; \
+
+#define WPRECALC_00_15_8(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ vrev32.8 W5, tmp3; /* big => little */ \
+
+#define WPRECALC_00_15_9(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ vadd.u32 tmp2, W6, curK; \
+
+#define WPRECALC_00_15_10(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ vst1.32 {tmp0, tmp1}, [RWK]!; \
+
+#define WPRECALC_00_15_11(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ vadd.u32 tmp3, W5, curK; \
+
+#define WPRECALC_00_15_12(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ vst1.32 {tmp2, tmp3}, [RWK]; \
+
+
+/********* Precalc macros for rounds 16-31 ************************************/
+
+#define WPRECALC_16_31_0(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ veor tmp0, tmp0; \
+ vext.8 W, W_m16, W_m12, #8; \
+
+#define WPRECALC_16_31_1(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ add RWK, sp, #(WK_offs(i)); \
+ vext.8 tmp0, W_m04, tmp0, #4; \
+
+#define WPRECALC_16_31_2(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ veor tmp0, tmp0, W_m16; \
+ veor.32 W, W, W_m08; \
+
+#define WPRECALC_16_31_3(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ veor tmp1, tmp1; \
+ veor W, W, tmp0; \
+
+#define WPRECALC_16_31_4(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ vshl.u32 tmp0, W, #1; \
+
+#define WPRECALC_16_31_5(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ vext.8 tmp1, tmp1, W, #(16-12); \
+ vshr.u32 W, W, #31; \
+
+#define WPRECALC_16_31_6(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ vorr tmp0, tmp0, W; \
+ vshr.u32 W, tmp1, #30; \
+
+#define WPRECALC_16_31_7(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ vshl.u32 tmp1, tmp1, #2; \
+
+#define WPRECALC_16_31_8(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ veor tmp0, tmp0, W; \
+
+#define WPRECALC_16_31_9(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ veor W, tmp0, tmp1; \
+
+#define WPRECALC_16_31_10(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ vadd.u32 tmp0, W, curK; \
+
+#define WPRECALC_16_31_11(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ vst1.32 {tmp0}, [RWK];
+
+
+/********* Precalc macros for rounds 32-79 ************************************/
+
+#define WPRECALC_32_79_0(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ veor W, W_m28; \
+
+#define WPRECALC_32_79_1(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ vext.8 tmp0, W_m08, W_m04, #8; \
+
+#define WPRECALC_32_79_2(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ veor W, W_m16; \
+
+#define WPRECALC_32_79_3(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ veor W, tmp0; \
+
+#define WPRECALC_32_79_4(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ add RWK, sp, #(WK_offs(i&~3)); \
+
+#define WPRECALC_32_79_5(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ vshl.u32 tmp1, W, #2; \
+
+#define WPRECALC_32_79_6(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ vshr.u32 tmp0, W, #30; \
+
+#define WPRECALC_32_79_7(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ vorr W, tmp0, tmp1; \
+
+#define WPRECALC_32_79_8(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ vadd.u32 tmp0, W, curK; \
+
+#define WPRECALC_32_79_9(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ vst1.32 {tmp0}, [RWK];
+
+
+/*
+ * Transform nblks*64 bytes (nblks*16 32-bit words) at DATA.
+ *
+ * unsigned int
+ * sha1_transform_neon (void *ctx, const unsigned char *data,
+ * unsigned int nblks)
+ */
+.align 3
+ENTRY(sha1_transform_neon)
+ /* input:
+ * r0: ctx, CTX
+ * r1: data (64*nblks bytes)
+ * r2: nblks
+ */
+
+ cmp RNBLKS, #0;
+ beq .Ldo_nothing;
+
+ push {r4-r12, lr};
+ /*vpush {q4-q7};*/
+
+ adr RT3, .LK_VEC;
+
+ mov ROLDSTACK, sp;
+
+ /* Align stack. */
+ sub RT0, sp, #(16*4);
+ and RT0, #(~(16-1));
+ mov sp, RT0;
+
+ vld1.32 {qK1-qK2}, [RT3]!; /* Load K1,K2 */
+
+ /* Get the values of the chaining variables. */
+ ldm RSTATE, {_a-_e};
+
+ vld1.32 {qK3-qK4}, [RT3]; /* Load K3,K4 */
+
+#undef curK
+#define curK qK1
+ /* Precalc 0-15. */
+ W_PRECALC_00_15();
+
+.Loop:
+ /* Transform 0-15 + Precalc 16-31. */
+ _R( _a, _b, _c, _d, _e, F1, 0,
+ WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 16,
+ W4, W5, W6, W7, W0, _, _, _ );
+ _R( _e, _a, _b, _c, _d, F1, 1,
+ WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 16,
+ W4, W5, W6, W7, W0, _, _, _ );
+ _R( _d, _e, _a, _b, _c, F1, 2,
+ WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 16,
+ W4, W5, W6, W7, W0, _, _, _ );
+ _R( _c, _d, _e, _a, _b, F1, 3,
+ WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,16,
+ W4, W5, W6, W7, W0, _, _, _ );
+
+#undef curK
+#define curK qK2
+ _R( _b, _c, _d, _e, _a, F1, 4,
+ WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 20,
+ W3, W4, W5, W6, W7, _, _, _ );
+ _R( _a, _b, _c, _d, _e, F1, 5,
+ WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 20,
+ W3, W4, W5, W6, W7, _, _, _ );
+ _R( _e, _a, _b, _c, _d, F1, 6,
+ WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 20,
+ W3, W4, W5, W6, W7, _, _, _ );
+ _R( _d, _e, _a, _b, _c, F1, 7,
+ WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,20,
+ W3, W4, W5, W6, W7, _, _, _ );
+
+ _R( _c, _d, _e, _a, _b, F1, 8,
+ WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 24,
+ W2, W3, W4, W5, W6, _, _, _ );
+ _R( _b, _c, _d, _e, _a, F1, 9,
+ WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 24,
+ W2, W3, W4, W5, W6, _, _, _ );
+ _R( _a, _b, _c, _d, _e, F1, 10,
+ WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 24,
+ W2, W3, W4, W5, W6, _, _, _ );
+ _R( _e, _a, _b, _c, _d, F1, 11,
+ WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,24,
+ W2, W3, W4, W5, W6, _, _, _ );
+
+ _R( _d, _e, _a, _b, _c, F1, 12,
+ WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 28,
+ W1, W2, W3, W4, W5, _, _, _ );
+ _R( _c, _d, _e, _a, _b, F1, 13,
+ WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 28,
+ W1, W2, W3, W4, W5, _, _, _ );
+ _R( _b, _c, _d, _e, _a, F1, 14,
+ WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 28,
+ W1, W2, W3, W4, W5, _, _, _ );
+ _R( _a, _b, _c, _d, _e, F1, 15,
+ WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,28,
+ W1, W2, W3, W4, W5, _, _, _ );
+
+ /* Transform 16-63 + Precalc 32-79. */
+ _R( _e, _a, _b, _c, _d, F1, 16,
+ WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 32,
+ W0, W1, W2, W3, W4, W5, W6, W7);
+ _R( _d, _e, _a, _b, _c, F1, 17,
+ WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 32,
+ W0, W1, W2, W3, W4, W5, W6, W7);
+ _R( _c, _d, _e, _a, _b, F1, 18,
+ WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 32,
+ W0, W1, W2, W3, W4, W5, W6, W7);
+ _R( _b, _c, _d, _e, _a, F1, 19,
+ WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 32,
+ W0, W1, W2, W3, W4, W5, W6, W7);
+
+ _R( _a, _b, _c, _d, _e, F2, 20,
+ WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 36,
+ W7, W0, W1, W2, W3, W4, W5, W6);
+ _R( _e, _a, _b, _c, _d, F2, 21,
+ WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 36,
+ W7, W0, W1, W2, W3, W4, W5, W6);
+ _R( _d, _e, _a, _b, _c, F2, 22,
+ WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 36,
+ W7, W0, W1, W2, W3, W4, W5, W6);
+ _R( _c, _d, _e, _a, _b, F2, 23,
+ WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 36,
+ W7, W0, W1, W2, W3, W4, W5, W6);
+
+#undef curK
+#define curK qK3
+ _R( _b, _c, _d, _e, _a, F2, 24,
+ WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 40,
+ W6, W7, W0, W1, W2, W3, W4, W5);
+ _R( _a, _b, _c, _d, _e, F2, 25,
+ WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 40,
+ W6, W7, W0, W1, W2, W3, W4, W5);
+ _R( _e, _a, _b, _c, _d, F2, 26,
+ WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 40,
+ W6, W7, W0, W1, W2, W3, W4, W5);
+ _R( _d, _e, _a, _b, _c, F2, 27,
+ WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 40,
+ W6, W7, W0, W1, W2, W3, W4, W5);
+
+ _R( _c, _d, _e, _a, _b, F2, 28,
+ WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 44,
+ W5, W6, W7, W0, W1, W2, W3, W4);
+ _R( _b, _c, _d, _e, _a, F2, 29,
+ WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 44,
+ W5, W6, W7, W0, W1, W2, W3, W4);
+ _R( _a, _b, _c, _d, _e, F2, 30,
+ WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 44,
+ W5, W6, W7, W0, W1, W2, W3, W4);
+ _R( _e, _a, _b, _c, _d, F2, 31,
+ WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 44,
+ W5, W6, W7, W0, W1, W2, W3, W4);
+
+ _R( _d, _e, _a, _b, _c, F2, 32,
+ WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 48,
+ W4, W5, W6, W7, W0, W1, W2, W3);
+ _R( _c, _d, _e, _a, _b, F2, 33,
+ WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 48,
+ W4, W5, W6, W7, W0, W1, W2, W3);
+ _R( _b, _c, _d, _e, _a, F2, 34,
+ WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 48,
+ W4, W5, W6, W7, W0, W1, W2, W3);
+ _R( _a, _b, _c, _d, _e, F2, 35,
+ WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 48,
+ W4, W5, W6, W7, W0, W1, W2, W3);
+
+ _R( _e, _a, _b, _c, _d, F2, 36,
+ WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 52,
+ W3, W4, W5, W6, W7, W0, W1, W2);
+ _R( _d, _e, _a, _b, _c, F2, 37,
+ WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 52,
+ W3, W4, W5, W6, W7, W0, W1, W2);
+ _R( _c, _d, _e, _a, _b, F2, 38,
+ WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 52,
+ W3, W4, W5, W6, W7, W0, W1, W2);
+ _R( _b, _c, _d, _e, _a, F2, 39,
+ WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 52,
+ W3, W4, W5, W6, W7, W0, W1, W2);
+
+ _R( _a, _b, _c, _d, _e, F3, 40,
+ WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 56,
+ W2, W3, W4, W5, W6, W7, W0, W1);
+ _R( _e, _a, _b, _c, _d, F3, 41,
+ WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 56,
+ W2, W3, W4, W5, W6, W7, W0, W1);
+ _R( _d, _e, _a, _b, _c, F3, 42,
+ WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 56,
+ W2, W3, W4, W5, W6, W7, W0, W1);
+ _R( _c, _d, _e, _a, _b, F3, 43,
+ WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 56,
+ W2, W3, W4, W5, W6, W7, W0, W1);
+
+#undef curK
+#define curK qK4
+ _R( _b, _c, _d, _e, _a, F3, 44,
+ WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 60,
+ W1, W2, W3, W4, W5, W6, W7, W0);
+ _R( _a, _b, _c, _d, _e, F3, 45,
+ WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 60,
+ W1, W2, W3, W4, W5, W6, W7, W0);
+ _R( _e, _a, _b, _c, _d, F3, 46,
+ WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 60,
+ W1, W2, W3, W4, W5, W6, W7, W0);
+ _R( _d, _e, _a, _b, _c, F3, 47,
+ WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 60,
+ W1, W2, W3, W4, W5, W6, W7, W0);
+
+ _R( _c, _d, _e, _a, _b, F3, 48,
+ WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 64,
+ W0, W1, W2, W3, W4, W5, W6, W7);
+ _R( _b, _c, _d, _e, _a, F3, 49,
+ WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 64,
+ W0, W1, W2, W3, W4, W5, W6, W7);
+ _R( _a, _b, _c, _d, _e, F3, 50,
+ WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 64,
+ W0, W1, W2, W3, W4, W5, W6, W7);
+ _R( _e, _a, _b, _c, _d, F3, 51,
+ WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 64,
+ W0, W1, W2, W3, W4, W5, W6, W7);
+
+ _R( _d, _e, _a, _b, _c, F3, 52,
+ WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 68,
+ W7, W0, W1, W2, W3, W4, W5, W6);
+ _R( _c, _d, _e, _a, _b, F3, 53,
+ WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 68,
+ W7, W0, W1, W2, W3, W4, W5, W6);
+ _R( _b, _c, _d, _e, _a, F3, 54,
+ WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 68,
+ W7, W0, W1, W2, W3, W4, W5, W6);
+ _R( _a, _b, _c, _d, _e, F3, 55,
+ WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 68,
+ W7, W0, W1, W2, W3, W4, W5, W6);
+
+ _R( _e, _a, _b, _c, _d, F3, 56,
+ WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 72,
+ W6, W7, W0, W1, W2, W3, W4, W5);
+ _R( _d, _e, _a, _b, _c, F3, 57,
+ WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 72,
+ W6, W7, W0, W1, W2, W3, W4, W5);
+ _R( _c, _d, _e, _a, _b, F3, 58,
+ WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 72,
+ W6, W7, W0, W1, W2, W3, W4, W5);
+ _R( _b, _c, _d, _e, _a, F3, 59,
+ WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 72,
+ W6, W7, W0, W1, W2, W3, W4, W5);
+
+ subs RNBLKS, #1;
+
+ _R( _a, _b, _c, _d, _e, F4, 60,
+ WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 76,
+ W5, W6, W7, W0, W1, W2, W3, W4);
+ _R( _e, _a, _b, _c, _d, F4, 61,
+ WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 76,
+ W5, W6, W7, W0, W1, W2, W3, W4);
+ _R( _d, _e, _a, _b, _c, F4, 62,
+ WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 76,
+ W5, W6, W7, W0, W1, W2, W3, W4);
+ _R( _c, _d, _e, _a, _b, F4, 63,
+ WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 76,
+ W5, W6, W7, W0, W1, W2, W3, W4);
+
+ beq .Lend;
+
+ /* Transform 64-79 + Precalc 0-15 of next block. */
+#undef curK
+#define curK qK1
+ _R( _b, _c, _d, _e, _a, F4, 64,
+ WPRECALC_00_15_0, dummy, dummy, _, _, _, _, _, _, _, _, _ );
+ _R( _a, _b, _c, _d, _e, F4, 65,
+ WPRECALC_00_15_1, dummy, dummy, _, _, _, _, _, _, _, _, _ );
+ _R( _e, _a, _b, _c, _d, F4, 66,
+ WPRECALC_00_15_2, dummy, dummy, _, _, _, _, _, _, _, _, _ );
+ _R( _d, _e, _a, _b, _c, F4, 67,
+ WPRECALC_00_15_3, dummy, dummy, _, _, _, _, _, _, _, _, _ );
+
+ _R( _c, _d, _e, _a, _b, F4, 68,
+ dummy, dummy, dummy, _, _, _, _, _, _, _, _, _ );
+ _R( _b, _c, _d, _e, _a, F4, 69,
+ dummy, dummy, dummy, _, _, _, _, _, _, _, _, _ );
+ _R( _a, _b, _c, _d, _e, F4, 70,
+ WPRECALC_00_15_4, dummy, dummy, _, _, _, _, _, _, _, _, _ );
+ _R( _e, _a, _b, _c, _d, F4, 71,
+ WPRECALC_00_15_5, dummy, dummy, _, _, _, _, _, _, _, _, _ );
+
+ _R( _d, _e, _a, _b, _c, F4, 72,
+ dummy, dummy, dummy, _, _, _, _, _, _, _, _, _ );
+ _R( _c, _d, _e, _a, _b, F4, 73,
+ dummy, dummy, dummy, _, _, _, _, _, _, _, _, _ );
+ _R( _b, _c, _d, _e, _a, F4, 74,
+ WPRECALC_00_15_6, dummy, dummy, _, _, _, _, _, _, _, _, _ );
+ _R( _a, _b, _c, _d, _e, F4, 75,
+ WPRECALC_00_15_7, dummy, dummy, _, _, _, _, _, _, _, _, _ );
+
+ _R( _e, _a, _b, _c, _d, F4, 76,
+ WPRECALC_00_15_8, dummy, dummy, _, _, _, _, _, _, _, _, _ );
+ _R( _d, _e, _a, _b, _c, F4, 77,
+ WPRECALC_00_15_9, dummy, dummy, _, _, _, _, _, _, _, _, _ );
+ _R( _c, _d, _e, _a, _b, F4, 78,
+ WPRECALC_00_15_10, dummy, dummy, _, _, _, _, _, _, _, _, _ );
+ _R( _b, _c, _d, _e, _a, F4, 79,
+ WPRECALC_00_15_11, dummy, WPRECALC_00_15_12, _, _, _, _, _, _, _, _, _ );
+
+ /* Update the chaining variables. */
+ ldm RSTATE, {RT0-RT3};
+ add _a, RT0;
+ ldr RT0, [RSTATE, #state_h4];
+ add _b, RT1;
+ add _c, RT2;
+ add _d, RT3;
+ add _e, RT0;
+ stm RSTATE, {_a-_e};
+
+ b .Loop;
+
+.Lend:
+ /* Transform 64-79 */
+ R( _b, _c, _d, _e, _a, F4, 64 );
+ R( _a, _b, _c, _d, _e, F4, 65 );
+ R( _e, _a, _b, _c, _d, F4, 66 );
+ R( _d, _e, _a, _b, _c, F4, 67 );
+ R( _c, _d, _e, _a, _b, F4, 68 );
+ R( _b, _c, _d, _e, _a, F4, 69 );
+ R( _a, _b, _c, _d, _e, F4, 70 );
+ R( _e, _a, _b, _c, _d, F4, 71 );
+ R( _d, _e, _a, _b, _c, F4, 72 );
+ R( _c, _d, _e, _a, _b, F4, 73 );
+ R( _b, _c, _d, _e, _a, F4, 74 );
+ R( _a, _b, _c, _d, _e, F4, 75 );
+ R( _e, _a, _b, _c, _d, F4, 76 );
+ R( _d, _e, _a, _b, _c, F4, 77 );
+ R( _c, _d, _e, _a, _b, F4, 78 );
+ R( _b, _c, _d, _e, _a, F4, 79 );
+
+ mov sp, ROLDSTACK;
+
+ /* Update the chaining variables. */
+ ldm RSTATE, {RT0-RT3};
+ add _a, RT0;
+ ldr RT0, [RSTATE, #state_h4];
+ add _b, RT1;
+ add _c, RT2;
+ add _d, RT3;
+ /*vpop {q4-q7};*/
+ add _e, RT0;
+ stm RSTATE, {_a-_e};
+
+ pop {r4-r12, pc};
+
+.Ldo_nothing:
+ bx lr
+ENDPROC(sha1_transform_neon)
diff --git a/arch/arm/crypto/sha1_glue.c b/arch/arm/crypto/sha1_glue.c
index 76cd976230b..84f2a756588 100644
--- a/arch/arm/crypto/sha1_glue.c
+++ b/arch/arm/crypto/sha1_glue.c
@@ -23,32 +23,27 @@
#include <linux/types.h>
#include <crypto/sha.h>
#include <asm/byteorder.h>
+#include <asm/crypto/sha1.h>
-struct SHA1_CTX {
- uint32_t h0,h1,h2,h3,h4;
- u64 count;
- u8 data[SHA1_BLOCK_SIZE];
-};
-asmlinkage void sha1_block_data_order(struct SHA1_CTX *digest,
+asmlinkage void sha1_block_data_order(u32 *digest,
const unsigned char *data, unsigned int rounds);
static int sha1_init(struct shash_desc *desc)
{
- struct SHA1_CTX *sctx = shash_desc_ctx(desc);
- memset(sctx, 0, sizeof(*sctx));
- sctx->h0 = SHA1_H0;
- sctx->h1 = SHA1_H1;
- sctx->h2 = SHA1_H2;
- sctx->h3 = SHA1_H3;
- sctx->h4 = SHA1_H4;
+ struct sha1_state *sctx = shash_desc_ctx(desc);
+
+ *sctx = (struct sha1_state){
+ .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
+ };
+
return 0;
}
-static int __sha1_update(struct SHA1_CTX *sctx, const u8 *data,
- unsigned int len, unsigned int partial)
+static int __sha1_update(struct sha1_state *sctx, const u8 *data,
+ unsigned int len, unsigned int partial)
{
unsigned int done = 0;
@@ -56,43 +51,44 @@ static int __sha1_update(struct SHA1_CTX *sctx, const u8 *data,
if (partial) {
done = SHA1_BLOCK_SIZE - partial;
- memcpy(sctx->data + partial, data, done);
- sha1_block_data_order(sctx, sctx->data, 1);
+ memcpy(sctx->buffer + partial, data, done);
+ sha1_block_data_order(sctx->state, sctx->buffer, 1);
}
if (len - done >= SHA1_BLOCK_SIZE) {
const unsigned int rounds = (len - done) / SHA1_BLOCK_SIZE;
- sha1_block_data_order(sctx, data + done, rounds);
+ sha1_block_data_order(sctx->state, data + done, rounds);
done += rounds * SHA1_BLOCK_SIZE;
}
- memcpy(sctx->data, data + done, len - done);
+ memcpy(sctx->buffer, data + done, len - done);
return 0;
}
-static int sha1_update(struct shash_desc *desc, const u8 *data,
- unsigned int len)
+int sha1_update_arm(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
{
- struct SHA1_CTX *sctx = shash_desc_ctx(desc);
+ struct sha1_state *sctx = shash_desc_ctx(desc);
unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;
int res;
/* Handle the fast case right here */
if (partial + len < SHA1_BLOCK_SIZE) {
sctx->count += len;
- memcpy(sctx->data + partial, data, len);
+ memcpy(sctx->buffer + partial, data, len);
return 0;
}
res = __sha1_update(sctx, data, len, partial);
return res;
}
+EXPORT_SYMBOL_GPL(sha1_update_arm);
/* Add padding and return the message digest. */
static int sha1_final(struct shash_desc *desc, u8 *out)
{
- struct SHA1_CTX *sctx = shash_desc_ctx(desc);
+ struct sha1_state *sctx = shash_desc_ctx(desc);
unsigned int i, index, padlen;
__be32 *dst = (__be32 *)out;
__be64 bits;
@@ -106,7 +102,7 @@ static int sha1_final(struct shash_desc *desc, u8 *out)
/* We need to fill a whole block for __sha1_update() */
if (padlen <= 56) {
sctx->count += padlen;
- memcpy(sctx->data + index, padding, padlen);
+ memcpy(sctx->buffer + index, padding, padlen);
} else {
__sha1_update(sctx, padding, padlen, index);
}
@@ -114,7 +110,7 @@ static int sha1_final(struct shash_desc *desc, u8 *out)
/* Store state in digest */
for (i = 0; i < 5; i++)
- dst[i] = cpu_to_be32(((u32 *)sctx)[i]);
+ dst[i] = cpu_to_be32(sctx->state[i]);
/* Wipe context */
memset(sctx, 0, sizeof(*sctx));
@@ -124,7 +120,7 @@ static int sha1_final(struct shash_desc *desc, u8 *out)
static int sha1_export(struct shash_desc *desc, void *out)
{
- struct SHA1_CTX *sctx = shash_desc_ctx(desc);
+ struct sha1_state *sctx = shash_desc_ctx(desc);
memcpy(out, sctx, sizeof(*sctx));
return 0;
}
@@ -132,7 +128,7 @@ static int sha1_export(struct shash_desc *desc, void *out)
static int sha1_import(struct shash_desc *desc, const void *in)
{
- struct SHA1_CTX *sctx = shash_desc_ctx(desc);
+ struct sha1_state *sctx = shash_desc_ctx(desc);
memcpy(sctx, in, sizeof(*sctx));
return 0;
}
@@ -141,12 +137,12 @@ static int sha1_import(struct shash_desc *desc, const void *in)
static struct shash_alg alg = {
.digestsize = SHA1_DIGEST_SIZE,
.init = sha1_init,
- .update = sha1_update,
+ .update = sha1_update_arm,
.final = sha1_final,
.export = sha1_export,
.import = sha1_import,
- .descsize = sizeof(struct SHA1_CTX),
- .statesize = sizeof(struct SHA1_CTX),
+ .descsize = sizeof(struct sha1_state),
+ .statesize = sizeof(struct sha1_state),
.base = {
.cra_name = "sha1",
.cra_driver_name= "sha1-asm",
diff --git a/arch/arm/crypto/sha1_neon_glue.c b/arch/arm/crypto/sha1_neon_glue.c
new file mode 100644
index 00000000000..6f1b411b1d5
--- /dev/null
+++ b/arch/arm/crypto/sha1_neon_glue.c
@@ -0,0 +1,197 @@
+/*
+ * Glue code for the SHA1 Secure Hash Algorithm assembler implementation using
+ * ARM NEON instructions.
+ *
+ * Copyright © 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is based on sha1_generic.c and sha1_ssse3_glue.c:
+ * Copyright (c) Alan Smithee.
+ * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
+ * Copyright (c) Jean-Francois Dive <jef@linuxbe.org>
+ * Copyright (c) Mathias Krause <minipli@googlemail.com>
+ * Copyright (c) Chandramouli Narayanan <mouli@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <crypto/internal/hash.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/cryptohash.h>
+#include <linux/types.h>
+#include <crypto/sha.h>
+#include <asm/byteorder.h>
+#include <asm/neon.h>
+#include <asm/simd.h>
+#include <asm/crypto/sha1.h>
+
+
+asmlinkage void sha1_transform_neon(void *state_h, const char *data,
+ unsigned int rounds);
+
+
+static int sha1_neon_init(struct shash_desc *desc)
+{
+ struct sha1_state *sctx = shash_desc_ctx(desc);
+
+ *sctx = (struct sha1_state){
+ .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
+ };
+
+ return 0;
+}
+
+static int __sha1_neon_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len, unsigned int partial)
+{
+ struct sha1_state *sctx = shash_desc_ctx(desc);
+ unsigned int done = 0;
+
+ sctx->count += len;
+
+ if (partial) {
+ done = SHA1_BLOCK_SIZE - partial;
+ memcpy(sctx->buffer + partial, data, done);
+ sha1_transform_neon(sctx->state, sctx->buffer, 1);
+ }
+
+ if (len - done >= SHA1_BLOCK_SIZE) {
+ const unsigned int rounds = (len - done) / SHA1_BLOCK_SIZE;
+
+ sha1_transform_neon(sctx->state, data + done, rounds);
+ done += rounds * SHA1_BLOCK_SIZE;
+ }
+
+ memcpy(sctx->buffer, data + done, len - done);
+
+ return 0;
+}
+
+static int sha1_neon_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
+{
+ struct sha1_state *sctx = shash_desc_ctx(desc);
+ unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;
+ int res;
+
+ /* Handle the fast case right here */
+ if (partial + len < SHA1_BLOCK_SIZE) {
+ sctx->count += len;
+ memcpy(sctx->buffer + partial, data, len);
+
+ return 0;
+ }
+
+ if (!may_use_simd()) {
+ res = sha1_update_arm(desc, data, len);
+ } else {
+ kernel_neon_begin();
+ res = __sha1_neon_update(desc, data, len, partial);
+ kernel_neon_end();
+ }
+
+ return res;
+}
+
+
+/* Add padding and return the message digest. */
+static int sha1_neon_final(struct shash_desc *desc, u8 *out)
+{
+ struct sha1_state *sctx = shash_desc_ctx(desc);
+ unsigned int i, index, padlen;
+ __be32 *dst = (__be32 *)out;
+ __be64 bits;
+ static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, };
+
+ bits = cpu_to_be64(sctx->count << 3);
+
+ /* Pad out to 56 mod 64 and append length */
+ index = sctx->count % SHA1_BLOCK_SIZE;
+ padlen = (index < 56) ? (56 - index) : ((SHA1_BLOCK_SIZE+56) - index);
+ if (!may_use_simd()) {
+ sha1_update_arm(desc, padding, padlen);
+ sha1_update_arm(desc, (const u8 *)&bits, sizeof(bits));
+ } else {
+ kernel_neon_begin();
+ /* We need to fill a whole block for __sha1_neon_update() */
+ if (padlen <= 56) {
+ sctx->count += padlen;
+ memcpy(sctx->buffer + index, padding, padlen);
+ } else {
+ __sha1_neon_update(desc, padding, padlen, index);
+ }
+ __sha1_neon_update(desc, (const u8 *)&bits, sizeof(bits), 56);
+ kernel_neon_end();
+ }
+
+ /* Store state in digest */
+ for (i = 0; i < 5; i++)
+ dst[i] = cpu_to_be32(sctx->state[i]);
+
+ /* Wipe context */
+ memset(sctx, 0, sizeof(*sctx));
+
+ return 0;
+}
+
+static int sha1_neon_export(struct shash_desc *desc, void *out)
+{
+ struct sha1_state *sctx = shash_desc_ctx(desc);
+
+ memcpy(out, sctx, sizeof(*sctx));
+
+ return 0;
+}
+
+static int sha1_neon_import(struct shash_desc *desc, const void *in)
+{
+ struct sha1_state *sctx = shash_desc_ctx(desc);
+
+ memcpy(sctx, in, sizeof(*sctx));
+
+ return 0;
+}
+
+static struct shash_alg alg = {
+ .digestsize = SHA1_DIGEST_SIZE,
+ .init = sha1_neon_init,
+ .update = sha1_neon_update,
+ .final = sha1_neon_final,
+ .export = sha1_neon_export,
+ .import = sha1_neon_import,
+ .descsize = sizeof(struct sha1_state),
+ .statesize = sizeof(struct sha1_state),
+ .base = {
+ .cra_name = "sha1",
+ .cra_driver_name = "sha1-neon",
+ .cra_priority = 250,
+ .cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ .cra_blocksize = SHA1_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
+ }
+};
+
+static int __init sha1_neon_mod_init(void)
+{
+ if (!cpu_has_neon())
+ return -ENODEV;
+
+ return crypto_register_shash(&alg);
+}
+
+static void __exit sha1_neon_mod_fini(void)
+{
+ crypto_unregister_shash(&alg);
+}
+
+module_init(sha1_neon_mod_init);
+module_exit(sha1_neon_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm, NEON accelerated");
+MODULE_ALIAS("sha1");
diff --git a/arch/arm/crypto/sha512-armv7-neon.S b/arch/arm/crypto/sha512-armv7-neon.S
new file mode 100644
index 00000000000..fe99472e507
--- /dev/null
+++ b/arch/arm/crypto/sha512-armv7-neon.S
@@ -0,0 +1,455 @@
+/* sha512-armv7-neon.S - ARM/NEON assembly implementation of SHA-512 transform
+ *
+ * Copyright © 2013-2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#include <linux/linkage.h>
+
+
+.syntax unified
+.code 32
+.fpu neon
+
+.text
+
+/* structure of SHA512_CONTEXT */
+#define hd_a 0
+#define hd_b ((hd_a) + 8)
+#define hd_c ((hd_b) + 8)
+#define hd_d ((hd_c) + 8)
+#define hd_e ((hd_d) + 8)
+#define hd_f ((hd_e) + 8)
+#define hd_g ((hd_f) + 8)
+
+/* register macros */
+#define RK %r2
+
+#define RA d0
+#define RB d1
+#define RC d2
+#define RD d3
+#define RE d4
+#define RF d5
+#define RG d6
+#define RH d7
+
+#define RT0 d8
+#define RT1 d9
+#define RT2 d10
+#define RT3 d11
+#define RT4 d12
+#define RT5 d13
+#define RT6 d14
+#define RT7 d15
+
+#define RT01q q4
+#define RT23q q5
+#define RT45q q6
+#define RT67q q7
+
+#define RW0 d16
+#define RW1 d17
+#define RW2 d18
+#define RW3 d19
+#define RW4 d20
+#define RW5 d21
+#define RW6 d22
+#define RW7 d23
+#define RW8 d24
+#define RW9 d25
+#define RW10 d26
+#define RW11 d27
+#define RW12 d28
+#define RW13 d29
+#define RW14 d30
+#define RW15 d31
+
+#define RW01q q8
+#define RW23q q9
+#define RW45q q10
+#define RW67q q11
+#define RW89q q12
+#define RW1011q q13
+#define RW1213q q14
+#define RW1415q q15
+
+/***********************************************************************
+ * ARM assembly implementation of sha512 transform
+ ***********************************************************************/
+#define rounds2_0_63(ra, rb, rc, rd, re, rf, rg, rh, rw0, rw1, rw01q, rw2, \
+ rw23q, rw1415q, rw9, rw10, interleave_op, arg1) \
+ /* t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[t]; */ \
+ vshr.u64 RT2, re, #14; \
+ vshl.u64 RT3, re, #64 - 14; \
+ interleave_op(arg1); \
+ vshr.u64 RT4, re, #18; \
+ vshl.u64 RT5, re, #64 - 18; \
+ vld1.64 {RT0}, [RK]!; \
+ veor.64 RT23q, RT23q, RT45q; \
+ vshr.u64 RT4, re, #41; \
+ vshl.u64 RT5, re, #64 - 41; \
+ vadd.u64 RT0, RT0, rw0; \
+ veor.64 RT23q, RT23q, RT45q; \
+ vmov.64 RT7, re; \
+ veor.64 RT1, RT2, RT3; \
+ vbsl.64 RT7, rf, rg; \
+ \
+ vadd.u64 RT1, RT1, rh; \
+ vshr.u64 RT2, ra, #28; \
+ vshl.u64 RT3, ra, #64 - 28; \
+ vadd.u64 RT1, RT1, RT0; \
+ vshr.u64 RT4, ra, #34; \
+ vshl.u64 RT5, ra, #64 - 34; \
+ vadd.u64 RT1, RT1, RT7; \
+ \
+ /* h = Sum0 (a) + Maj (a, b, c); */ \
+ veor.64 RT23q, RT23q, RT45q; \
+ vshr.u64 RT4, ra, #39; \
+ vshl.u64 RT5, ra, #64 - 39; \
+ veor.64 RT0, ra, rb; \
+ veor.64 RT23q, RT23q, RT45q; \
+ vbsl.64 RT0, rc, rb; \
+ vadd.u64 rd, rd, RT1; /* d+=t1; */ \
+ veor.64 rh, RT2, RT3; \
+ \
+ /* t1 = g + Sum1 (d) + Ch (d, e, f) + k[t] + w[t]; */ \
+ vshr.u64 RT2, rd, #14; \
+ vshl.u64 RT3, rd, #64 - 14; \
+ vadd.u64 rh, rh, RT0; \
+ vshr.u64 RT4, rd, #18; \
+ vshl.u64 RT5, rd, #64 - 18; \
+ vadd.u64 rh, rh, RT1; /* h+=t1; */ \
+ vld1.64 {RT0}, [RK]!; \
+ veor.64 RT23q, RT23q, RT45q; \
+ vshr.u64 RT4, rd, #41; \
+ vshl.u64 RT5, rd, #64 - 41; \
+ vadd.u64 RT0, RT0, rw1; \
+ veor.64 RT23q, RT23q, RT45q; \
+ vmov.64 RT7, rd; \
+ veor.64 RT1, RT2, RT3; \
+ vbsl.64 RT7, re, rf; \
+ \
+ vadd.u64 RT1, RT1, rg; \
+ vshr.u64 RT2, rh, #28; \
+ vshl.u64 RT3, rh, #64 - 28; \
+ vadd.u64 RT1, RT1, RT0; \
+ vshr.u64 RT4, rh, #34; \
+ vshl.u64 RT5, rh, #64 - 34; \
+ vadd.u64 RT1, RT1, RT7; \
+ \
+ /* g = Sum0 (h) + Maj (h, a, b); */ \
+ veor.64 RT23q, RT23q, RT45q; \
+ vshr.u64 RT4, rh, #39; \
+ vshl.u64 RT5, rh, #64 - 39; \
+ veor.64 RT0, rh, ra; \
+ veor.64 RT23q, RT23q, RT45q; \
+ vbsl.64 RT0, rb, ra; \
+ vadd.u64 rc, rc, RT1; /* c+=t1; */ \
+ veor.64 rg, RT2, RT3; \
+ \
+ /* w[0] += S1 (w[14]) + w[9] + S0 (w[1]); */ \
+ /* w[1] += S1 (w[15]) + w[10] + S0 (w[2]); */ \
+ \
+ /**** S0(w[1:2]) */ \
+ \
+ /* w[0:1] += w[9:10] */ \
+ /* RT23q = rw1:rw2 */ \
+ vext.u64 RT23q, rw01q, rw23q, #1; \
+ vadd.u64 rw0, rw9; \
+ vadd.u64 rg, rg, RT0; \
+ vadd.u64 rw1, rw10;\
+ vadd.u64 rg, rg, RT1; /* g+=t1; */ \
+ \
+ vshr.u64 RT45q, RT23q, #1; \
+ vshl.u64 RT67q, RT23q, #64 - 1; \
+ vshr.u64 RT01q, RT23q, #8; \
+ veor.u64 RT45q, RT45q, RT67q; \
+ vshl.u64 RT67q, RT23q, #64 - 8; \
+ veor.u64 RT45q, RT45q, RT01q; \
+ vshr.u64 RT01q, RT23q, #7; \
+ veor.u64 RT45q, RT45q, RT67q; \
+ \
+ /**** S1(w[14:15]) */ \
+ vshr.u64 RT23q, rw1415q, #6; \
+ veor.u64 RT01q, RT01q, RT45q; \
+ vshr.u64 RT45q, rw1415q, #19; \
+ vshl.u64 RT67q, rw1415q, #64 - 19; \
+ veor.u64 RT23q, RT23q, RT45q; \
+ vshr.u64 RT45q, rw1415q, #61; \
+ veor.u64 RT23q, RT23q, RT67q; \
+ vshl.u64 RT67q, rw1415q, #64 - 61; \
+ veor.u64 RT23q, RT23q, RT45q; \
+ vadd.u64 rw01q, RT01q; /* w[0:1] += S(w[1:2]) */ \
+ veor.u64 RT01q, RT23q, RT67q;
+#define vadd_RT01q(rw01q) \
+ /* w[0:1] += S(w[14:15]) */ \
+ vadd.u64 rw01q, RT01q;
+
+#define dummy(_) /*_*/
+
+#define rounds2_64_79(ra, rb, rc, rd, re, rf, rg, rh, rw0, rw1, \
+ interleave_op1, arg1, interleave_op2, arg2) \
+ /* t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[t]; */ \
+ vshr.u64 RT2, re, #14; \
+ vshl.u64 RT3, re, #64 - 14; \
+ interleave_op1(arg1); \
+ vshr.u64 RT4, re, #18; \
+ vshl.u64 RT5, re, #64 - 18; \
+ interleave_op2(arg2); \
+ vld1.64 {RT0}, [RK]!; \
+ veor.64 RT23q, RT23q, RT45q; \
+ vshr.u64 RT4, re, #41; \
+ vshl.u64 RT5, re, #64 - 41; \
+ vadd.u64 RT0, RT0, rw0; \
+ veor.64 RT23q, RT23q, RT45q; \
+ vmov.64 RT7, re; \
+ veor.64 RT1, RT2, RT3; \
+ vbsl.64 RT7, rf, rg; \
+ \
+ vadd.u64 RT1, RT1, rh; \
+ vshr.u64 RT2, ra, #28; \
+ vshl.u64 RT3, ra, #64 - 28; \
+ vadd.u64 RT1, RT1, RT0; \
+ vshr.u64 RT4, ra, #34; \
+ vshl.u64 RT5, ra, #64 - 34; \
+ vadd.u64 RT1, RT1, RT7; \
+ \
+ /* h = Sum0 (a) + Maj (a, b, c); */ \
+ veor.64 RT23q, RT23q, RT45q; \
+ vshr.u64 RT4, ra, #39; \
+ vshl.u64 RT5, ra, #64 - 39; \
+ veor.64 RT0, ra, rb; \
+ veor.64 RT23q, RT23q, RT45q; \
+ vbsl.64 RT0, rc, rb; \
+ vadd.u64 rd, rd, RT1; /* d+=t1; */ \
+ veor.64 rh, RT2, RT3; \
+ \
+ /* t1 = g + Sum1 (d) + Ch (d, e, f) + k[t] + w[t]; */ \
+ vshr.u64 RT2, rd, #14; \
+ vshl.u64 RT3, rd, #64 - 14; \
+ vadd.u64 rh, rh, RT0; \
+ vshr.u64 RT4, rd, #18; \
+ vshl.u64 RT5, rd, #64 - 18; \
+ vadd.u64 rh, rh, RT1; /* h+=t1; */ \
+ vld1.64 {RT0}, [RK]!; \
+ veor.64 RT23q, RT23q, RT45q; \
+ vshr.u64 RT4, rd, #41; \
+ vshl.u64 RT5, rd, #64 - 41; \
+ vadd.u64 RT0, RT0, rw1; \
+ veor.64 RT23q, RT23q, RT45q; \
+ vmov.64 RT7, rd; \
+ veor.64 RT1, RT2, RT3; \
+ vbsl.64 RT7, re, rf; \
+ \
+ vadd.u64 RT1, RT1, rg; \
+ vshr.u64 RT2, rh, #28; \
+ vshl.u64 RT3, rh, #64 - 28; \
+ vadd.u64 RT1, RT1, RT0; \
+ vshr.u64 RT4, rh, #34; \
+ vshl.u64 RT5, rh, #64 - 34; \
+ vadd.u64 RT1, RT1, RT7; \
+ \
+ /* g = Sum0 (h) + Maj (h, a, b); */ \
+ veor.64 RT23q, RT23q, RT45q; \
+ vshr.u64 RT4, rh, #39; \
+ vshl.u64 RT5, rh, #64 - 39; \
+ veor.64 RT0, rh, ra; \
+ veor.64 RT23q, RT23q, RT45q; \
+ vbsl.64 RT0, rb, ra; \
+ vadd.u64 rc, rc, RT1; /* c+=t1; */ \
+ veor.64 rg, RT2, RT3;
+#define vadd_rg_RT0(rg) \
+ vadd.u64 rg, rg, RT0;
+#define vadd_rg_RT1(rg) \
+ vadd.u64 rg, rg, RT1; /* g+=t1; */
+
+.align 3
+ENTRY(sha512_transform_neon)
+ /* Input:
+ * %r0: SHA512_CONTEXT
+ * %r1: data
+ * %r2: u64 k[] constants
+ * %r3: nblks
+ */
+ push {%lr};
+
+ mov %lr, #0;
+
+ /* Load context to d0-d7 */
+ vld1.64 {RA-RD}, [%r0]!;
+ vld1.64 {RE-RH}, [%r0];
+ sub %r0, #(4*8);
+
+ /* Load input to w[16], d16-d31 */
+ /* NOTE: Assumes that on ARMv7 unaligned accesses are always allowed. */
+ vld1.64 {RW0-RW3}, [%r1]!;
+ vld1.64 {RW4-RW7}, [%r1]!;
+ vld1.64 {RW8-RW11}, [%r1]!;
+ vld1.64 {RW12-RW15}, [%r1]!;
+#ifdef __ARMEL__
+ /* byteswap */
+ vrev64.8 RW01q, RW01q;
+ vrev64.8 RW23q, RW23q;
+ vrev64.8 RW45q, RW45q;
+ vrev64.8 RW67q, RW67q;
+ vrev64.8 RW89q, RW89q;
+ vrev64.8 RW1011q, RW1011q;
+ vrev64.8 RW1213q, RW1213q;
+ vrev64.8 RW1415q, RW1415q;
+#endif
+
+ /* EABI says that d8-d15 must be preserved by callee. */
+ /*vpush {RT0-RT7};*/
+
+.Loop:
+ rounds2_0_63(RA, RB, RC, RD, RE, RF, RG, RH, RW0, RW1, RW01q, RW2,
+ RW23q, RW1415q, RW9, RW10, dummy, _);
+ b .Lenter_rounds;
+
+.Loop_rounds:
+ rounds2_0_63(RA, RB, RC, RD, RE, RF, RG, RH, RW0, RW1, RW01q, RW2,
+ RW23q, RW1415q, RW9, RW10, vadd_RT01q, RW1415q);
+.Lenter_rounds:
+ rounds2_0_63(RG, RH, RA, RB, RC, RD, RE, RF, RW2, RW3, RW23q, RW4,
+ RW45q, RW01q, RW11, RW12, vadd_RT01q, RW01q);
+ rounds2_0_63(RE, RF, RG, RH, RA, RB, RC, RD, RW4, RW5, RW45q, RW6,
+ RW67q, RW23q, RW13, RW14, vadd_RT01q, RW23q);
+ rounds2_0_63(RC, RD, RE, RF, RG, RH, RA, RB, RW6, RW7, RW67q, RW8,
+ RW89q, RW45q, RW15, RW0, vadd_RT01q, RW45q);
+ rounds2_0_63(RA, RB, RC, RD, RE, RF, RG, RH, RW8, RW9, RW89q, RW10,
+ RW1011q, RW67q, RW1, RW2, vadd_RT01q, RW67q);
+ rounds2_0_63(RG, RH, RA, RB, RC, RD, RE, RF, RW10, RW11, RW1011q, RW12,
+ RW1213q, RW89q, RW3, RW4, vadd_RT01q, RW89q);
+ add %lr, #16;
+ rounds2_0_63(RE, RF, RG, RH, RA, RB, RC, RD, RW12, RW13, RW1213q, RW14,
+ RW1415q, RW1011q, RW5, RW6, vadd_RT01q, RW1011q);
+ cmp %lr, #64;
+ rounds2_0_63(RC, RD, RE, RF, RG, RH, RA, RB, RW14, RW15, RW1415q, RW0,
+ RW01q, RW1213q, RW7, RW8, vadd_RT01q, RW1213q);
+ bne .Loop_rounds;
+
+ subs %r3, #1;
+
+ rounds2_64_79(RA, RB, RC, RD, RE, RF, RG, RH, RW0, RW1,
+ vadd_RT01q, RW1415q, dummy, _);
+ rounds2_64_79(RG, RH, RA, RB, RC, RD, RE, RF, RW2, RW3,
+ vadd_rg_RT0, RG, vadd_rg_RT1, RG);
+ beq .Lhandle_tail;
+ vld1.64 {RW0-RW3}, [%r1]!;
+ rounds2_64_79(RE, RF, RG, RH, RA, RB, RC, RD, RW4, RW5,
+ vadd_rg_RT0, RE, vadd_rg_RT1, RE);
+ rounds2_64_79(RC, RD, RE, RF, RG, RH, RA, RB, RW6, RW7,
+ vadd_rg_RT0, RC, vadd_rg_RT1, RC);
+#ifdef __ARMEL__
+ vrev64.8 RW01q, RW01q;
+ vrev64.8 RW23q, RW23q;
+#endif
+ vld1.64 {RW4-RW7}, [%r1]!;
+ rounds2_64_79(RA, RB, RC, RD, RE, RF, RG, RH, RW8, RW9,
+ vadd_rg_RT0, RA, vadd_rg_RT1, RA);
+ rounds2_64_79(RG, RH, RA, RB, RC, RD, RE, RF, RW10, RW11,
+ vadd_rg_RT0, RG, vadd_rg_RT1, RG);
+#ifdef __ARMEL__
+ vrev64.8 RW45q, RW45q;
+ vrev64.8 RW67q, RW67q;
+#endif
+ vld1.64 {RW8-RW11}, [%r1]!;
+ rounds2_64_79(RE, RF, RG, RH, RA, RB, RC, RD, RW12, RW13,
+ vadd_rg_RT0, RE, vadd_rg_RT1, RE);
+ rounds2_64_79(RC, RD, RE, RF, RG, RH, RA, RB, RW14, RW15,
+ vadd_rg_RT0, RC, vadd_rg_RT1, RC);
+#ifdef __ARMEL__
+ vrev64.8 RW89q, RW89q;
+ vrev64.8 RW1011q, RW1011q;
+#endif
+ vld1.64 {RW12-RW15}, [%r1]!;
+ vadd_rg_RT0(RA);
+ vadd_rg_RT1(RA);
+
+ /* Load context */
+ vld1.64 {RT0-RT3}, [%r0]!;
+ vld1.64 {RT4-RT7}, [%r0];
+ sub %r0, #(4*8);
+
+#ifdef __ARMEL__
+ vrev64.8 RW1213q, RW1213q;
+ vrev64.8 RW1415q, RW1415q;
+#endif
+
+ vadd.u64 RA, RT0;
+ vadd.u64 RB, RT1;
+ vadd.u64 RC, RT2;
+ vadd.u64 RD, RT3;
+ vadd.u64 RE, RT4;
+ vadd.u64 RF, RT5;
+ vadd.u64 RG, RT6;
+ vadd.u64 RH, RT7;
+
+ /* Store the first half of context */
+ vst1.64 {RA-RD}, [%r0]!;
+ sub RK, $(8*80);
+ vst1.64 {RE-RH}, [%r0]; /* Store the last half of context */
+ mov %lr, #0;
+ sub %r0, #(4*8);
+
+ b .Loop;
+
+.Lhandle_tail:
+ rounds2_64_79(RE, RF, RG, RH, RA, RB, RC, RD, RW4, RW5,
+ vadd_rg_RT0, RE, vadd_rg_RT1, RE);
+ rounds2_64_79(RC, RD, RE, RF, RG, RH, RA, RB, RW6, RW7,
+ vadd_rg_RT0, RC, vadd_rg_RT1, RC);
+ rounds2_64_79(RA, RB, RC, RD, RE, RF, RG, RH, RW8, RW9,
+ vadd_rg_RT0, RA, vadd_rg_RT1, RA);
+ rounds2_64_79(RG, RH, RA, RB, RC, RD, RE, RF, RW10, RW11,
+ vadd_rg_RT0, RG, vadd_rg_RT1, RG);
+ rounds2_64_79(RE, RF, RG, RH, RA, RB, RC, RD, RW12, RW13,
+ vadd_rg_RT0, RE, vadd_rg_RT1, RE);
+ rounds2_64_79(RC, RD, RE, RF, RG, RH, RA, RB, RW14, RW15,
+ vadd_rg_RT0, RC, vadd_rg_RT1, RC);
+
+ /* Load context to d16-d23 */
+ vld1.64 {RW0-RW3}, [%r0]!;
+ vadd_rg_RT0(RA);
+ vld1.64 {RW4-RW7}, [%r0];
+ vadd_rg_RT1(RA);
+ sub %r0, #(4*8);
+
+ vadd.u64 RA, RW0;
+ vadd.u64 RB, RW1;
+ vadd.u64 RC, RW2;
+ vadd.u64 RD, RW3;
+ vadd.u64 RE, RW4;
+ vadd.u64 RF, RW5;
+ vadd.u64 RG, RW6;
+ vadd.u64 RH, RW7;
+
+ /* Store the first half of context */
+ vst1.64 {RA-RD}, [%r0]!;
+
+ /* Clear used registers */
+ /* d16-d31 */
+ veor.u64 RW01q, RW01q;
+ veor.u64 RW23q, RW23q;
+ veor.u64 RW45q, RW45q;
+ veor.u64 RW67q, RW67q;
+ vst1.64 {RE-RH}, [%r0]; /* Store the last half of context */
+ veor.u64 RW89q, RW89q;
+ veor.u64 RW1011q, RW1011q;
+ veor.u64 RW1213q, RW1213q;
+ veor.u64 RW1415q, RW1415q;
+ /* d8-d15 */
+ /*vpop {RT0-RT7};*/
+ /* d0-d7 (q0-q3) */
+ veor.u64 %q0, %q0;
+ veor.u64 %q1, %q1;
+ veor.u64 %q2, %q2;
+ veor.u64 %q3, %q3;
+
+ pop {%pc};
+ENDPROC(sha512_transform_neon)
diff --git a/arch/arm/crypto/sha512_neon_glue.c b/arch/arm/crypto/sha512_neon_glue.c
new file mode 100644
index 00000000000..0d2758ff5e1
--- /dev/null
+++ b/arch/arm/crypto/sha512_neon_glue.c
@@ -0,0 +1,305 @@
+/*
+ * Glue code for the SHA512 Secure Hash Algorithm assembly implementation
+ * using NEON instructions.
+ *
+ * Copyright © 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is based on sha512_ssse3_glue.c:
+ * Copyright (C) 2013 Intel Corporation
+ * Author: Tim Chen <tim.c.chen@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <crypto/internal/hash.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/cryptohash.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <crypto/sha.h>
+#include <asm/byteorder.h>
+#include <asm/simd.h>
+#include <asm/neon.h>
+
+
+static const u64 sha512_k[] = {
+ 0x428a2f98d728ae22ULL, 0x7137449123ef65cdULL,
+ 0xb5c0fbcfec4d3b2fULL, 0xe9b5dba58189dbbcULL,
+ 0x3956c25bf348b538ULL, 0x59f111f1b605d019ULL,
+ 0x923f82a4af194f9bULL, 0xab1c5ed5da6d8118ULL,
+ 0xd807aa98a3030242ULL, 0x12835b0145706fbeULL,
+ 0x243185be4ee4b28cULL, 0x550c7dc3d5ffb4e2ULL,
+ 0x72be5d74f27b896fULL, 0x80deb1fe3b1696b1ULL,
+ 0x9bdc06a725c71235ULL, 0xc19bf174cf692694ULL,
+ 0xe49b69c19ef14ad2ULL, 0xefbe4786384f25e3ULL,
+ 0x0fc19dc68b8cd5b5ULL, 0x240ca1cc77ac9c65ULL,
+ 0x2de92c6f592b0275ULL, 0x4a7484aa6ea6e483ULL,
+ 0x5cb0a9dcbd41fbd4ULL, 0x76f988da831153b5ULL,
+ 0x983e5152ee66dfabULL, 0xa831c66d2db43210ULL,
+ 0xb00327c898fb213fULL, 0xbf597fc7beef0ee4ULL,
+ 0xc6e00bf33da88fc2ULL, 0xd5a79147930aa725ULL,
+ 0x06ca6351e003826fULL, 0x142929670a0e6e70ULL,
+ 0x27b70a8546d22ffcULL, 0x2e1b21385c26c926ULL,
+ 0x4d2c6dfc5ac42aedULL, 0x53380d139d95b3dfULL,
+ 0x650a73548baf63deULL, 0x766a0abb3c77b2a8ULL,
+ 0x81c2c92e47edaee6ULL, 0x92722c851482353bULL,
+ 0xa2bfe8a14cf10364ULL, 0xa81a664bbc423001ULL,
+ 0xc24b8b70d0f89791ULL, 0xc76c51a30654be30ULL,
+ 0xd192e819d6ef5218ULL, 0xd69906245565a910ULL,
+ 0xf40e35855771202aULL, 0x106aa07032bbd1b8ULL,
+ 0x19a4c116b8d2d0c8ULL, 0x1e376c085141ab53ULL,
+ 0x2748774cdf8eeb99ULL, 0x34b0bcb5e19b48a8ULL,
+ 0x391c0cb3c5c95a63ULL, 0x4ed8aa4ae3418acbULL,
+ 0x5b9cca4f7763e373ULL, 0x682e6ff3d6b2b8a3ULL,
+ 0x748f82ee5defb2fcULL, 0x78a5636f43172f60ULL,
+ 0x84c87814a1f0ab72ULL, 0x8cc702081a6439ecULL,
+ 0x90befffa23631e28ULL, 0xa4506cebde82bde9ULL,
+ 0xbef9a3f7b2c67915ULL, 0xc67178f2e372532bULL,
+ 0xca273eceea26619cULL, 0xd186b8c721c0c207ULL,
+ 0xeada7dd6cde0eb1eULL, 0xf57d4f7fee6ed178ULL,
+ 0x06f067aa72176fbaULL, 0x0a637dc5a2c898a6ULL,
+ 0x113f9804bef90daeULL, 0x1b710b35131c471bULL,
+ 0x28db77f523047d84ULL, 0x32caab7b40c72493ULL,
+ 0x3c9ebe0a15c9bebcULL, 0x431d67c49c100d4cULL,
+ 0x4cc5d4becb3e42b6ULL, 0x597f299cfc657e2aULL,
+ 0x5fcb6fab3ad6faecULL, 0x6c44198c4a475817ULL
+};
+
+
+asmlinkage void sha512_transform_neon(u64 *digest, const void *data,
+ const u64 k[], unsigned int num_blks);
+
+
+static int sha512_neon_init(struct shash_desc *desc)
+{
+ struct sha512_state *sctx = shash_desc_ctx(desc);
+
+ sctx->state[0] = SHA512_H0;
+ sctx->state[1] = SHA512_H1;
+ sctx->state[2] = SHA512_H2;
+ sctx->state[3] = SHA512_H3;
+ sctx->state[4] = SHA512_H4;
+ sctx->state[5] = SHA512_H5;
+ sctx->state[6] = SHA512_H6;
+ sctx->state[7] = SHA512_H7;
+ sctx->count[0] = sctx->count[1] = 0;
+
+ return 0;
+}
+
+static int __sha512_neon_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len, unsigned int partial)
+{
+ struct sha512_state *sctx = shash_desc_ctx(desc);
+ unsigned int done = 0;
+
+ sctx->count[0] += len;
+ if (sctx->count[0] < len)
+ sctx->count[1]++;
+
+ if (partial) {
+ done = SHA512_BLOCK_SIZE - partial;
+ memcpy(sctx->buf + partial, data, done);
+ sha512_transform_neon(sctx->state, sctx->buf, sha512_k, 1);
+ }
+
+ if (len - done >= SHA512_BLOCK_SIZE) {
+ const unsigned int rounds = (len - done) / SHA512_BLOCK_SIZE;
+
+ sha512_transform_neon(sctx->state, data + done, sha512_k,
+ rounds);
+
+ done += rounds * SHA512_BLOCK_SIZE;
+ }
+
+ memcpy(sctx->buf, data + done, len - done);
+
+ return 0;
+}
+
+static int sha512_neon_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
+{
+ struct sha512_state *sctx = shash_desc_ctx(desc);
+ unsigned int partial = sctx->count[0] % SHA512_BLOCK_SIZE;
+ int res;
+
+ /* Handle the fast case right here */
+ if (partial + len < SHA512_BLOCK_SIZE) {
+ sctx->count[0] += len;
+ if (sctx->count[0] < len)
+ sctx->count[1]++;
+ memcpy(sctx->buf + partial, data, len);
+
+ return 0;
+ }
+
+ if (!may_use_simd()) {
+ res = crypto_sha512_update(desc, data, len);
+ } else {
+ kernel_neon_begin();
+ res = __sha512_neon_update(desc, data, len, partial);
+ kernel_neon_end();
+ }
+
+ return res;
+}
+
+
+/* Add padding and return the message digest. */
+static int sha512_neon_final(struct shash_desc *desc, u8 *out)
+{
+ struct sha512_state *sctx = shash_desc_ctx(desc);
+ unsigned int i, index, padlen;
+ __be64 *dst = (__be64 *)out;
+ __be64 bits[2];
+ static const u8 padding[SHA512_BLOCK_SIZE] = { 0x80, };
+
+ /* save number of bits */
+ bits[1] = cpu_to_be64(sctx->count[0] << 3);
+ bits[0] = cpu_to_be64(sctx->count[1] << 3 | sctx->count[0] >> 61);
+
+ /* Pad out to 112 mod 128 and append length */
+ index = sctx->count[0] & 0x7f;
+ padlen = (index < 112) ? (112 - index) : ((128+112) - index);
+
+ if (!may_use_simd()) {
+ crypto_sha512_update(desc, padding, padlen);
+ crypto_sha512_update(desc, (const u8 *)&bits, sizeof(bits));
+ } else {
+ kernel_neon_begin();
+ /* We need to fill a whole block for __sha512_neon_update() */
+ if (padlen <= 112) {
+ sctx->count[0] += padlen;
+ if (sctx->count[0] < padlen)
+ sctx->count[1]++;
+ memcpy(sctx->buf + index, padding, padlen);
+ } else {
+ __sha512_neon_update(desc, padding, padlen, index);
+ }
+ __sha512_neon_update(desc, (const u8 *)&bits,
+ sizeof(bits), 112);
+ kernel_neon_end();
+ }
+
+ /* Store state in digest */
+ for (i = 0; i < 8; i++)
+ dst[i] = cpu_to_be64(sctx->state[i]);
+
+ /* Wipe context */
+ memset(sctx, 0, sizeof(*sctx));
+
+ return 0;
+}
+
+static int sha512_neon_export(struct shash_desc *desc, void *out)
+{
+ struct sha512_state *sctx = shash_desc_ctx(desc);
+
+ memcpy(out, sctx, sizeof(*sctx));
+
+ return 0;
+}
+
+static int sha512_neon_import(struct shash_desc *desc, const void *in)
+{
+ struct sha512_state *sctx = shash_desc_ctx(desc);
+
+ memcpy(sctx, in, sizeof(*sctx));
+
+ return 0;
+}
+
+static int sha384_neon_init(struct shash_desc *desc)
+{
+ struct sha512_state *sctx = shash_desc_ctx(desc);
+
+ sctx->state[0] = SHA384_H0;
+ sctx->state[1] = SHA384_H1;
+ sctx->state[2] = SHA384_H2;
+ sctx->state[3] = SHA384_H3;
+ sctx->state[4] = SHA384_H4;
+ sctx->state[5] = SHA384_H5;
+ sctx->state[6] = SHA384_H6;
+ sctx->state[7] = SHA384_H7;
+
+ sctx->count[0] = sctx->count[1] = 0;
+
+ return 0;
+}
+
+static int sha384_neon_final(struct shash_desc *desc, u8 *hash)
+{
+ u8 D[SHA512_DIGEST_SIZE];
+
+ sha512_neon_final(desc, D);
+
+ memcpy(hash, D, SHA384_DIGEST_SIZE);
+ memset(D, 0, SHA512_DIGEST_SIZE);
+
+ return 0;
+}
+
+static struct shash_alg algs[] = { {
+ .digestsize = SHA512_DIGEST_SIZE,
+ .init = sha512_neon_init,
+ .update = sha512_neon_update,
+ .final = sha512_neon_final,
+ .export = sha512_neon_export,
+ .import = sha512_neon_import,
+ .descsize = sizeof(struct sha512_state),
+ .statesize = sizeof(struct sha512_state),
+ .base = {
+ .cra_name = "sha512",
+ .cra_driver_name = "sha512-neon",
+ .cra_priority = 250,
+ .cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ .cra_blocksize = SHA512_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
+ }
+}, {
+ .digestsize = SHA384_DIGEST_SIZE,
+ .init = sha384_neon_init,
+ .update = sha512_neon_update,
+ .final = sha384_neon_final,
+ .export = sha512_neon_export,
+ .import = sha512_neon_import,
+ .descsize = sizeof(struct sha512_state),
+ .statesize = sizeof(struct sha512_state),
+ .base = {
+ .cra_name = "sha384",
+ .cra_driver_name = "sha384-neon",
+ .cra_priority = 250,
+ .cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ .cra_blocksize = SHA384_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
+ }
+} };
+
+static int __init sha512_neon_mod_init(void)
+{
+ if (!cpu_has_neon())
+ return -ENODEV;
+
+ return crypto_register_shashes(algs, ARRAY_SIZE(algs));
+}
+
+static void __exit sha512_neon_mod_fini(void)
+{
+ crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
+}
+
+module_init(sha512_neon_mod_init);
+module_exit(sha512_neon_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("SHA512 Secure Hash Algorithm, NEON accelerated");
+
+MODULE_ALIAS("sha512");
+MODULE_ALIAS("sha384");
diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index 57f0584e8d9..f67fd3afebd 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -24,6 +24,8 @@
#include <asm/domain.h>
#include <asm/opcodes-virt.h>
#include <asm/asm-offsets.h>
+#include <asm/page.h>
+#include <asm/thread_info.h>
#define IOMEM(x) (x)
@@ -179,10 +181,10 @@
* Get current thread_info.
*/
.macro get_thread_info, rd
- ARM( mov \rd, sp, lsr #13 )
+ ARM( mov \rd, sp, lsr #THREAD_SIZE_ORDER + PAGE_SHIFT )
THUMB( mov \rd, sp )
- THUMB( lsr \rd, \rd, #13 )
- mov \rd, \rd, lsl #13
+ THUMB( lsr \rd, \rd, #THREAD_SIZE_ORDER + PAGE_SHIFT )
+ mov \rd, \rd, lsl #THREAD_SIZE_ORDER + PAGE_SHIFT
.endm
/*
@@ -425,4 +427,25 @@ THUMB( orr \reg , \reg , #PSR_T_BIT )
#endif
.endm
+ .irp c,,eq,ne,cs,cc,mi,pl,vs,vc,hi,ls,ge,lt,gt,le,hs,lo
+ .macro ret\c, reg
+#if __LINUX_ARM_ARCH__ < 6
+ mov\c pc, \reg
+#else
+ .ifeqs "\reg", "lr"
+ bx\c \reg
+ .else
+ mov\c pc, \reg
+ .endif
+#endif
+ .endm
+ .endr
+
+ .macro ret.w, reg
+ ret \reg
+#ifdef CONFIG_THUMB2_KERNEL
+ nop
+#endif
+ .endm
+
#endif /* __ASM_ASSEMBLER_H__ */
diff --git a/arch/arm/include/asm/cputype.h b/arch/arm/include/asm/cputype.h
index 8c2b7321a47..963a2515906 100644
--- a/arch/arm/include/asm/cputype.h
+++ b/arch/arm/include/asm/cputype.h
@@ -62,17 +62,18 @@
#define ARM_CPU_IMP_ARM 0x41
#define ARM_CPU_IMP_INTEL 0x69
-#define ARM_CPU_PART_ARM1136 0xB360
-#define ARM_CPU_PART_ARM1156 0xB560
-#define ARM_CPU_PART_ARM1176 0xB760
-#define ARM_CPU_PART_ARM11MPCORE 0xB020
-#define ARM_CPU_PART_CORTEX_A8 0xC080
-#define ARM_CPU_PART_CORTEX_A9 0xC090
-#define ARM_CPU_PART_CORTEX_A5 0xC050
-#define ARM_CPU_PART_CORTEX_A15 0xC0F0
-#define ARM_CPU_PART_CORTEX_A7 0xC070
-#define ARM_CPU_PART_CORTEX_A12 0xC0D0
-#define ARM_CPU_PART_CORTEX_A17 0xC0E0
+/* ARM implemented processors */
+#define ARM_CPU_PART_ARM1136 0x4100b360
+#define ARM_CPU_PART_ARM1156 0x4100b560
+#define ARM_CPU_PART_ARM1176 0x4100b760
+#define ARM_CPU_PART_ARM11MPCORE 0x4100b020
+#define ARM_CPU_PART_CORTEX_A8 0x4100c080
+#define ARM_CPU_PART_CORTEX_A9 0x4100c090
+#define ARM_CPU_PART_CORTEX_A5 0x4100c050
+#define ARM_CPU_PART_CORTEX_A7 0x4100c070
+#define ARM_CPU_PART_CORTEX_A12 0x4100c0d0
+#define ARM_CPU_PART_CORTEX_A17 0x4100c0e0
+#define ARM_CPU_PART_CORTEX_A15 0x4100c0f0
#define ARM_CPU_XSCALE_ARCH_MASK 0xe000
#define ARM_CPU_XSCALE_ARCH_V1 0x2000
@@ -171,14 +172,24 @@ static inline unsigned int __attribute_const__ read_cpuid_implementor(void)
return (read_cpuid_id() & 0xFF000000) >> 24;
}
-static inline unsigned int __attribute_const__ read_cpuid_part_number(void)
+/*
+ * The CPU part number is meaningless without referring to the CPU
+ * implementer: implementers are free to define their own part numbers
+ * which are permitted to clash with other implementer part numbers.
+ */
+static inline unsigned int __attribute_const__ read_cpuid_part(void)
+{
+ return read_cpuid_id() & 0xff00fff0;
+}
+
+static inline unsigned int __attribute_const__ __deprecated read_cpuid_part_number(void)
{
return read_cpuid_id() & 0xFFF0;
}
static inline unsigned int __attribute_const__ xscale_cpu_arch_version(void)
{
- return read_cpuid_part_number() & ARM_CPU_XSCALE_ARCH_MASK;
+ return read_cpuid_id() & ARM_CPU_XSCALE_ARCH_MASK;
}
static inline unsigned int __attribute_const__ read_cpuid_cachetype(void)
diff --git a/arch/arm/include/asm/crypto/sha1.h b/arch/arm/include/asm/crypto/sha1.h
new file mode 100644
index 00000000000..75e6a417416
--- /dev/null
+++ b/arch/arm/include/asm/crypto/sha1.h
@@ -0,0 +1,10 @@
+#ifndef ASM_ARM_CRYPTO_SHA1_H
+#define ASM_ARM_CRYPTO_SHA1_H
+
+#include <linux/crypto.h>
+#include <crypto/sha.h>
+
+extern int sha1_update_arm(struct shash_desc *desc, const u8 *data,
+ unsigned int len);
+
+#endif
diff --git a/arch/arm/include/asm/entry-macro-multi.S b/arch/arm/include/asm/entry-macro-multi.S
index 88d61815f0c..469a2b30fa2 100644
--- a/arch/arm/include/asm/entry-macro-multi.S
+++ b/arch/arm/include/asm/entry-macro-multi.S
@@ -35,5 +35,5 @@
\symbol_name:
mov r8, lr
arch_irq_handler_default
- mov pc, r8
+ ret r8
.endm
diff --git a/arch/arm/include/asm/glue-proc.h b/arch/arm/include/asm/glue-proc.h
index 74a8b84f3cb..74be7c22035 100644
--- a/arch/arm/include/asm/glue-proc.h
+++ b/arch/arm/include/asm/glue-proc.h
@@ -221,15 +221,6 @@
# endif
#endif
-#ifdef CONFIG_CPU_V7
-# ifdef CPU_NAME
-# undef MULTI_CPU
-# define MULTI_CPU
-# else
-# define CPU_NAME cpu_v7
-# endif
-#endif
-
#ifdef CONFIG_CPU_V7M
# ifdef CPU_NAME
# undef MULTI_CPU
@@ -248,6 +239,15 @@
# endif
#endif
+#ifdef CONFIG_CPU_V7
+/*
+ * Cortex-A9 needs a different suspend/resume function, so we need
+ * multiple CPU support for ARMv7 anyway.
+ */
+# undef MULTI_CPU
+# define MULTI_CPU
+#endif
+
#ifndef MULTI_CPU
#define cpu_proc_init __glue(CPU_NAME,_proc_init)
#define cpu_proc_fin __glue(CPU_NAME,_proc_fin)
diff --git a/arch/arm/include/asm/mcpm.h b/arch/arm/include/asm/mcpm.h
index 94060adba17..57ff7f2a308 100644
--- a/arch/arm/include/asm/mcpm.h
+++ b/arch/arm/include/asm/mcpm.h
@@ -217,6 +217,22 @@ int __mcpm_cluster_state(unsigned int cluster);
int __init mcpm_sync_init(
void (*power_up_setup)(unsigned int affinity_level));
+/**
+ * mcpm_loopback - make a run through the MCPM low-level code
+ *
+ * @cache_disable: pointer to function performing cache disabling
+ *
+ * This exercises the MCPM machinery by soft resetting the CPU and branching
+ * to the MCPM low-level entry code before returning to the caller.
+ * The @cache_disable function must do the necessary cache disabling to
+ * let the regular kernel init code turn it back on as if the CPU was
+ * hotplugged in. The MCPM state machine is set as if the cluster was
+ * initialized meaning the power_up_setup callback passed to mcpm_sync_init()
+ * will be invoked for all affinity levels. This may be useful to initialize
+ * some resources such as enabling the CCI that requires the cache to be off, or simply for testing purposes.
+ */
+int __init mcpm_loopback(void (*cache_disable)(void));
+
void __init mcpm_smp_set_ops(void);
#else
diff --git a/arch/arm/include/asm/mcs_spinlock.h b/arch/arm/include/asm/mcs_spinlock.h
new file mode 100644
index 00000000000..f652ad65840
--- /dev/null
+++ b/arch/arm/include/asm/mcs_spinlock.h
@@ -0,0 +1,23 @@
+#ifndef __ASM_MCS_LOCK_H
+#define __ASM_MCS_LOCK_H
+
+#ifdef CONFIG_SMP
+#include <asm/spinlock.h>
+
+/* MCS spin-locking. */
+#define arch_mcs_spin_lock_contended(lock) \
+do { \
+ /* Ensure prior stores are observed before we enter wfe. */ \
+ smp_mb(); \
+ while (!(smp_load_acquire(lock))) \
+ wfe(); \
+} while (0) \
+
+#define arch_mcs_spin_unlock_contended(lock) \
+do { \
+ smp_store_release(lock, 1); \
+ dsb_sev(); \
+} while (0)
+
+#endif /* CONFIG_SMP */
+#endif /* __ASM_MCS_LOCK_H */
diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h
index 2b751464d6f..e731018869a 100644
--- a/arch/arm/include/asm/memory.h
+++ b/arch/arm/include/asm/memory.h
@@ -91,9 +91,7 @@
* of this define that was meant to.
* Fortunately, there is no reference for this in noMMU mode, for now.
*/
-#ifndef TASK_SIZE
-#define TASK_SIZE (CONFIG_DRAM_SIZE)
-#endif
+#define TASK_SIZE UL(0xffffffff)
#ifndef TASK_UNMAPPED_BASE
#define TASK_UNMAPPED_BASE UL(0x00000000)
@@ -150,13 +148,11 @@
/*
* PLAT_PHYS_OFFSET is the offset (from zero) of the start of physical
- * memory. This is used for XIP and NoMMU kernels, or by kernels which
- * have their own mach/memory.h. Assembly code must always use
+ * memory. This is used for XIP and NoMMU kernels, and on platforms that don't
+ * have CONFIG_ARM_PATCH_PHYS_VIRT. Assembly code must always use
* PLAT_PHYS_OFFSET and not PHYS_OFFSET.
*/
-#ifndef PLAT_PHYS_OFFSET
#define PLAT_PHYS_OFFSET UL(CONFIG_PHYS_OFFSET)
-#endif
#ifndef __ASSEMBLY__
diff --git a/arch/arm/include/asm/perf_event.h b/arch/arm/include/asm/perf_event.h
index 755877527cf..c3a83691af8 100644
--- a/arch/arm/include/asm/perf_event.h
+++ b/arch/arm/include/asm/perf_event.h
@@ -12,15 +12,6 @@
#ifndef __ARM_PERF_EVENT_H__
#define __ARM_PERF_EVENT_H__
-/*
- * The ARMv7 CPU PMU supports up to 32 event counters.
- */
-#define ARMPMU_MAX_HWEVENTS 32
-
-#define HW_OP_UNSUPPORTED 0xFFFF
-#define C(_x) PERF_COUNT_HW_CACHE_##_x
-#define CACHE_OP_UNSUPPORTED 0xFFFF
-
#ifdef CONFIG_HW_PERF_EVENTS
struct pt_regs;
extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
diff --git a/arch/arm/include/asm/pgtable-3level-hwdef.h b/arch/arm/include/asm/pgtable-3level-hwdef.h
index 626989fec4d..9fd61c72a33 100644
--- a/arch/arm/include/asm/pgtable-3level-hwdef.h
+++ b/arch/arm/include/asm/pgtable-3level-hwdef.h
@@ -43,7 +43,7 @@
#define PMD_SECT_BUFFERABLE (_AT(pmdval_t, 1) << 2)
#define PMD_SECT_CACHEABLE (_AT(pmdval_t, 1) << 3)
#define PMD_SECT_USER (_AT(pmdval_t, 1) << 6) /* AP[1] */
-#define PMD_SECT_RDONLY (_AT(pmdval_t, 1) << 7) /* AP[2] */
+#define PMD_SECT_AP2 (_AT(pmdval_t, 1) << 7) /* read only */
#define PMD_SECT_S (_AT(pmdval_t, 3) << 8)
#define PMD_SECT_AF (_AT(pmdval_t, 1) << 10)
#define PMD_SECT_nG (_AT(pmdval_t, 1) << 11)
@@ -72,6 +72,7 @@
#define PTE_TABLE_BIT (_AT(pteval_t, 1) << 1)
#define PTE_BUFFERABLE (_AT(pteval_t, 1) << 2) /* AttrIndx[0] */
#define PTE_CACHEABLE (_AT(pteval_t, 1) << 3) /* AttrIndx[1] */
+#define PTE_AP2 (_AT(pteval_t, 1) << 7) /* AP[2] */
#define PTE_EXT_SHARED (_AT(pteval_t, 3) << 8) /* SH[1:0], inner shareable */
#define PTE_EXT_AF (_AT(pteval_t, 1) << 10) /* Access Flag */
#define PTE_EXT_NG (_AT(pteval_t, 1) << 11) /* nG */
diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h
index 85c60adc8b6..06e0bc0f8b0 100644
--- a/arch/arm/include/asm/pgtable-3level.h
+++ b/arch/arm/include/asm/pgtable-3level.h
@@ -79,18 +79,19 @@
#define L_PTE_PRESENT (_AT(pteval_t, 3) << 0) /* Present */
#define L_PTE_FILE (_AT(pteval_t, 1) << 2) /* only when !PRESENT */
#define L_PTE_USER (_AT(pteval_t, 1) << 6) /* AP[1] */
-#define L_PTE_RDONLY (_AT(pteval_t, 1) << 7) /* AP[2] */
#define L_PTE_SHARED (_AT(pteval_t, 3) << 8) /* SH[1:0], inner shareable */
#define L_PTE_YOUNG (_AT(pteval_t, 1) << 10) /* AF */
#define L_PTE_XN (_AT(pteval_t, 1) << 54) /* XN */
-#define L_PTE_DIRTY (_AT(pteval_t, 1) << 55) /* unused */
-#define L_PTE_SPECIAL (_AT(pteval_t, 1) << 56) /* unused */
+#define L_PTE_DIRTY (_AT(pteval_t, 1) << 55)
+#define L_PTE_SPECIAL (_AT(pteval_t, 1) << 56)
#define L_PTE_NONE (_AT(pteval_t, 1) << 57) /* PROT_NONE */
+#define L_PTE_RDONLY (_AT(pteval_t, 1) << 58) /* READ ONLY */
-#define PMD_SECT_VALID (_AT(pmdval_t, 1) << 0)
-#define PMD_SECT_DIRTY (_AT(pmdval_t, 1) << 55)
-#define PMD_SECT_SPLITTING (_AT(pmdval_t, 1) << 56)
-#define PMD_SECT_NONE (_AT(pmdval_t, 1) << 57)
+#define L_PMD_SECT_VALID (_AT(pmdval_t, 1) << 0)
+#define L_PMD_SECT_DIRTY (_AT(pmdval_t, 1) << 55)
+#define L_PMD_SECT_SPLITTING (_AT(pmdval_t, 1) << 56)
+#define L_PMD_SECT_NONE (_AT(pmdval_t, 1) << 57)
+#define L_PMD_SECT_RDONLY (_AT(pteval_t, 1) << 58)
/*
* To be used in assembly code with the upper page attributes.
@@ -207,27 +208,32 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr)
#define pte_huge(pte) (pte_val(pte) && !(pte_val(pte) & PTE_TABLE_BIT))
#define pte_mkhuge(pte) (__pte(pte_val(pte) & ~PTE_TABLE_BIT))
-#define pmd_young(pmd) (pmd_val(pmd) & PMD_SECT_AF)
+#define pmd_isset(pmd, val) ((u32)(val) == (val) ? pmd_val(pmd) & (val) \
+ : !!(pmd_val(pmd) & (val)))
+#define pmd_isclear(pmd, val) (!(pmd_val(pmd) & (val)))
+
+#define pmd_young(pmd) (pmd_isset((pmd), PMD_SECT_AF))
#define __HAVE_ARCH_PMD_WRITE
-#define pmd_write(pmd) (!(pmd_val(pmd) & PMD_SECT_RDONLY))
+#define pmd_write(pmd) (pmd_isclear((pmd), L_PMD_SECT_RDONLY))
+#define pmd_dirty(pmd) (pmd_isset((pmd), L_PMD_SECT_DIRTY))
#define pmd_hugewillfault(pmd) (!pmd_young(pmd) || !pmd_write(pmd))
#define pmd_thp_or_huge(pmd) (pmd_huge(pmd) || pmd_trans_huge(pmd))
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-#define pmd_trans_huge(pmd) (pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT))
-#define pmd_trans_splitting(pmd) (pmd_val(pmd) & PMD_SECT_SPLITTING)
+#define pmd_trans_huge(pmd) (pmd_val(pmd) && !pmd_table(pmd))
+#define pmd_trans_splitting(pmd) (pmd_isset((pmd), L_PMD_SECT_SPLITTING))
#endif
#define PMD_BIT_FUNC(fn,op) \
static inline pmd_t pmd_##fn(pmd_t pmd) { pmd_val(pmd) op; return pmd; }
-PMD_BIT_FUNC(wrprotect, |= PMD_SECT_RDONLY);
+PMD_BIT_FUNC(wrprotect, |= L_PMD_SECT_RDONLY);
PMD_BIT_FUNC(mkold, &= ~PMD_SECT_AF);
-PMD_BIT_FUNC(mksplitting, |= PMD_SECT_SPLITTING);
-PMD_BIT_FUNC(mkwrite, &= ~PMD_SECT_RDONLY);
-PMD_BIT_FUNC(mkdirty, |= PMD_SECT_DIRTY);
+PMD_BIT_FUNC(mksplitting, |= L_PMD_SECT_SPLITTING);
+PMD_BIT_FUNC(mkwrite, &= ~L_PMD_SECT_RDONLY);
+PMD_BIT_FUNC(mkdirty, |= L_PMD_SECT_DIRTY);
PMD_BIT_FUNC(mkyoung, |= PMD_SECT_AF);
#define pmd_mkhuge(pmd) (__pmd(pmd_val(pmd) & ~PMD_TABLE_BIT))
@@ -241,8 +247,8 @@ PMD_BIT_FUNC(mkyoung, |= PMD_SECT_AF);
static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
{
- const pmdval_t mask = PMD_SECT_USER | PMD_SECT_XN | PMD_SECT_RDONLY |
- PMD_SECT_VALID | PMD_SECT_NONE;
+ const pmdval_t mask = PMD_SECT_USER | PMD_SECT_XN | L_PMD_SECT_RDONLY |
+ L_PMD_SECT_VALID | L_PMD_SECT_NONE;
pmd_val(pmd) = (pmd_val(pmd) & ~mask) | (pgprot_val(newprot) & mask);
return pmd;
}
@@ -253,8 +259,13 @@ static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
BUG_ON(addr >= TASK_SIZE);
/* create a faulting entry if PROT_NONE protected */
- if (pmd_val(pmd) & PMD_SECT_NONE)
- pmd_val(pmd) &= ~PMD_SECT_VALID;
+ if (pmd_val(pmd) & L_PMD_SECT_NONE)
+ pmd_val(pmd) &= ~L_PMD_SECT_VALID;
+
+ if (pmd_write(pmd) && pmd_dirty(pmd))
+ pmd_val(pmd) &= ~PMD_SECT_AP2;
+ else
+ pmd_val(pmd) |= PMD_SECT_AP2;
*pmdp = __pmd(pmd_val(pmd) | PMD_SECT_nG);
flush_pmd_entry(pmdp);
diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index 5478e5d6ad8..01baef07cd0 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -214,18 +214,22 @@ static inline pte_t *pmd_page_vaddr(pmd_t pmd)
#define pte_clear(mm,addr,ptep) set_pte_ext(ptep, __pte(0), 0)
+#define pte_isset(pte, val) ((u32)(val) == (val) ? pte_val(pte) & (val) \
+ : !!(pte_val(pte) & (val)))
+#define pte_isclear(pte, val) (!(pte_val(pte) & (val)))
+
#define pte_none(pte) (!pte_val(pte))
-#define pte_present(pte) (pte_val(pte) & L_PTE_PRESENT)
-#define pte_valid(pte) (pte_val(pte) & L_PTE_VALID)
+#define pte_present(pte) (pte_isset((pte), L_PTE_PRESENT))
+#define pte_valid(pte) (pte_isset((pte), L_PTE_VALID))
#define pte_accessible(mm, pte) (mm_tlb_flush_pending(mm) ? pte_present(pte) : pte_valid(pte))
-#define pte_write(pte) (!(pte_val(pte) & L_PTE_RDONLY))
-#define pte_dirty(pte) (pte_val(pte) & L_PTE_DIRTY)
-#define pte_young(pte) (pte_val(pte) & L_PTE_YOUNG)
-#define pte_exec(pte) (!(pte_val(pte) & L_PTE_XN))
+#define pte_write(pte) (pte_isclear((pte), L_PTE_RDONLY))
+#define pte_dirty(pte) (pte_isset((pte), L_PTE_DIRTY))
+#define pte_young(pte) (pte_isset((pte), L_PTE_YOUNG))
+#define pte_exec(pte) (pte_isclear((pte), L_PTE_XN))
#define pte_special(pte) (0)
#define pte_valid_user(pte) \
- (pte_valid(pte) && (pte_val(pte) & L_PTE_USER) && pte_young(pte))
+ (pte_valid(pte) && pte_isset((pte), L_PTE_USER) && pte_young(pte))
#if __LINUX_ARM_ARCH__ < 6
static inline void __sync_icache_dcache(pte_t pteval)
diff --git a/arch/arm/include/asm/pmu.h b/arch/arm/include/asm/pmu.h
index ae1919be8f9..0b648c54129 100644
--- a/arch/arm/include/asm/pmu.h
+++ b/arch/arm/include/asm/pmu.h
@@ -42,6 +42,25 @@ struct arm_pmu_platdata {
#ifdef CONFIG_HW_PERF_EVENTS
+/*
+ * The ARMv7 CPU PMU supports up to 32 event counters.
+ */
+#define ARMPMU_MAX_HWEVENTS 32
+
+#define HW_OP_UNSUPPORTED 0xFFFF
+#define C(_x) PERF_COUNT_HW_CACHE_##_x
+#define CACHE_OP_UNSUPPORTED 0xFFFF
+
+#define PERF_MAP_ALL_UNSUPPORTED \
+ [0 ... PERF_COUNT_HW_MAX - 1] = HW_OP_UNSUPPORTED
+
+#define PERF_CACHE_MAP_ALL_UNSUPPORTED \
+[0 ... C(MAX) - 1] = { \
+ [0 ... C(OP_MAX) - 1] = { \
+ [0 ... C(RESULT_MAX) - 1] = CACHE_OP_UNSUPPORTED, \
+ }, \
+}
+
/* The events for a given PMU register set. */
struct pmu_hw_events {
/*
diff --git a/arch/arm/include/asm/ptrace.h b/arch/arm/include/asm/ptrace.h
index c877654fe3b..601264d983f 100644
--- a/arch/arm/include/asm/ptrace.h
+++ b/arch/arm/include/asm/ptrace.h
@@ -84,6 +84,12 @@ static inline long regs_return_value(struct pt_regs *regs)
#define instruction_pointer(regs) (regs)->ARM_pc
+#ifdef CONFIG_THUMB2_KERNEL
+#define frame_pointer(regs) (regs)->ARM_r7
+#else
+#define frame_pointer(regs) (regs)->ARM_fp
+#endif
+
static inline void instruction_pointer_set(struct pt_regs *regs,
unsigned long val)
{
diff --git a/arch/arm/include/asm/smp_scu.h b/arch/arm/include/asm/smp_scu.h
index 0393fbab8dd..bfe163c4002 100644
--- a/arch/arm/include/asm/smp_scu.h
+++ b/arch/arm/include/asm/smp_scu.h
@@ -11,7 +11,7 @@
static inline bool scu_a9_has_base(void)
{
- return read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A9;
+ return read_cpuid_part() == ARM_CPU_PART_CORTEX_A9;
}
static inline unsigned long scu_a9_get_base(void)
diff --git a/arch/arm/include/asm/stacktrace.h b/arch/arm/include/asm/stacktrace.h
index 4d0a16441b2..7722201ead1 100644
--- a/arch/arm/include/asm/stacktrace.h
+++ b/arch/arm/include/asm/stacktrace.h
@@ -1,13 +1,28 @@
#ifndef __ASM_STACKTRACE_H
#define __ASM_STACKTRACE_H
+#include <asm/ptrace.h>
+
struct stackframe {
+ /*
+ * FP member should hold R7 when CONFIG_THUMB2_KERNEL is enabled
+ * and R11 otherwise.
+ */
unsigned long fp;
unsigned long sp;
unsigned long lr;
unsigned long pc;
};
+static __always_inline
+void arm_get_current_stackframe(struct pt_regs *regs, struct stackframe *frame)
+{
+ frame->fp = frame_pointer(regs);
+ frame->sp = regs->ARM_sp;
+ frame->lr = regs->ARM_lr;
+ frame->pc = regs->ARM_pc;
+}
+
extern int unwind_frame(struct stackframe *frame);
extern void walk_stackframe(struct stackframe *frame,
int (*fn)(struct stackframe *, void *), void *data);
diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
index e4e4208a913..fc44d3761f9 100644
--- a/arch/arm/include/asm/thread_info.h
+++ b/arch/arm/include/asm/thread_info.h
@@ -14,9 +14,10 @@
#include <linux/compiler.h>
#include <asm/fpstate.h>
+#include <asm/page.h>
#define THREAD_SIZE_ORDER 1
-#define THREAD_SIZE 8192
+#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
#define THREAD_START_SP (THREAD_SIZE - 8)
#ifndef __ASSEMBLY__
diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h
index 75d95799b6e..a4cd7af475e 100644
--- a/arch/arm/include/asm/uaccess.h
+++ b/arch/arm/include/asm/uaccess.h
@@ -107,6 +107,8 @@ static inline void set_fs(mm_segment_t fs)
extern int __get_user_1(void *);
extern int __get_user_2(void *);
extern int __get_user_4(void *);
+extern int __get_user_lo8(void *);
+extern int __get_user_8(void *);
#define __GUP_CLOBBER_1 "lr", "cc"
#ifdef CONFIG_CPU_USE_DOMAINS
@@ -115,6 +117,8 @@ extern int __get_user_4(void *);
#define __GUP_CLOBBER_2 "lr", "cc"
#endif
#define __GUP_CLOBBER_4 "lr", "cc"
+#define __GUP_CLOBBER_lo8 "lr", "cc"
+#define __GUP_CLOBBER_8 "lr", "cc"
#define __get_user_x(__r2,__p,__e,__l,__s) \
__asm__ __volatile__ ( \
@@ -125,11 +129,19 @@ extern int __get_user_4(void *);
: "0" (__p), "r" (__l) \
: __GUP_CLOBBER_##__s)
+/* narrowing a double-word get into a single 32bit word register: */
+#ifdef __ARMEB__
+#define __get_user_xb(__r2, __p, __e, __l, __s) \
+ __get_user_x(__r2, __p, __e, __l, lo8)
+#else
+#define __get_user_xb __get_user_x
+#endif
+
#define __get_user_check(x,p) \
({ \
unsigned long __limit = current_thread_info()->addr_limit - 1; \
register const typeof(*(p)) __user *__p asm("r0") = (p);\
- register unsigned long __r2 asm("r2"); \
+ register typeof(x) __r2 asm("r2"); \
register unsigned long __l asm("r1") = __limit; \
register int __e asm("r0"); \
switch (sizeof(*(__p))) { \
@@ -142,6 +154,12 @@ extern int __get_user_4(void *);
case 4: \
__get_user_x(__r2, __p, __e, __l, 4); \
break; \
+ case 8: \
+ if (sizeof((x)) < 8) \
+ __get_user_xb(__r2, __p, __e, __l, 4); \
+ else \
+ __get_user_x(__r2, __p, __e, __l, 8); \
+ break; \
default: __e = __get_user_bad(); break; \
} \
x = (typeof(*(p))) __r2; \
@@ -224,7 +242,7 @@ static inline void set_fs(mm_segment_t fs)
#define access_ok(type,addr,size) (__range_ok(addr,size) == 0)
#define user_addr_max() \
- (segment_eq(get_fs(), USER_DS) ? TASK_SIZE : ~0UL)
+ (segment_eq(get_fs(), KERNEL_DS) ? ~0UL : get_fs())
/*
* The "__xxx" versions of the user access functions do not verify the
diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h
index 43876245fc5..21ca0cebcab 100644
--- a/arch/arm/include/asm/unistd.h
+++ b/arch/arm/include/asm/unistd.h
@@ -15,7 +15,17 @@
#include <uapi/asm/unistd.h>
+/*
+ * This may need to be greater than __NR_last_syscall+1 in order to
+ * account for the padding in the syscall table
+ */
#define __NR_syscalls (384)
+
+/*
+ * *NOTE*: This is a ghost syscall private to the kernel. Only the
+ * __kuser_cmpxchg code in entry-armv.S should be aware of its
+ * existence. Don't ever use this from user code.
+ */
#define __ARM_NR_cmpxchg (__ARM_NR_BASE+0x00fff0)
#define __ARCH_WANT_STAT64
diff --git a/arch/arm/include/uapi/asm/unistd.h b/arch/arm/include/uapi/asm/unistd.h
index ba94446c72d..acd5b66ea3a 100644
--- a/arch/arm/include/uapi/asm/unistd.h
+++ b/arch/arm/include/uapi/asm/unistd.h
@@ -411,11 +411,6 @@
#define __NR_renameat2 (__NR_SYSCALL_BASE+382)
/*
- * This may need to be greater than __NR_last_syscall+1 in order to
- * account for the padding in the syscall table
- */
-
-/*
* The following SWIs are ARM private.
*/
#define __ARM_NR_BASE (__NR_SYSCALL_BASE+0x0f0000)
@@ -426,12 +421,6 @@
#define __ARM_NR_set_tls (__ARM_NR_BASE+5)
/*
- * *NOTE*: This is a ghost syscall private to the kernel. Only the
- * __kuser_cmpxchg code in entry-armv.S should be aware of its
- * existence. Don't ever use this from user code.
- */
-
-/*
* The following syscalls are obsolete and no longer available for EABI.
*/
#if !defined(__KERNEL__)
diff --git a/arch/arm/kernel/debug.S b/arch/arm/kernel/debug.S
index 14f7c3b1463..78c91b5f97d 100644
--- a/arch/arm/kernel/debug.S
+++ b/arch/arm/kernel/debug.S
@@ -90,7 +90,7 @@ ENTRY(printascii)
ldrneb r1, [r0], #1
teqne r1, #0
bne 1b
- mov pc, lr
+ ret lr
ENDPROC(printascii)
ENTRY(printch)
@@ -105,7 +105,7 @@ ENTRY(debug_ll_addr)
addruart r2, r3, ip
str r2, [r0]
str r3, [r1]
- mov pc, lr
+ ret lr
ENDPROC(debug_ll_addr)
#endif
@@ -116,7 +116,7 @@ ENTRY(printascii)
mov r0, #0x04 @ SYS_WRITE0
ARM( svc #0x123456 )
THUMB( svc #0xab )
- mov pc, lr
+ ret lr
ENDPROC(printascii)
ENTRY(printch)
@@ -125,14 +125,14 @@ ENTRY(printch)
mov r0, #0x03 @ SYS_WRITEC
ARM( svc #0x123456 )
THUMB( svc #0xab )
- mov pc, lr
+ ret lr
ENDPROC(printch)
ENTRY(debug_ll_addr)
mov r2, #0
str r2, [r0]
str r2, [r1]
- mov pc, lr
+ ret lr
ENDPROC(debug_ll_addr)
#endif
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 52a949a8077..36276cdccfb 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -224,7 +224,7 @@ svc_preempt:
1: bl preempt_schedule_irq @ irq en/disable is done inside
ldr r0, [tsk, #TI_FLAGS] @ get new tasks TI_FLAGS
tst r0, #_TIF_NEED_RESCHED
- moveq pc, r8 @ go again
+ reteq r8 @ go again
b 1b
#endif
@@ -490,7 +490,7 @@ ENDPROC(__und_usr)
.pushsection .fixup, "ax"
.align 2
4: str r4, [sp, #S_PC] @ retry current instruction
- mov pc, r9
+ ret r9
.popsection
.pushsection __ex_table,"a"
.long 1b, 4b
@@ -552,7 +552,7 @@ call_fpe:
#endif
tst r0, #0x08000000 @ only CDP/CPRT/LDC/STC have bit 27
tstne r0, #0x04000000 @ bit 26 set on both ARM and Thumb-2
- moveq pc, lr
+ reteq lr
and r8, r0, #0x00000f00 @ mask out CP number
THUMB( lsr r8, r8, #8 )
mov r7, #1
@@ -571,33 +571,33 @@ call_fpe:
THUMB( add pc, r8 )
nop
- movw_pc lr @ CP#0
+ ret.w lr @ CP#0
W(b) do_fpe @ CP#1 (FPE)
W(b) do_fpe @ CP#2 (FPE)
- movw_pc lr @ CP#3
+ ret.w lr @ CP#3
#ifdef CONFIG_CRUNCH
b crunch_task_enable @ CP#4 (MaverickCrunch)
b crunch_task_enable @ CP#5 (MaverickCrunch)
b crunch_task_enable @ CP#6 (MaverickCrunch)
#else
- movw_pc lr @ CP#4
- movw_pc lr @ CP#5
- movw_pc lr @ CP#6
+ ret.w lr @ CP#4
+ ret.w lr @ CP#5
+ ret.w lr @ CP#6
#endif
- movw_pc lr @ CP#7
- movw_pc lr @ CP#8
- movw_pc lr @ CP#9
+ ret.w lr @ CP#7
+ ret.w lr @ CP#8
+ ret.w lr @ CP#9
#ifdef CONFIG_VFP
W(b) do_vfp @ CP#10 (VFP)
W(b) do_vfp @ CP#11 (VFP)
#else
- movw_pc lr @ CP#10 (VFP)
- movw_pc lr @ CP#11 (VFP)
+ ret.w lr @ CP#10 (VFP)
+ ret.w lr @ CP#11 (VFP)
#endif
- movw_pc lr @ CP#12
- movw_pc lr @ CP#13
- movw_pc lr @ CP#14 (Debug)
- movw_pc lr @ CP#15 (Control)
+ ret.w lr @ CP#12
+ ret.w lr @ CP#13
+ ret.w lr @ CP#14 (Debug)
+ ret.w lr @ CP#15 (Control)
#ifdef NEED_CPU_ARCHITECTURE
.align 2
@@ -649,7 +649,7 @@ ENTRY(fp_enter)
.popsection
ENTRY(no_fp)
- mov pc, lr
+ ret lr
ENDPROC(no_fp)
__und_usr_fault_32:
@@ -745,7 +745,7 @@ ENDPROC(__switch_to)
#ifdef CONFIG_ARM_THUMB
bx \reg
#else
- mov pc, \reg
+ ret \reg
#endif
.endm
@@ -837,7 +837,7 @@ kuser_cmpxchg64_fixup:
#if __LINUX_ARM_ARCH__ < 6
bcc kuser_cmpxchg32_fixup
#endif
- mov pc, lr
+ ret lr
.previous
#else
@@ -905,7 +905,7 @@ kuser_cmpxchg32_fixup:
subs r8, r4, r7
rsbcss r8, r8, #(2b - 1b)
strcs r7, [sp, #S_PC]
- mov pc, lr
+ ret lr
.previous
#else
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index 7139d4a7dea..e52fe5a2d84 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -8,6 +8,7 @@
* published by the Free Software Foundation.
*/
+#include <asm/assembler.h>
#include <asm/unistd.h>
#include <asm/ftrace.h>
#include <asm/unwind.h>
@@ -88,7 +89,7 @@ ENTRY(ret_from_fork)
cmp r5, #0
movne r0, r4
adrne lr, BSYM(1f)
- movne pc, r5
+ retne r5
1: get_thread_info tsk
b ret_slow_syscall
ENDPROC(ret_from_fork)
@@ -290,7 +291,7 @@ ENDPROC(ftrace_graph_caller_old)
.macro mcount_exit
ldmia sp!, {r0-r3, ip, lr}
- mov pc, ip
+ ret ip
.endm
ENTRY(__gnu_mcount_nc)
@@ -298,7 +299,7 @@ UNWIND(.fnstart)
#ifdef CONFIG_DYNAMIC_FTRACE
mov ip, lr
ldmia sp!, {lr}
- mov pc, ip
+ ret ip
#else
__mcount
#endif
@@ -333,12 +334,12 @@ return_to_handler:
bl ftrace_return_to_handler
mov lr, r0 @ r0 has real ret addr
ldmia sp!, {r0-r3}
- mov pc, lr
+ ret lr
#endif
ENTRY(ftrace_stub)
.Lftrace_stub:
- mov pc, lr
+ ret lr
ENDPROC(ftrace_stub)
#endif /* CONFIG_FUNCTION_TRACER */
@@ -561,7 +562,7 @@ sys_mmap2:
streq r5, [sp, #4]
beq sys_mmap_pgoff
mov r0, #-EINVAL
- mov pc, lr
+ ret lr
#else
str r5, [sp, #4]
b sys_mmap_pgoff
diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S
index 5d702f8900b..8db307d0954 100644
--- a/arch/arm/kernel/entry-header.S
+++ b/arch/arm/kernel/entry-header.S
@@ -240,12 +240,6 @@
movs pc, lr @ return & move spsr_svc into cpsr
.endm
- @
- @ 32-bit wide "mov pc, reg"
- @
- .macro movw_pc, reg
- mov pc, \reg
- .endm
#else /* CONFIG_THUMB2_KERNEL */
.macro svc_exit, rpsr, irq = 0
.if \irq != 0
@@ -304,14 +298,6 @@
movs pc, lr @ return & move spsr_svc into cpsr
.endm
#endif /* ifdef CONFIG_CPU_V7M / else */
-
- @
- @ 32-bit wide "mov pc, reg"
- @
- .macro movw_pc, reg
- mov pc, \reg
- nop
- .endm
#endif /* !CONFIG_THUMB2_KERNEL */
/*
diff --git a/arch/arm/kernel/fiqasm.S b/arch/arm/kernel/fiqasm.S
index 207f9d65201..8dd26e1a9bd 100644
--- a/arch/arm/kernel/fiqasm.S
+++ b/arch/arm/kernel/fiqasm.S
@@ -32,7 +32,7 @@ ENTRY(__set_fiq_regs)
ldr lr, [r0]
msr cpsr_c, r1 @ return to SVC mode
mov r0, r0 @ avoid hazard prior to ARMv4
- mov pc, lr
+ ret lr
ENDPROC(__set_fiq_regs)
ENTRY(__get_fiq_regs)
@@ -45,5 +45,5 @@ ENTRY(__get_fiq_regs)
str lr, [r0]
msr cpsr_c, r1 @ return to SVC mode
mov r0, r0 @ avoid hazard prior to ARMv4
- mov pc, lr
+ ret lr
ENDPROC(__get_fiq_regs)
diff --git a/arch/arm/kernel/head-common.S b/arch/arm/kernel/head-common.S
index 572a38335c9..8733012d231 100644
--- a/arch/arm/kernel/head-common.S
+++ b/arch/arm/kernel/head-common.S
@@ -10,6 +10,7 @@
* published by the Free Software Foundation.
*
*/
+#include <asm/assembler.h>
#define ATAG_CORE 0x54410001
#define ATAG_CORE_SIZE ((2*4 + 3*4) >> 2)
@@ -61,10 +62,10 @@ __vet_atags:
cmp r5, r6
bne 1f
-2: mov pc, lr @ atag/dtb pointer is ok
+2: ret lr @ atag/dtb pointer is ok
1: mov r2, #0
- mov pc, lr
+ ret lr
ENDPROC(__vet_atags)
/*
@@ -162,7 +163,7 @@ __lookup_processor_type:
cmp r5, r6
blo 1b
mov r5, #0 @ unknown processor
-2: mov pc, lr
+2: ret lr
ENDPROC(__lookup_processor_type)
/*
diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S
index 716249cc2ee..cc176b67c13 100644
--- a/arch/arm/kernel/head-nommu.S
+++ b/arch/arm/kernel/head-nommu.S
@@ -82,7 +82,7 @@ ENTRY(stext)
adr lr, BSYM(1f) @ return (PIC) address
ARM( add pc, r10, #PROCINFO_INITFUNC )
THUMB( add r12, r10, #PROCINFO_INITFUNC )
- THUMB( mov pc, r12 )
+ THUMB( ret r12 )
1: b __after_proc_init
ENDPROC(stext)
@@ -119,7 +119,7 @@ ENTRY(secondary_startup)
mov r13, r12 @ __secondary_switched address
ARM( add pc, r10, #PROCINFO_INITFUNC )
THUMB( add r12, r10, #PROCINFO_INITFUNC )
- THUMB( mov pc, r12 )
+ THUMB( ret r12 )
ENDPROC(secondary_startup)
ENTRY(__secondary_switched)
@@ -164,7 +164,7 @@ __after_proc_init:
#endif
mcr p15, 0, r0, c1, c0, 0 @ write control reg
#endif /* CONFIG_CPU_CP15 */
- mov pc, r13
+ ret r13
ENDPROC(__after_proc_init)
.ltorg
@@ -254,7 +254,7 @@ ENTRY(__setup_mpu)
orr r0, r0, #CR_M @ Set SCTRL.M (MPU on)
mcr p15, 0, r0, c1, c0, 0 @ Enable MPU
isb
- mov pc,lr
+ ret lr
ENDPROC(__setup_mpu)
#endif
#include "head-common.S"
diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index 2c35f0ff2fd..664eee8c4a2 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -140,7 +140,7 @@ ENTRY(stext)
mov r8, r4 @ set TTBR1 to swapper_pg_dir
ARM( add pc, r10, #PROCINFO_INITFUNC )
THUMB( add r12, r10, #PROCINFO_INITFUNC )
- THUMB( mov pc, r12 )
+ THUMB( ret r12 )
1: b __enable_mmu
ENDPROC(stext)
.ltorg
@@ -335,7 +335,7 @@ __create_page_tables:
sub r4, r4, #0x1000 @ point to the PGD table
mov r4, r4, lsr #ARCH_PGD_SHIFT
#endif
- mov pc, lr
+ ret lr
ENDPROC(__create_page_tables)
.ltorg
.align
@@ -383,7 +383,7 @@ ENTRY(secondary_startup)
ARM( add pc, r10, #PROCINFO_INITFUNC ) @ initialise processor
@ (return control reg)
THUMB( add r12, r10, #PROCINFO_INITFUNC )
- THUMB( mov pc, r12 )
+ THUMB( ret r12 )
ENDPROC(secondary_startup)
/*
@@ -468,7 +468,7 @@ ENTRY(__turn_mmu_on)
instr_sync
mov r3, r3
mov r3, r13
- mov pc, r3
+ ret r3
__turn_mmu_on_end:
ENDPROC(__turn_mmu_on)
.popsection
@@ -487,7 +487,7 @@ __fixup_smp:
orr r4, r4, #0x0000b000
orr r4, r4, #0x00000020 @ val 0x4100b020
teq r3, r4 @ ARM 11MPCore?
- moveq pc, lr @ yes, assume SMP
+ reteq lr @ yes, assume SMP
mrc p15, 0, r0, c0, c0, 5 @ read MPIDR
and r0, r0, #0xc0000000 @ multiprocessing extensions and
@@ -500,7 +500,7 @@ __fixup_smp:
orr r4, r4, #0x0000c000
orr r4, r4, #0x00000090
teq r3, r4 @ Check for ARM Cortex-A9
- movne pc, lr @ Not ARM Cortex-A9,
+ retne lr @ Not ARM Cortex-A9,
@ If a future SoC *does* use 0x0 as the PERIPH_BASE, then the
@ below address check will need to be #ifdef'd or equivalent
@@ -512,7 +512,7 @@ __fixup_smp:
ARM_BE8(rev r0, r0) @ byteswap if big endian
and r0, r0, #0x3 @ number of CPUs
teq r0, #0x0 @ is 1?
- movne pc, lr
+ retne lr
__fixup_smp_on_up:
adr r0, 1f
@@ -539,7 +539,7 @@ smp_on_up:
.text
__do_fixup_smp_on_up:
cmp r4, r5
- movhs pc, lr
+ reths lr
ldmia r4!, {r0, r6}
ARM( str r6, [r0, r3] )
THUMB( add r0, r0, r3 )
@@ -672,7 +672,7 @@ ARM_BE8(rev16 ip, ip)
2: cmp r4, r5
ldrcc r7, [r4], #4 @ use branch for delay slot
bcc 1b
- mov pc, lr
+ ret lr
#endif
ENDPROC(__fixup_a_pv_table)
diff --git a/arch/arm/kernel/hyp-stub.S b/arch/arm/kernel/hyp-stub.S
index 797b1a6a490..56ce6290c83 100644
--- a/arch/arm/kernel/hyp-stub.S
+++ b/arch/arm/kernel/hyp-stub.S
@@ -99,7 +99,7 @@ ENTRY(__hyp_stub_install_secondary)
* immediately.
*/
compare_cpu_mode_with_primary r4, r5, r6, r7
- movne pc, lr
+ retne lr
/*
* Once we have given up on one CPU, we do not try to install the
@@ -111,7 +111,7 @@ ENTRY(__hyp_stub_install_secondary)
*/
cmp r4, #HYP_MODE
- movne pc, lr @ give up if the CPU is not in HYP mode
+ retne lr @ give up if the CPU is not in HYP mode
/*
* Configure HSCTLR to set correct exception endianness/instruction set
@@ -201,7 +201,7 @@ ENDPROC(__hyp_get_vectors)
@ fall through
ENTRY(__hyp_set_vectors)
__HVC(0)
- mov pc, lr
+ ret lr
ENDPROC(__hyp_set_vectors)
#ifndef ZIMAGE
diff --git a/arch/arm/kernel/iwmmxt.S b/arch/arm/kernel/iwmmxt.S
index 2b32978ae90..ad58e565fe9 100644
--- a/arch/arm/kernel/iwmmxt.S
+++ b/arch/arm/kernel/iwmmxt.S
@@ -100,7 +100,7 @@ ENTRY(iwmmxt_task_enable)
get_thread_info r10
#endif
4: dec_preempt_count r10, r3
- mov pc, r9 @ normal exit from exception
+ ret r9 @ normal exit from exception
concan_save:
@@ -144,7 +144,7 @@ concan_dump:
wstrd wR15, [r1, #MMX_WR15]
2: teq r0, #0 @ anything to load?
- moveq pc, lr @ if not, return
+ reteq lr @ if not, return
concan_load:
@@ -177,10 +177,10 @@ concan_load:
@ clear CUP/MUP (only if r1 != 0)
teq r1, #0
mov r2, #0
- moveq pc, lr
+ reteq lr
tmcr wCon, r2
- mov pc, lr
+ ret lr
/*
* Back up Concan regs to save area and disable access to them
@@ -266,7 +266,7 @@ ENTRY(iwmmxt_task_copy)
mov r3, lr @ preserve return address
bl concan_dump
msr cpsr_c, ip @ restore interrupt mode
- mov pc, r3
+ ret r3
/*
* Restore Concan state from given memory address
@@ -302,7 +302,7 @@ ENTRY(iwmmxt_task_restore)
mov r3, lr @ preserve return address
bl concan_load
msr cpsr_c, ip @ restore interrupt mode
- mov pc, r3
+ ret r3
/*
* Concan handling on task switch
@@ -324,7 +324,7 @@ ENTRY(iwmmxt_task_switch)
add r3, r0, #TI_IWMMXT_STATE @ get next task Concan save area
ldr r2, [r2] @ get current Concan owner
teq r2, r3 @ next task owns it?
- movne pc, lr @ no: leave Concan disabled
+ retne lr @ no: leave Concan disabled
1: @ flip Concan access
XSC(eor r1, r1, #0x3)
@@ -351,7 +351,7 @@ ENTRY(iwmmxt_task_release)
eors r0, r0, r1 @ if equal...
streq r0, [r3] @ then clear ownership
msr cpsr_c, r2 @ restore interrupts
- mov pc, lr
+ ret lr
.data
concan_owner:
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index 4238bcba9d6..266cba46db3 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -560,11 +560,16 @@ user_backtrace(struct frame_tail __user *tail,
struct perf_callchain_entry *entry)
{
struct frame_tail buftail;
+ unsigned long err;
- /* Also check accessibility of one struct frame_tail beyond */
if (!access_ok(VERIFY_READ, tail, sizeof(buftail)))
return NULL;
- if (__copy_from_user_inatomic(&buftail, tail, sizeof(buftail)))
+
+ pagefault_disable();
+ err = __copy_from_user_inatomic(&buftail, tail, sizeof(buftail));
+ pagefault_enable();
+
+ if (err)
return NULL;
perf_callchain_store(entry, buftail.lr);
@@ -590,6 +595,10 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
}
perf_callchain_store(entry, regs->ARM_pc);
+
+ if (!current->mm)
+ return;
+
tail = (struct frame_tail __user *)regs->ARM_fp - 1;
while ((entry->nr < PERF_MAX_STACK_DEPTH) &&
@@ -621,10 +630,7 @@ perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
return;
}
- fr.fp = regs->ARM_fp;
- fr.sp = regs->ARM_sp;
- fr.lr = regs->ARM_lr;
- fr.pc = regs->ARM_pc;
+ arm_get_current_stackframe(regs, &fr);
walk_stackframe(&fr, callchain_trace, entry);
}
diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c
index af9e35e8836..e6a6edbec61 100644
--- a/arch/arm/kernel/perf_event_cpu.c
+++ b/arch/arm/kernel/perf_event_cpu.c
@@ -233,14 +233,17 @@ static struct of_device_id cpu_pmu_of_device_ids[] = {
{.compatible = "arm,cortex-a7-pmu", .data = armv7_a7_pmu_init},
{.compatible = "arm,cortex-a5-pmu", .data = armv7_a5_pmu_init},
{.compatible = "arm,arm11mpcore-pmu", .data = armv6mpcore_pmu_init},
- {.compatible = "arm,arm1176-pmu", .data = armv6pmu_init},
- {.compatible = "arm,arm1136-pmu", .data = armv6pmu_init},
+ {.compatible = "arm,arm1176-pmu", .data = armv6_1176_pmu_init},
+ {.compatible = "arm,arm1136-pmu", .data = armv6_1136_pmu_init},
{.compatible = "qcom,krait-pmu", .data = krait_pmu_init},
{},
};
static struct platform_device_id cpu_pmu_plat_device_ids[] = {
{.name = "arm-pmu"},
+ {.name = "armv6-pmu"},
+ {.name = "armv7-pmu"},
+ {.name = "xscale-pmu"},
{},
};
@@ -250,40 +253,43 @@ static struct platform_device_id cpu_pmu_plat_device_ids[] = {
static int probe_current_pmu(struct arm_pmu *pmu)
{
int cpu = get_cpu();
- unsigned long implementor = read_cpuid_implementor();
- unsigned long part_number = read_cpuid_part_number();
int ret = -ENODEV;
pr_info("probing PMU on CPU %d\n", cpu);
+ switch (read_cpuid_part()) {
/* ARM Ltd CPUs. */
- if (implementor == ARM_CPU_IMP_ARM) {
- switch (part_number) {
- case ARM_CPU_PART_ARM1136:
- case ARM_CPU_PART_ARM1156:
- case ARM_CPU_PART_ARM1176:
- ret = armv6pmu_init(pmu);
- break;
- case ARM_CPU_PART_ARM11MPCORE:
- ret = armv6mpcore_pmu_init(pmu);
- break;
- case ARM_CPU_PART_CORTEX_A8:
- ret = armv7_a8_pmu_init(pmu);
- break;
- case ARM_CPU_PART_CORTEX_A9:
- ret = armv7_a9_pmu_init(pmu);
- break;
- }
- /* Intel CPUs [xscale]. */
- } else if (implementor == ARM_CPU_IMP_INTEL) {
- switch (xscale_cpu_arch_version()) {
- case ARM_CPU_XSCALE_ARCH_V1:
- ret = xscale1pmu_init(pmu);
- break;
- case ARM_CPU_XSCALE_ARCH_V2:
- ret = xscale2pmu_init(pmu);
- break;
+ case ARM_CPU_PART_ARM1136:
+ ret = armv6_1136_pmu_init(pmu);
+ break;
+ case ARM_CPU_PART_ARM1156:
+ ret = armv6_1156_pmu_init(pmu);
+ break;
+ case ARM_CPU_PART_ARM1176:
+ ret = armv6_1176_pmu_init(pmu);
+ break;
+ case ARM_CPU_PART_ARM11MPCORE:
+ ret = armv6mpcore_pmu_init(pmu);
+ break;
+ case ARM_CPU_PART_CORTEX_A8:
+ ret = armv7_a8_pmu_init(pmu);
+ break;
+ case ARM_CPU_PART_CORTEX_A9:
+ ret = armv7_a9_pmu_init(pmu);
+ break;
+
+ default:
+ if (read_cpuid_implementor() == ARM_CPU_IMP_INTEL) {
+ switch (xscale_cpu_arch_version()) {
+ case ARM_CPU_XSCALE_ARCH_V1:
+ ret = xscale1pmu_init(pmu);
+ break;
+ case ARM_CPU_XSCALE_ARCH_V2:
+ ret = xscale2pmu_init(pmu);
+ break;
+ }
}
+ break;
}
put_cpu();
diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c
index 03664b0e8fa..abfeb04f321 100644
--- a/arch/arm/kernel/perf_event_v6.c
+++ b/arch/arm/kernel/perf_event_v6.c
@@ -65,13 +65,11 @@ enum armv6_counters {
* accesses/misses in hardware.
*/
static const unsigned armv6_perf_map[PERF_COUNT_HW_MAX] = {
+ PERF_MAP_ALL_UNSUPPORTED,
[PERF_COUNT_HW_CPU_CYCLES] = ARMV6_PERFCTR_CPU_CYCLES,
[PERF_COUNT_HW_INSTRUCTIONS] = ARMV6_PERFCTR_INSTR_EXEC,
- [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED,
- [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED,
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6_PERFCTR_BR_EXEC,
[PERF_COUNT_HW_BRANCH_MISSES] = ARMV6_PERFCTR_BR_MISPREDICT,
- [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED,
[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV6_PERFCTR_IBUF_STALL,
[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = ARMV6_PERFCTR_LSU_FULL_STALL,
};
@@ -79,116 +77,31 @@ static const unsigned armv6_perf_map[PERF_COUNT_HW_MAX] = {
static const unsigned armv6_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
- [C(L1D)] = {
- /*
- * The performance counters don't differentiate between read
- * and write accesses/misses so this isn't strictly correct,
- * but it's the best we can do. Writes and reads get
- * combined.
- */
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS,
- [C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS,
- [C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(L1I)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(LL)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(DTLB)] = {
- /*
- * The ARM performance counters can count micro DTLB misses,
- * micro ITLB misses and main TLB misses. There isn't an event
- * for TLB misses, so use the micro misses here and if users
- * want the main TLB misses they can use a raw counter.
- */
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(ITLB)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(BPU)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(NODE)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
+ PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+ /*
+ * The performance counters don't differentiate between read and write
+ * accesses/misses so this isn't strictly correct, but it's the best we
+ * can do. Writes and reads get combined.
+ */
+ [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS,
+ [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS,
+
+ [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS,
+
+ /*
+ * The ARM performance counters can count micro DTLB misses, micro ITLB
+ * misses and main TLB misses. There isn't an event for TLB misses, so
+ * use the micro misses here and if users want the main TLB misses they
+ * can use a raw counter.
+ */
+ [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS,
+ [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS,
+
+ [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS,
+ [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS,
};
enum armv6mpcore_perf_types {
@@ -220,13 +133,11 @@ enum armv6mpcore_perf_types {
* accesses/misses in hardware.
*/
static const unsigned armv6mpcore_perf_map[PERF_COUNT_HW_MAX] = {
+ PERF_MAP_ALL_UNSUPPORTED,
[PERF_COUNT_HW_CPU_CYCLES] = ARMV6MPCORE_PERFCTR_CPU_CYCLES,
[PERF_COUNT_HW_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_INSTR_EXEC,
- [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED,
- [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED,
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_BR_EXEC,
[PERF_COUNT_HW_BRANCH_MISSES] = ARMV6MPCORE_PERFCTR_BR_MISPREDICT,
- [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED,
[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV6MPCORE_PERFCTR_IBUF_STALL,
[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = ARMV6MPCORE_PERFCTR_LSU_FULL_STALL,
};
@@ -234,114 +145,26 @@ static const unsigned armv6mpcore_perf_map[PERF_COUNT_HW_MAX] = {
static const unsigned armv6mpcore_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
- [C(L1D)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] =
- ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS,
- [C(RESULT_MISS)] =
- ARMV6MPCORE_PERFCTR_DCACHE_RDMISS,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] =
- ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS,
- [C(RESULT_MISS)] =
- ARMV6MPCORE_PERFCTR_DCACHE_WRMISS,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(L1I)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(LL)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(DTLB)] = {
- /*
- * The ARM performance counters can count micro DTLB misses,
- * micro ITLB misses and main TLB misses. There isn't an event
- * for TLB misses, so use the micro misses here and if users
- * want the main TLB misses they can use a raw counter.
- */
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(ITLB)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(BPU)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(NODE)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
+ PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+ [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS,
+ [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DCACHE_RDMISS,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DCACHE_WRMISS,
+
+ [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS,
+
+ /*
+ * The ARM performance counters can count micro DTLB misses, micro ITLB
+ * misses and main TLB misses. There isn't an event for TLB misses, so
+ * use the micro misses here and if users want the main TLB misses they
+ * can use a raw counter.
+ */
+ [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS,
+ [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS,
+
+ [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS,
+ [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS,
};
static inline unsigned long
@@ -653,9 +476,8 @@ static int armv6_map_event(struct perf_event *event)
&armv6_perf_cache_map, 0xFF);
}
-static int armv6pmu_init(struct arm_pmu *cpu_pmu)
+static void armv6pmu_init(struct arm_pmu *cpu_pmu)
{
- cpu_pmu->name = "v6";
cpu_pmu->handle_irq = armv6pmu_handle_irq;
cpu_pmu->enable = armv6pmu_enable_event;
cpu_pmu->disable = armv6pmu_disable_event;
@@ -667,7 +489,26 @@ static int armv6pmu_init(struct arm_pmu *cpu_pmu)
cpu_pmu->map_event = armv6_map_event;
cpu_pmu->num_events = 3;
cpu_pmu->max_period = (1LLU << 32) - 1;
+}
+
+static int armv6_1136_pmu_init(struct arm_pmu *cpu_pmu)
+{
+ armv6pmu_init(cpu_pmu);
+ cpu_pmu->name = "armv6_1136";
+ return 0;
+}
+static int armv6_1156_pmu_init(struct arm_pmu *cpu_pmu)
+{
+ armv6pmu_init(cpu_pmu);
+ cpu_pmu->name = "armv6_1156";
+ return 0;
+}
+
+static int armv6_1176_pmu_init(struct arm_pmu *cpu_pmu)
+{
+ armv6pmu_init(cpu_pmu);
+ cpu_pmu->name = "armv6_1176";
return 0;
}
@@ -687,7 +528,7 @@ static int armv6mpcore_map_event(struct perf_event *event)
static int armv6mpcore_pmu_init(struct arm_pmu *cpu_pmu)
{
- cpu_pmu->name = "v6mpcore";
+ cpu_pmu->name = "armv6_11mpcore";
cpu_pmu->handle_irq = armv6pmu_handle_irq;
cpu_pmu->enable = armv6pmu_enable_event;
cpu_pmu->disable = armv6mpcore_pmu_disable_event;
@@ -703,7 +544,17 @@ static int armv6mpcore_pmu_init(struct arm_pmu *cpu_pmu)
return 0;
}
#else
-static int armv6pmu_init(struct arm_pmu *cpu_pmu)
+static int armv6_1136_pmu_init(struct arm_pmu *cpu_pmu)
+{
+ return -ENODEV;
+}
+
+static int armv6_1156_pmu_init(struct arm_pmu *cpu_pmu)
+{
+ return -ENODEV;
+}
+
+static int armv6_1176_pmu_init(struct arm_pmu *cpu_pmu)
{
return -ENODEV;
}
diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c
index 1d37568c547..116758b77f9 100644
--- a/arch/arm/kernel/perf_event_v7.c
+++ b/arch/arm/kernel/perf_event_v7.c
@@ -148,137 +148,62 @@ enum krait_perf_types {
* accesses/misses in hardware.
*/
static const unsigned armv7_a8_perf_map[PERF_COUNT_HW_MAX] = {
+ PERF_MAP_ALL_UNSUPPORTED,
[PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES,
[PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED,
[PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
[PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
[PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
- [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED,
[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV7_A8_PERFCTR_STALL_ISIDE,
- [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = HW_OP_UNSUPPORTED,
};
static const unsigned armv7_a8_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
- [C(L1D)] = {
- /*
- * The performance counters don't differentiate between read
- * and write accesses/misses so this isn't strictly correct,
- * but it's the best we can do. Writes and reads get
- * combined.
- */
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(L1I)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = ARMV7_A8_PERFCTR_L1_ICACHE_ACCESS,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(LL)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = ARMV7_A8_PERFCTR_L2_CACHE_ACCESS,
- [C(RESULT_MISS)] = ARMV7_A8_PERFCTR_L2_CACHE_REFILL,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = ARMV7_A8_PERFCTR_L2_CACHE_ACCESS,
- [C(RESULT_MISS)] = ARMV7_A8_PERFCTR_L2_CACHE_REFILL,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(DTLB)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(ITLB)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(BPU)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(NODE)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
+ PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+ /*
+ * The performance counters don't differentiate between read and write
+ * accesses/misses so this isn't strictly correct, but it's the best we
+ * can do. Writes and reads get combined.
+ */
+ [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+ [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+
+ [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_A8_PERFCTR_L1_ICACHE_ACCESS,
+ [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL,
+
+ [C(LL)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_A8_PERFCTR_L2_CACHE_ACCESS,
+ [C(LL)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_A8_PERFCTR_L2_CACHE_REFILL,
+ [C(LL)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_A8_PERFCTR_L2_CACHE_ACCESS,
+ [C(LL)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_A8_PERFCTR_L2_CACHE_REFILL,
+
+ [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
+ [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
+
+ [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
+ [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
+
+ [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
};
/*
* Cortex-A9 HW events mapping
*/
static const unsigned armv7_a9_perf_map[PERF_COUNT_HW_MAX] = {
+ PERF_MAP_ALL_UNSUPPORTED,
[PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES,
[PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_A9_PERFCTR_INSTR_CORE_RENAME,
[PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
[PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
[PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
- [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED,
[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV7_A9_PERFCTR_STALL_ICACHE,
[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = ARMV7_A9_PERFCTR_STALL_DISPATCH,
};
@@ -286,238 +211,83 @@ static const unsigned armv7_a9_perf_map[PERF_COUNT_HW_MAX] = {
static const unsigned armv7_a9_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
- [C(L1D)] = {
- /*
- * The performance counters don't differentiate between read
- * and write accesses/misses so this isn't strictly correct,
- * but it's the best we can do. Writes and reads get
- * combined.
- */
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(L1I)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(LL)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(DTLB)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(ITLB)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(BPU)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(NODE)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
+ PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+ /*
+ * The performance counters don't differentiate between read and write
+ * accesses/misses so this isn't strictly correct, but it's the best we
+ * can do. Writes and reads get combined.
+ */
+ [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+ [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+
+ [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL,
+
+ [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
+ [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
+
+ [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
+ [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
+
+ [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
};
/*
* Cortex-A5 HW events mapping
*/
static const unsigned armv7_a5_perf_map[PERF_COUNT_HW_MAX] = {
+ PERF_MAP_ALL_UNSUPPORTED,
[PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES,
[PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED,
[PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
[PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
[PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
- [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED,
- [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = HW_OP_UNSUPPORTED,
- [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = HW_OP_UNSUPPORTED,
};
static const unsigned armv7_a5_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
- [C(L1D)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = ARMV7_A5_PERFCTR_PREFETCH_LINEFILL,
- [C(RESULT_MISS)] = ARMV7_A5_PERFCTR_PREFETCH_LINEFILL_DROP,
- },
- },
- [C(L1I)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_ICACHE_ACCESS,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- /*
- * The prefetch counters don't differentiate between the I
- * side and the D side.
- */
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = ARMV7_A5_PERFCTR_PREFETCH_LINEFILL,
- [C(RESULT_MISS)] = ARMV7_A5_PERFCTR_PREFETCH_LINEFILL_DROP,
- },
- },
- [C(LL)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(DTLB)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(ITLB)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(BPU)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(NODE)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
+ PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+ [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+ [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+ [C(L1D)][C(OP_PREFETCH)][C(RESULT_ACCESS)] = ARMV7_A5_PERFCTR_PREFETCH_LINEFILL,
+ [C(L1D)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV7_A5_PERFCTR_PREFETCH_LINEFILL_DROP,
+
+ [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_ICACHE_ACCESS,
+ [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL,
+ /*
+ * The prefetch counters don't differentiate between the I side and the
+ * D side.
+ */
+ [C(L1I)][C(OP_PREFETCH)][C(RESULT_ACCESS)] = ARMV7_A5_PERFCTR_PREFETCH_LINEFILL,
+ [C(L1I)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV7_A5_PERFCTR_PREFETCH_LINEFILL_DROP,
+
+ [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
+ [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
+
+ [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
+ [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
+
+ [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
};
/*
* Cortex-A15 HW events mapping
*/
static const unsigned armv7_a15_perf_map[PERF_COUNT_HW_MAX] = {
+ PERF_MAP_ALL_UNSUPPORTED,
[PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES,
[PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED,
[PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
@@ -525,123 +295,48 @@ static const unsigned armv7_a15_perf_map[PERF_COUNT_HW_MAX] = {
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_A15_PERFCTR_PC_WRITE_SPEC,
[PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
[PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_BUS_CYCLES,
- [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = HW_OP_UNSUPPORTED,
- [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = HW_OP_UNSUPPORTED,
};
static const unsigned armv7_a15_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
- [C(L1D)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = ARMV7_A15_PERFCTR_L1_DCACHE_ACCESS_READ,
- [C(RESULT_MISS)] = ARMV7_A15_PERFCTR_L1_DCACHE_REFILL_READ,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = ARMV7_A15_PERFCTR_L1_DCACHE_ACCESS_WRITE,
- [C(RESULT_MISS)] = ARMV7_A15_PERFCTR_L1_DCACHE_REFILL_WRITE,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(L1I)] = {
- /*
- * Not all performance counters differentiate between read
- * and write accesses/misses so we're not always strictly
- * correct, but it's the best we can do. Writes and reads get
- * combined in these cases.
- */
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_ICACHE_ACCESS,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(LL)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = ARMV7_A15_PERFCTR_L2_CACHE_ACCESS_READ,
- [C(RESULT_MISS)] = ARMV7_A15_PERFCTR_L2_CACHE_REFILL_READ,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = ARMV7_A15_PERFCTR_L2_CACHE_ACCESS_WRITE,
- [C(RESULT_MISS)] = ARMV7_A15_PERFCTR_L2_CACHE_REFILL_WRITE,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(DTLB)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV7_A15_PERFCTR_DTLB_REFILL_L1_READ,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV7_A15_PERFCTR_DTLB_REFILL_L1_WRITE,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(ITLB)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(BPU)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(NODE)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
+ PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+ [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_A15_PERFCTR_L1_DCACHE_ACCESS_READ,
+ [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_A15_PERFCTR_L1_DCACHE_REFILL_READ,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_A15_PERFCTR_L1_DCACHE_ACCESS_WRITE,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_A15_PERFCTR_L1_DCACHE_REFILL_WRITE,
+
+ /*
+ * Not all performance counters differentiate between read and write
+ * accesses/misses so we're not always strictly correct, but it's the
+ * best we can do. Writes and reads get combined in these cases.
+ */
+ [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_ICACHE_ACCESS,
+ [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL,
+
+ [C(LL)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_A15_PERFCTR_L2_CACHE_ACCESS_READ,
+ [C(LL)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_A15_PERFCTR_L2_CACHE_REFILL_READ,
+ [C(LL)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_A15_PERFCTR_L2_CACHE_ACCESS_WRITE,
+ [C(LL)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_A15_PERFCTR_L2_CACHE_REFILL_WRITE,
+
+ [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_A15_PERFCTR_DTLB_REFILL_L1_READ,
+ [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_A15_PERFCTR_DTLB_REFILL_L1_WRITE,
+
+ [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
+ [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
+
+ [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
};
/*
* Cortex-A7 HW events mapping
*/
static const unsigned armv7_a7_perf_map[PERF_COUNT_HW_MAX] = {
+ PERF_MAP_ALL_UNSUPPORTED,
[PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES,
[PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED,
[PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
@@ -649,123 +344,48 @@ static const unsigned armv7_a7_perf_map[PERF_COUNT_HW_MAX] = {
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
[PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
[PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_BUS_CYCLES,
- [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = HW_OP_UNSUPPORTED,
- [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = HW_OP_UNSUPPORTED,
};
static const unsigned armv7_a7_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
- [C(L1D)] = {
- /*
- * The performance counters don't differentiate between read
- * and write accesses/misses so this isn't strictly correct,
- * but it's the best we can do. Writes and reads get
- * combined.
- */
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(L1I)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_ICACHE_ACCESS,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(LL)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_CACHE_ACCESS,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACHE_REFILL,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_CACHE_ACCESS,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACHE_REFILL,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(DTLB)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(ITLB)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(BPU)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(NODE)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
+ PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+ /*
+ * The performance counters don't differentiate between read and write
+ * accesses/misses so this isn't strictly correct, but it's the best we
+ * can do. Writes and reads get combined.
+ */
+ [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+ [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+
+ [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_ICACHE_ACCESS,
+ [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL,
+
+ [C(LL)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_CACHE_ACCESS,
+ [C(LL)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACHE_REFILL,
+ [C(LL)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_CACHE_ACCESS,
+ [C(LL)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACHE_REFILL,
+
+ [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
+ [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
+
+ [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
+ [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
+
+ [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
};
/*
* Cortex-A12 HW events mapping
*/
static const unsigned armv7_a12_perf_map[PERF_COUNT_HW_MAX] = {
+ PERF_MAP_ALL_UNSUPPORTED,
[PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES,
[PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED,
[PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
@@ -773,138 +393,60 @@ static const unsigned armv7_a12_perf_map[PERF_COUNT_HW_MAX] = {
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_A12_PERFCTR_PC_WRITE_SPEC,
[PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
[PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_BUS_CYCLES,
- [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = HW_OP_UNSUPPORTED,
- [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = HW_OP_UNSUPPORTED,
};
static const unsigned armv7_a12_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
- [C(L1D)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = ARMV7_A12_PERFCTR_L1_DCACHE_ACCESS_READ,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = ARMV7_A12_PERFCTR_L1_DCACHE_ACCESS_WRITE,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(L1I)] = {
- /*
- * Not all performance counters differentiate between read
- * and write accesses/misses so we're not always strictly
- * correct, but it's the best we can do. Writes and reads get
- * combined in these cases.
- */
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_ICACHE_ACCESS,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(LL)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = ARMV7_A12_PERFCTR_L2_CACHE_ACCESS_READ,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACHE_REFILL,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = ARMV7_A12_PERFCTR_L2_CACHE_ACCESS_WRITE,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACHE_REFILL,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(DTLB)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV7_A12_PERFCTR_PF_TLB_REFILL,
- },
- },
- [C(ITLB)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(BPU)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(NODE)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
+ PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+ [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_A12_PERFCTR_L1_DCACHE_ACCESS_READ,
+ [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_A12_PERFCTR_L1_DCACHE_ACCESS_WRITE,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+
+ /*
+ * Not all performance counters differentiate between read and write
+ * accesses/misses so we're not always strictly correct, but it's the
+ * best we can do. Writes and reads get combined in these cases.
+ */
+ [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_ICACHE_ACCESS,
+ [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL,
+
+ [C(LL)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_A12_PERFCTR_L2_CACHE_ACCESS_READ,
+ [C(LL)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACHE_REFILL,
+ [C(LL)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_A12_PERFCTR_L2_CACHE_ACCESS_WRITE,
+ [C(LL)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACHE_REFILL,
+
+ [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
+ [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
+ [C(DTLB)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV7_A12_PERFCTR_PF_TLB_REFILL,
+
+ [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
+ [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
+
+ [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
};
/*
* Krait HW events mapping
*/
static const unsigned krait_perf_map[PERF_COUNT_HW_MAX] = {
+ PERF_MAP_ALL_UNSUPPORTED,
[PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES,
[PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED,
- [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED,
- [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED,
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
[PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
[PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES,
};
static const unsigned krait_perf_map_no_branch[PERF_COUNT_HW_MAX] = {
+ PERF_MAP_ALL_UNSUPPORTED,
[PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES,
[PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED,
- [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED,
- [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED,
- [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = HW_OP_UNSUPPORTED,
[PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
[PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES,
};
@@ -912,110 +454,31 @@ static const unsigned krait_perf_map_no_branch[PERF_COUNT_HW_MAX] = {
static const unsigned krait_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
- [C(L1D)] = {
- /*
- * The performance counters don't differentiate between read
- * and write accesses/misses so this isn't strictly correct,
- * but it's the best we can do. Writes and reads get
- * combined.
- */
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(L1I)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = KRAIT_PERFCTR_L1_ICACHE_ACCESS,
- [C(RESULT_MISS)] = KRAIT_PERFCTR_L1_ICACHE_MISS,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(LL)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(DTLB)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = KRAIT_PERFCTR_L1_DTLB_ACCESS,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = KRAIT_PERFCTR_L1_DTLB_ACCESS,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(ITLB)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = KRAIT_PERFCTR_L1_ITLB_ACCESS,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = KRAIT_PERFCTR_L1_ITLB_ACCESS,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(BPU)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(NODE)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
+ PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+ /*
+ * The performance counters don't differentiate between read and write
+ * accesses/misses so this isn't strictly correct, but it's the best we
+ * can do. Writes and reads get combined.
+ */
+ [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+ [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+
+ [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = KRAIT_PERFCTR_L1_ICACHE_ACCESS,
+ [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = KRAIT_PERFCTR_L1_ICACHE_MISS,
+
+ [C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)] = KRAIT_PERFCTR_L1_DTLB_ACCESS,
+ [C(DTLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = KRAIT_PERFCTR_L1_DTLB_ACCESS,
+
+ [C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = KRAIT_PERFCTR_L1_ITLB_ACCESS,
+ [C(ITLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = KRAIT_PERFCTR_L1_ITLB_ACCESS,
+
+ [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
};
/*
@@ -1545,7 +1008,7 @@ static u32 armv7_read_num_pmnc_events(void)
static int armv7_a8_pmu_init(struct arm_pmu *cpu_pmu)
{
armv7pmu_init(cpu_pmu);
- cpu_pmu->name = "ARMv7 Cortex-A8";
+ cpu_pmu->name = "armv7_cortex_a8";
cpu_pmu->map_event = armv7_a8_map_event;
cpu_pmu->num_events = armv7_read_num_pmnc_events();
return 0;
@@ -1554,7 +1017,7 @@ static int armv7_a8_pmu_init(struct arm_pmu *cpu_pmu)
static int armv7_a9_pmu_init(struct arm_pmu *cpu_pmu)
{
armv7pmu_init(cpu_pmu);
- cpu_pmu->name = "ARMv7 Cortex-A9";
+ cpu_pmu->name = "armv7_cortex_a9";
cpu_pmu->map_event = armv7_a9_map_event;
cpu_pmu->num_events = armv7_read_num_pmnc_events();
return 0;
@@ -1563,7 +1026,7 @@ static int armv7_a9_pmu_init(struct arm_pmu *cpu_pmu)
static int armv7_a5_pmu_init(struct arm_pmu *cpu_pmu)
{
armv7pmu_init(cpu_pmu);
- cpu_pmu->name = "ARMv7 Cortex-A5";
+ cpu_pmu->name = "armv7_cortex_a5";
cpu_pmu->map_event = armv7_a5_map_event;
cpu_pmu->num_events = armv7_read_num_pmnc_events();
return 0;
@@ -1572,7 +1035,7 @@ static int armv7_a5_pmu_init(struct arm_pmu *cpu_pmu)
static int armv7_a15_pmu_init(struct arm_pmu *cpu_pmu)
{
armv7pmu_init(cpu_pmu);
- cpu_pmu->name = "ARMv7 Cortex-A15";
+ cpu_pmu->name = "armv7_cortex_a15";
cpu_pmu->map_event = armv7_a15_map_event;
cpu_pmu->num_events = armv7_read_num_pmnc_events();
cpu_pmu->set_event_filter = armv7pmu_set_event_filter;
@@ -1582,7 +1045,7 @@ static int armv7_a15_pmu_init(struct arm_pmu *cpu_pmu)
static int armv7_a7_pmu_init(struct arm_pmu *cpu_pmu)
{
armv7pmu_init(cpu_pmu);
- cpu_pmu->name = "ARMv7 Cortex-A7";
+ cpu_pmu->name = "armv7_cortex_a7";
cpu_pmu->map_event = armv7_a7_map_event;
cpu_pmu->num_events = armv7_read_num_pmnc_events();
cpu_pmu->set_event_filter = armv7pmu_set_event_filter;
@@ -1592,7 +1055,7 @@ static int armv7_a7_pmu_init(struct arm_pmu *cpu_pmu)
static int armv7_a12_pmu_init(struct arm_pmu *cpu_pmu)
{
armv7pmu_init(cpu_pmu);
- cpu_pmu->name = "ARMv7 Cortex-A12";
+ cpu_pmu->name = "armv7_cortex_a12";
cpu_pmu->map_event = armv7_a12_map_event;
cpu_pmu->num_events = armv7_read_num_pmnc_events();
cpu_pmu->set_event_filter = armv7pmu_set_event_filter;
@@ -1602,7 +1065,7 @@ static int armv7_a12_pmu_init(struct arm_pmu *cpu_pmu)
static int armv7_a17_pmu_init(struct arm_pmu *cpu_pmu)
{
armv7_a12_pmu_init(cpu_pmu);
- cpu_pmu->name = "ARMv7 Cortex-A17";
+ cpu_pmu->name = "armv7_cortex_a17";
return 0;
}
@@ -1823,6 +1286,7 @@ static void krait_pmu_disable_event(struct perf_event *event)
unsigned long flags;
struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx;
+ struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
struct pmu_hw_events *events = cpu_pmu->get_hw_events();
/* Disable counter and interrupt */
@@ -1848,6 +1312,7 @@ static void krait_pmu_enable_event(struct perf_event *event)
unsigned long flags;
struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx;
+ struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
struct pmu_hw_events *events = cpu_pmu->get_hw_events();
/*
@@ -1981,7 +1446,7 @@ static void krait_pmu_clear_event_idx(struct pmu_hw_events *cpuc,
static int krait_pmu_init(struct arm_pmu *cpu_pmu)
{
armv7pmu_init(cpu_pmu);
- cpu_pmu->name = "ARMv7 Krait";
+ cpu_pmu->name = "armv7_krait";
/* Some early versions of Krait don't support PC write events */
if (of_property_read_bool(cpu_pmu->plat_device->dev.of_node,
"qcom,no-pc-write"))
diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c
index 63990c42fac..08da0af550b 100644
--- a/arch/arm/kernel/perf_event_xscale.c
+++ b/arch/arm/kernel/perf_event_xscale.c
@@ -48,118 +48,31 @@ enum xscale_counters {
};
static const unsigned xscale_perf_map[PERF_COUNT_HW_MAX] = {
+ PERF_MAP_ALL_UNSUPPORTED,
[PERF_COUNT_HW_CPU_CYCLES] = XSCALE_PERFCTR_CCNT,
[PERF_COUNT_HW_INSTRUCTIONS] = XSCALE_PERFCTR_INSTRUCTION,
- [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED,
- [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED,
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = XSCALE_PERFCTR_BRANCH,
[PERF_COUNT_HW_BRANCH_MISSES] = XSCALE_PERFCTR_BRANCH_MISS,
- [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED,
[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = XSCALE_PERFCTR_ICACHE_NO_DELIVER,
- [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = HW_OP_UNSUPPORTED,
};
static const unsigned xscale_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
- [C(L1D)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS,
- [C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS,
- [C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(L1I)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = XSCALE_PERFCTR_ICACHE_MISS,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(LL)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(DTLB)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(ITLB)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(BPU)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(NODE)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
+ PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+ [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS,
+ [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS,
+
+ [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = XSCALE_PERFCTR_ICACHE_MISS,
+
+ [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS,
+ [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS,
+
+ [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS,
+ [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS,
};
#define XSCALE_PMU_ENABLE 0x001
@@ -442,7 +355,7 @@ static int xscale_map_event(struct perf_event *event)
static int xscale1pmu_init(struct arm_pmu *cpu_pmu)
{
- cpu_pmu->name = "xscale1";
+ cpu_pmu->name = "armv5_xscale1";
cpu_pmu->handle_irq = xscale1pmu_handle_irq;
cpu_pmu->enable = xscale1pmu_enable_event;
cpu_pmu->disable = xscale1pmu_disable_event;
@@ -812,7 +725,7 @@ static inline void xscale2pmu_write_counter(struct perf_event *event, u32 val)
static int xscale2pmu_init(struct arm_pmu *cpu_pmu)
{
- cpu_pmu->name = "xscale2";
+ cpu_pmu->name = "armv5_xscale2";
cpu_pmu->handle_irq = xscale2pmu_handle_irq;
cpu_pmu->enable = xscale2pmu_enable_event;
cpu_pmu->disable = xscale2pmu_disable_event;
diff --git a/arch/arm/kernel/relocate_kernel.S b/arch/arm/kernel/relocate_kernel.S
index 95858966d84..35e72585ec1 100644
--- a/arch/arm/kernel/relocate_kernel.S
+++ b/arch/arm/kernel/relocate_kernel.S
@@ -3,6 +3,7 @@
*/
#include <linux/linkage.h>
+#include <asm/assembler.h>
#include <asm/kexec.h>
.align 3 /* not needed for this code, but keeps fncpy() happy */
@@ -59,7 +60,7 @@ ENTRY(relocate_new_kernel)
mov r0,#0
ldr r1,kexec_mach_type
ldr r2,kexec_boot_atags
- ARM( mov pc, lr )
+ ARM( ret lr )
THUMB( bx lr )
.align
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 8a16ee5d8a9..84db893dedc 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -393,19 +393,34 @@ static void __init cpuid_init_hwcaps(void)
elf_hwcap |= HWCAP_LPAE;
}
-static void __init feat_v6_fixup(void)
+static void __init elf_hwcap_fixup(void)
{
- int id = read_cpuid_id();
-
- if ((id & 0xff0f0000) != 0x41070000)
- return;
+ unsigned id = read_cpuid_id();
+ unsigned sync_prim;
/*
* HWCAP_TLS is available only on 1136 r1p0 and later,
* see also kuser_get_tls_init.
*/
- if ((((id >> 4) & 0xfff) == 0xb36) && (((id >> 20) & 3) == 0))
+ if (read_cpuid_part() == ARM_CPU_PART_ARM1136 &&
+ ((id >> 20) & 3) == 0) {
elf_hwcap &= ~HWCAP_TLS;
+ return;
+ }
+
+ /* Verify if CPUID scheme is implemented */
+ if ((id & 0x000f0000) != 0x000f0000)
+ return;
+
+ /*
+ * If the CPU supports LDREX/STREX and LDREXB/STREXB,
+ * avoid advertising SWP; it may not be atomic with
+ * multiprocessing cores.
+ */
+ sync_prim = ((read_cpuid_ext(CPUID_EXT_ISAR3) >> 8) & 0xf0) |
+ ((read_cpuid_ext(CPUID_EXT_ISAR4) >> 20) & 0x0f);
+ if (sync_prim >= 0x13)
+ elf_hwcap &= ~HWCAP_SWP;
}
/*
@@ -609,7 +624,7 @@ static void __init setup_processor(void)
#endif
erratum_a15_798181_init();
- feat_v6_fixup();
+ elf_hwcap_fixup();
cacheid_init();
cpu_init();
diff --git a/arch/arm/kernel/sleep.S b/arch/arm/kernel/sleep.S
index 1b880db2a03..e1e60e5a7a2 100644
--- a/arch/arm/kernel/sleep.S
+++ b/arch/arm/kernel/sleep.S
@@ -107,7 +107,7 @@ ENTRY(cpu_resume_mmu)
instr_sync
mov r0, r0
mov r0, r0
- mov pc, r3 @ jump to virtual address
+ ret r3 @ jump to virtual address
ENDPROC(cpu_resume_mmu)
.popsection
cpu_resume_after_mmu:
diff --git a/arch/arm/kernel/smp_scu.c b/arch/arm/kernel/smp_scu.c
index 1aafa0d785e..72f9241ad5d 100644
--- a/arch/arm/kernel/smp_scu.c
+++ b/arch/arm/kernel/smp_scu.c
@@ -17,6 +17,8 @@
#include <asm/cputype.h>
#define SCU_CTRL 0x00
+#define SCU_ENABLE (1 << 0)
+#define SCU_STANDBY_ENABLE (1 << 5)
#define SCU_CONFIG 0x04
#define SCU_CPU_STATUS 0x08
#define SCU_INVALIDATE 0x0c
@@ -50,10 +52,16 @@ void scu_enable(void __iomem *scu_base)
scu_ctrl = readl_relaxed(scu_base + SCU_CTRL);
/* already enabled? */
- if (scu_ctrl & 1)
+ if (scu_ctrl & SCU_ENABLE)
return;
- scu_ctrl |= 1;
+ scu_ctrl |= SCU_ENABLE;
+
+ /* Cortex-A9 earlier than r2p0 has no standby bit in SCU */
+ if ((read_cpuid_id() & 0xff0ffff0) == 0x410fc090 &&
+ (read_cpuid_id() & 0x00f0000f) >= 0x00200000)
+ scu_ctrl |= SCU_STANDBY_ENABLE;
+
writel_relaxed(scu_ctrl, scu_base + SCU_CTRL);
/*
diff --git a/arch/arm/kernel/smp_tlb.c b/arch/arm/kernel/smp_tlb.c
index 95d063620b7..2e72be4f623 100644
--- a/arch/arm/kernel/smp_tlb.c
+++ b/arch/arm/kernel/smp_tlb.c
@@ -92,15 +92,19 @@ void erratum_a15_798181_init(void)
unsigned int midr = read_cpuid_id();
unsigned int revidr = read_cpuid(CPUID_REVIDR);
- /* Cortex-A15 r0p0..r3p2 w/o ECO fix affected */
- if ((midr & 0xff0ffff0) != 0x410fc0f0 || midr > 0x413fc0f2 ||
- (revidr & 0x210) == 0x210) {
- return;
- }
- if (revidr & 0x10)
- erratum_a15_798181_handler = erratum_a15_798181_partial;
- else
+ /* Brahma-B15 r0p0..r0p2 affected
+ * Cortex-A15 r0p0..r3p2 w/o ECO fix affected */
+ if ((midr & 0xff0ffff0) == 0x420f00f0 && midr <= 0x420f00f2)
erratum_a15_798181_handler = erratum_a15_798181_broadcast;
+ else if ((midr & 0xff0ffff0) == 0x410fc0f0 && midr <= 0x413fc0f2 &&
+ (revidr & 0x210) != 0x210) {
+ if (revidr & 0x10)
+ erratum_a15_798181_handler =
+ erratum_a15_798181_partial;
+ else
+ erratum_a15_798181_handler =
+ erratum_a15_798181_broadcast;
+ }
}
#endif
diff --git a/arch/arm/kernel/swp_emulate.c b/arch/arm/kernel/swp_emulate.c
index b1b89882b11..67ca8578c6d 100644
--- a/arch/arm/kernel/swp_emulate.c
+++ b/arch/arm/kernel/swp_emulate.c
@@ -27,6 +27,7 @@
#include <linux/perf_event.h>
#include <asm/opcodes.h>
+#include <asm/system_info.h>
#include <asm/traps.h>
#include <asm/uaccess.h>
@@ -266,6 +267,9 @@ static struct undef_hook swp_hook = {
*/
static int __init swp_emulation_init(void)
{
+ if (cpu_architecture() < CPU_ARCH_ARMv7)
+ return 0;
+
#ifdef CONFIG_PROC_FS
if (!proc_create("cpu/swp_emulation", S_IRUGO, NULL, &proc_status_fops))
return -ENOMEM;
diff --git a/arch/arm/kernel/time.c b/arch/arm/kernel/time.c
index 829a96d4a17..0cc7e58c47c 100644
--- a/arch/arm/kernel/time.c
+++ b/arch/arm/kernel/time.c
@@ -50,10 +50,7 @@ unsigned long profile_pc(struct pt_regs *regs)
if (!in_lock_functions(regs->ARM_pc))
return regs->ARM_pc;
- frame.fp = regs->ARM_fp;
- frame.sp = regs->ARM_sp;
- frame.lr = regs->ARM_lr;
- frame.pc = regs->ARM_pc;
+ arm_get_current_stackframe(regs, &frame);
do {
int ret = unwind_frame(&frame);
if (ret < 0)
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index abd2fc06773..c8e4bb71494 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -31,11 +31,13 @@
#include <asm/exception.h>
#include <asm/unistd.h>
#include <asm/traps.h>
+#include <asm/ptrace.h>
#include <asm/unwind.h>
#include <asm/tls.h>
#include <asm/system_misc.h>
#include <asm/opcodes.h>
+
static const char *handler[]= {
"prefetch abort",
"data abort",
@@ -184,7 +186,7 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
tsk = current;
if (regs) {
- fp = regs->ARM_fp;
+ fp = frame_pointer(regs);
mode = processor_mode(regs);
} else if (tsk != current) {
fp = thread_saved_fp(tsk);
@@ -719,7 +721,7 @@ asmlinkage int arm_syscall(int no, struct pt_regs *regs)
dump_instr("", regs);
if (user_mode(regs)) {
__show_regs(regs);
- c_backtrace(regs->ARM_fp, processor_mode(regs));
+ c_backtrace(frame_pointer(regs), processor_mode(regs));
}
}
#endif
diff --git a/arch/arm/kernel/unwind.c b/arch/arm/kernel/unwind.c
index e67682f02cb..a61a1dfbb0d 100644
--- a/arch/arm/kernel/unwind.c
+++ b/arch/arm/kernel/unwind.c
@@ -479,12 +479,10 @@ void unwind_backtrace(struct pt_regs *regs, struct task_struct *tsk)
tsk = current;
if (regs) {
- frame.fp = regs->ARM_fp;
- frame.sp = regs->ARM_sp;
- frame.lr = regs->ARM_lr;
+ arm_get_current_stackframe(regs, &frame);
/* PC might be corrupted, use LR in that case. */
- frame.pc = kernel_text_address(regs->ARM_pc)
- ? regs->ARM_pc : regs->ARM_lr;
+ if (!kernel_text_address(regs->ARM_pc))
+ frame.pc = regs->ARM_lr;
} else if (tsk == current) {
frame.fp = (unsigned long)__builtin_frame_address(0);
frame.sp = current_sp;
diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
index 7bcee5c9b60..6f57cb94367 100644
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -318,7 +318,6 @@ SECTIONS
_end = .;
STABS_DEBUG
- .comment 0 : { *(.comment) }
}
/*
diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c
index b23a59c1c52..70bf49b8b24 100644
--- a/arch/arm/kvm/guest.c
+++ b/arch/arm/kvm/guest.c
@@ -274,13 +274,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
int __attribute_const__ kvm_target_cpu(void)
{
- unsigned long implementor = read_cpuid_implementor();
- unsigned long part_number = read_cpuid_part_number();
-
- if (implementor != ARM_CPU_IMP_ARM)
- return -EINVAL;
-
- switch (part_number) {
+ switch (read_cpuid_part()) {
case ARM_CPU_PART_CORTEX_A7:
return KVM_ARM_TARGET_CORTEX_A7;
case ARM_CPU_PART_CORTEX_A15:
diff --git a/arch/arm/kvm/init.S b/arch/arm/kvm/init.S
index 1b9844d369c..b2d229f09c0 100644
--- a/arch/arm/kvm/init.S
+++ b/arch/arm/kvm/init.S
@@ -17,6 +17,7 @@
*/
#include <linux/linkage.h>
+#include <asm/assembler.h>
#include <asm/unified.h>
#include <asm/asm-offsets.h>
#include <asm/kvm_asm.h>
@@ -134,7 +135,7 @@ phase2:
ldr r0, =TRAMPOLINE_VA
adr r1, target
bfi r0, r1, #0, #PAGE_SHIFT
- mov pc, r0
+ ret r0
target: @ We're now in the trampoline code, switch page tables
mcrr p15, 4, r2, r3, c2
diff --git a/arch/arm/lib/ashldi3.S b/arch/arm/lib/ashldi3.S
index 638deb13da1..b05e9584065 100644
--- a/arch/arm/lib/ashldi3.S
+++ b/arch/arm/lib/ashldi3.S
@@ -27,6 +27,7 @@ Boston, MA 02110-1301, USA. */
#include <linux/linkage.h>
+#include <asm/assembler.h>
#ifdef __ARMEB__
#define al r1
@@ -47,7 +48,7 @@ ENTRY(__aeabi_llsl)
THUMB( lsrmi r3, al, ip )
THUMB( orrmi ah, ah, r3 )
mov al, al, lsl r2
- mov pc, lr
+ ret lr
ENDPROC(__ashldi3)
ENDPROC(__aeabi_llsl)
diff --git a/arch/arm/lib/ashrdi3.S b/arch/arm/lib/ashrdi3.S
index 015e8aa5a1d..275d7d2341a 100644
--- a/arch/arm/lib/ashrdi3.S
+++ b/arch/arm/lib/ashrdi3.S
@@ -27,6 +27,7 @@ Boston, MA 02110-1301, USA. */
#include <linux/linkage.h>
+#include <asm/assembler.h>
#ifdef __ARMEB__
#define al r1
@@ -47,7 +48,7 @@ ENTRY(__aeabi_lasr)
THUMB( lslmi r3, ah, ip )
THUMB( orrmi al, al, r3 )
mov ah, ah, asr r2
- mov pc, lr
+ ret lr
ENDPROC(__ashrdi3)
ENDPROC(__aeabi_lasr)
diff --git a/arch/arm/lib/backtrace.S b/arch/arm/lib/backtrace.S
index 4102be617fc..fab5a50503a 100644
--- a/arch/arm/lib/backtrace.S
+++ b/arch/arm/lib/backtrace.S
@@ -25,7 +25,7 @@
ENTRY(c_backtrace)
#if !defined(CONFIG_FRAME_POINTER) || !defined(CONFIG_PRINTK)
- mov pc, lr
+ ret lr
ENDPROC(c_backtrace)
#else
stmfd sp!, {r4 - r8, lr} @ Save an extra register so we have a location...
diff --git a/arch/arm/lib/bitops.h b/arch/arm/lib/bitops.h
index 9f12ed1eea8..7d807cfd8ef 100644
--- a/arch/arm/lib/bitops.h
+++ b/arch/arm/lib/bitops.h
@@ -1,3 +1,4 @@
+#include <asm/assembler.h>
#include <asm/unwind.h>
#if __LINUX_ARM_ARCH__ >= 6
@@ -70,7 +71,7 @@ UNWIND( .fnstart )
\instr r2, r2, r3
str r2, [r1, r0, lsl #2]
restore_irqs ip
- mov pc, lr
+ ret lr
UNWIND( .fnend )
ENDPROC(\name )
.endm
@@ -98,7 +99,7 @@ UNWIND( .fnstart )
\store r2, [r1]
moveq r0, #0
restore_irqs ip
- mov pc, lr
+ ret lr
UNWIND( .fnend )
ENDPROC(\name )
.endm
diff --git a/arch/arm/lib/bswapsdi2.S b/arch/arm/lib/bswapsdi2.S
index 9fcdd154eff..07cda737bb1 100644
--- a/arch/arm/lib/bswapsdi2.S
+++ b/arch/arm/lib/bswapsdi2.S
@@ -1,4 +1,5 @@
#include <linux/linkage.h>
+#include <asm/assembler.h>
#if __LINUX_ARM_ARCH__ >= 6
ENTRY(__bswapsi2)
@@ -18,7 +19,7 @@ ENTRY(__bswapsi2)
mov r3, r3, lsr #8
bic r3, r3, #0xff00
eor r0, r3, r0, ror #8
- mov pc, lr
+ ret lr
ENDPROC(__bswapsi2)
ENTRY(__bswapdi2)
@@ -31,6 +32,6 @@ ENTRY(__bswapdi2)
bic r1, r1, #0xff00
eor r1, r1, r0, ror #8
eor r0, r3, ip, ror #8
- mov pc, lr
+ ret lr
ENDPROC(__bswapdi2)
#endif
diff --git a/arch/arm/lib/call_with_stack.S b/arch/arm/lib/call_with_stack.S
index 916c80f13ae..ed1a421813c 100644
--- a/arch/arm/lib/call_with_stack.S
+++ b/arch/arm/lib/call_with_stack.S
@@ -36,9 +36,9 @@ ENTRY(call_with_stack)
mov r0, r1
adr lr, BSYM(1f)
- mov pc, r2
+ ret r2
1: ldr lr, [sp]
ldr sp, [sp, #4]
- mov pc, lr
+ ret lr
ENDPROC(call_with_stack)
diff --git a/arch/arm/lib/csumpartial.S b/arch/arm/lib/csumpartial.S
index 31d3cb34740..984e0f29d54 100644
--- a/arch/arm/lib/csumpartial.S
+++ b/arch/arm/lib/csumpartial.S
@@ -97,7 +97,7 @@ td3 .req lr
#endif
#endif
adcnes sum, sum, td0 @ update checksum
- mov pc, lr
+ ret lr
ENTRY(csum_partial)
stmfd sp!, {buf, lr}
diff --git a/arch/arm/lib/csumpartialcopygeneric.S b/arch/arm/lib/csumpartialcopygeneric.S
index d6e742d2400..10b45909610 100644
--- a/arch/arm/lib/csumpartialcopygeneric.S
+++ b/arch/arm/lib/csumpartialcopygeneric.S
@@ -7,6 +7,7 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
+#include <asm/assembler.h>
/*
* unsigned int
@@ -40,7 +41,7 @@ sum .req r3
adcs sum, sum, ip, put_byte_1 @ update checksum
strb ip, [dst], #1
tst dst, #2
- moveq pc, lr @ dst is now 32bit aligned
+ reteq lr @ dst is now 32bit aligned
.Ldst_16bit: load2b r8, ip
sub len, len, #2
@@ -48,7 +49,7 @@ sum .req r3
strb r8, [dst], #1
adcs sum, sum, ip, put_byte_1
strb ip, [dst], #1
- mov pc, lr @ dst is now 32bit aligned
+ ret lr @ dst is now 32bit aligned
/*
* Handle 0 to 7 bytes, with any alignment of source and
diff --git a/arch/arm/lib/delay-loop.S b/arch/arm/lib/delay-loop.S
index bc1033b897b..518bf6e93f7 100644
--- a/arch/arm/lib/delay-loop.S
+++ b/arch/arm/lib/delay-loop.S
@@ -35,7 +35,7 @@ ENTRY(__loop_const_udelay) @ 0 <= r0 <= 0x7fffff06
mul r0, r2, r0 @ max = 2^32-1
add r0, r0, r1, lsr #32-6
movs r0, r0, lsr #6
- moveq pc, lr
+ reteq lr
/*
* loops = r0 * HZ * loops_per_jiffy / 1000000
@@ -46,23 +46,23 @@ ENTRY(__loop_const_udelay) @ 0 <= r0 <= 0x7fffff06
ENTRY(__loop_delay)
subs r0, r0, #1
#if 0
- movls pc, lr
+ retls lr
subs r0, r0, #1
- movls pc, lr
+ retls lr
subs r0, r0, #1
- movls pc, lr
+ retls lr
subs r0, r0, #1
- movls pc, lr
+ retls lr
subs r0, r0, #1
- movls pc, lr
+ retls lr
subs r0, r0, #1
- movls pc, lr
+ retls lr
subs r0, r0, #1
- movls pc, lr
+ retls lr
subs r0, r0, #1
#endif
bhi __loop_delay
- mov pc, lr
+ ret lr
ENDPROC(__loop_udelay)
ENDPROC(__loop_const_udelay)
ENDPROC(__loop_delay)
diff --git a/arch/arm/lib/div64.S b/arch/arm/lib/div64.S
index e55c4842c29..a9eafe4981e 100644
--- a/arch/arm/lib/div64.S
+++ b/arch/arm/lib/div64.S
@@ -13,6 +13,7 @@
*/
#include <linux/linkage.h>
+#include <asm/assembler.h>
#include <asm/unwind.h>
#ifdef __ARMEB__
@@ -97,7 +98,7 @@ UNWIND(.fnstart)
mov yl, #0
cmpeq xl, r4
movlo xh, xl
- movlo pc, lr
+ retlo lr
@ The division loop for lower bit positions.
@ Here we shift remainer bits leftwards rather than moving the
@@ -111,14 +112,14 @@ UNWIND(.fnstart)
subcs xh, xh, r4
movs ip, ip, lsr #1
bne 4b
- mov pc, lr
+ ret lr
@ The top part of remainder became zero. If carry is set
@ (the 33th bit) this is a false positive so resume the loop.
@ Otherwise, if lower part is also null then we are done.
6: bcs 5b
cmp xl, #0
- moveq pc, lr
+ reteq lr
@ We still have remainer bits in the low part. Bring them up.
@@ -144,7 +145,7 @@ UNWIND(.fnstart)
movs ip, ip, lsr #1
mov xh, #1
bne 4b
- mov pc, lr
+ ret lr
8: @ Division by a power of 2: determine what that divisor order is
@ then simply shift values around
@@ -184,13 +185,13 @@ UNWIND(.fnstart)
THUMB( orr yl, yl, xh )
mov xh, xl, lsl ip
mov xh, xh, lsr ip
- mov pc, lr
+ ret lr
@ eq -> division by 1: obvious enough...
9: moveq yl, xl
moveq yh, xh
moveq xh, #0
- moveq pc, lr
+ reteq lr
UNWIND(.fnend)
UNWIND(.fnstart)
diff --git a/arch/arm/lib/findbit.S b/arch/arm/lib/findbit.S
index 64f6bc1a913..7848780e883 100644
--- a/arch/arm/lib/findbit.S
+++ b/arch/arm/lib/findbit.S
@@ -35,7 +35,7 @@ ENTRY(_find_first_zero_bit_le)
2: cmp r2, r1 @ any more?
blo 1b
3: mov r0, r1 @ no free bits
- mov pc, lr
+ ret lr
ENDPROC(_find_first_zero_bit_le)
/*
@@ -76,7 +76,7 @@ ENTRY(_find_first_bit_le)
2: cmp r2, r1 @ any more?
blo 1b
3: mov r0, r1 @ no free bits
- mov pc, lr
+ ret lr
ENDPROC(_find_first_bit_le)
/*
@@ -114,7 +114,7 @@ ENTRY(_find_first_zero_bit_be)
2: cmp r2, r1 @ any more?
blo 1b
3: mov r0, r1 @ no free bits
- mov pc, lr
+ ret lr
ENDPROC(_find_first_zero_bit_be)
ENTRY(_find_next_zero_bit_be)
@@ -148,7 +148,7 @@ ENTRY(_find_first_bit_be)
2: cmp r2, r1 @ any more?
blo 1b
3: mov r0, r1 @ no free bits
- mov pc, lr
+ ret lr
ENDPROC(_find_first_bit_be)
ENTRY(_find_next_bit_be)
@@ -192,5 +192,5 @@ ENDPROC(_find_next_bit_be)
#endif
cmp r1, r0 @ Clamp to maxbit
movlo r0, r1
- mov pc, lr
+ ret lr
diff --git a/arch/arm/lib/getuser.S b/arch/arm/lib/getuser.S
index 9b06bb41fca..938600098b8 100644
--- a/arch/arm/lib/getuser.S
+++ b/arch/arm/lib/getuser.S
@@ -18,7 +18,7 @@
* Inputs: r0 contains the address
* r1 contains the address limit, which must be preserved
* Outputs: r0 is the error code
- * r2 contains the zero-extended value
+ * r2, r3 contains the zero-extended value
* lr corrupted
*
* No other registers must be altered. (see <asm/uaccess.h>
@@ -36,7 +36,7 @@ ENTRY(__get_user_1)
check_uaccess r0, 1, r1, r2, __get_user_bad
1: TUSER(ldrb) r2, [r0]
mov r0, #0
- mov pc, lr
+ ret lr
ENDPROC(__get_user_1)
ENTRY(__get_user_2)
@@ -56,25 +56,60 @@ rb .req r0
orr r2, rb, r2, lsl #8
#endif
mov r0, #0
- mov pc, lr
+ ret lr
ENDPROC(__get_user_2)
ENTRY(__get_user_4)
check_uaccess r0, 4, r1, r2, __get_user_bad
4: TUSER(ldr) r2, [r0]
mov r0, #0
- mov pc, lr
+ ret lr
ENDPROC(__get_user_4)
+ENTRY(__get_user_8)
+ check_uaccess r0, 8, r1, r2, __get_user_bad
+#ifdef CONFIG_THUMB2_KERNEL
+5: TUSER(ldr) r2, [r0]
+6: TUSER(ldr) r3, [r0, #4]
+#else
+5: TUSER(ldr) r2, [r0], #4
+6: TUSER(ldr) r3, [r0]
+#endif
+ mov r0, #0
+ ret lr
+ENDPROC(__get_user_8)
+
+#ifdef __ARMEB__
+ENTRY(__get_user_lo8)
+ check_uaccess r0, 8, r1, r2, __get_user_bad
+#ifdef CONFIG_CPU_USE_DOMAINS
+ add r0, r0, #4
+7: ldrt r2, [r0]
+#else
+7: ldr r2, [r0, #4]
+#endif
+ mov r0, #0
+ ret lr
+ENDPROC(__get_user_lo8)
+#endif
+
+__get_user_bad8:
+ mov r3, #0
__get_user_bad:
mov r2, #0
mov r0, #-EFAULT
- mov pc, lr
+ ret lr
ENDPROC(__get_user_bad)
+ENDPROC(__get_user_bad8)
.pushsection __ex_table, "a"
.long 1b, __get_user_bad
.long 2b, __get_user_bad
.long 3b, __get_user_bad
.long 4b, __get_user_bad
+ .long 5b, __get_user_bad8
+ .long 6b, __get_user_bad8
+#ifdef __ARMEB__
+ .long 7b, __get_user_bad
+#endif
.popsection
diff --git a/arch/arm/lib/io-readsb.S b/arch/arm/lib/io-readsb.S
index 9f4238987fe..c31b2f3153f 100644
--- a/arch/arm/lib/io-readsb.S
+++ b/arch/arm/lib/io-readsb.S
@@ -25,7 +25,7 @@
ENTRY(__raw_readsb)
teq r2, #0 @ do we have to check for the zero len?
- moveq pc, lr
+ reteq lr
ands ip, r1, #3
bne .Linsb_align
diff --git a/arch/arm/lib/io-readsl.S b/arch/arm/lib/io-readsl.S
index 7a7430950c7..2ed86fa5465 100644
--- a/arch/arm/lib/io-readsl.S
+++ b/arch/arm/lib/io-readsl.S
@@ -12,7 +12,7 @@
ENTRY(__raw_readsl)
teq r2, #0 @ do we have to check for the zero len?
- moveq pc, lr
+ reteq lr
ands ip, r1, #3
bne 3f
@@ -33,7 +33,7 @@ ENTRY(__raw_readsl)
stmcsia r1!, {r3, ip}
ldrne r3, [r0, #0]
strne r3, [r1, #0]
- mov pc, lr
+ ret lr
3: ldr r3, [r0]
cmp ip, #2
@@ -75,5 +75,5 @@ ENTRY(__raw_readsl)
strb r3, [r1, #1]
8: mov r3, ip, get_byte_0
strb r3, [r1, #0]
- mov pc, lr
+ ret lr
ENDPROC(__raw_readsl)
diff --git a/arch/arm/lib/io-readsw-armv3.S b/arch/arm/lib/io-readsw-armv3.S
index 88487c8c4f2..413da991452 100644
--- a/arch/arm/lib/io-readsw-armv3.S
+++ b/arch/arm/lib/io-readsw-armv3.S
@@ -27,11 +27,11 @@
strb r3, [r1], #1
subs r2, r2, #1
- moveq pc, lr
+ reteq lr
ENTRY(__raw_readsw)
teq r2, #0 @ do we have to check for the zero len?
- moveq pc, lr
+ reteq lr
tst r1, #3
bne .Linsw_align
diff --git a/arch/arm/lib/io-readsw-armv4.S b/arch/arm/lib/io-readsw-armv4.S
index 1f393d42593..d9a45e9692a 100644
--- a/arch/arm/lib/io-readsw-armv4.S
+++ b/arch/arm/lib/io-readsw-armv4.S
@@ -26,7 +26,7 @@
ENTRY(__raw_readsw)
teq r2, #0
- moveq pc, lr
+ reteq lr
tst r1, #3
bne .Linsw_align
diff --git a/arch/arm/lib/io-writesb.S b/arch/arm/lib/io-writesb.S
index 68b92f4acae..a46bbc9b168 100644
--- a/arch/arm/lib/io-writesb.S
+++ b/arch/arm/lib/io-writesb.S
@@ -45,7 +45,7 @@
ENTRY(__raw_writesb)
teq r2, #0 @ do we have to check for the zero len?
- moveq pc, lr
+ reteq lr
ands ip, r1, #3
bne .Loutsb_align
diff --git a/arch/arm/lib/io-writesl.S b/arch/arm/lib/io-writesl.S
index d0d104a0dd1..4ea2435988c 100644
--- a/arch/arm/lib/io-writesl.S
+++ b/arch/arm/lib/io-writesl.S
@@ -12,7 +12,7 @@
ENTRY(__raw_writesl)
teq r2, #0 @ do we have to check for the zero len?
- moveq pc, lr
+ reteq lr
ands ip, r1, #3
bne 3f
@@ -33,7 +33,7 @@ ENTRY(__raw_writesl)
ldrne r3, [r1, #0]
strcs ip, [r0, #0]
strne r3, [r0, #0]
- mov pc, lr
+ ret lr
3: bic r1, r1, #3
ldr r3, [r1], #4
@@ -47,7 +47,7 @@ ENTRY(__raw_writesl)
orr ip, ip, r3, lspush #16
str ip, [r0]
bne 4b
- mov pc, lr
+ ret lr
5: mov ip, r3, lspull #8
ldr r3, [r1], #4
@@ -55,7 +55,7 @@ ENTRY(__raw_writesl)
orr ip, ip, r3, lspush #24
str ip, [r0]
bne 5b
- mov pc, lr
+ ret lr
6: mov ip, r3, lspull #24
ldr r3, [r1], #4
@@ -63,5 +63,5 @@ ENTRY(__raw_writesl)
orr ip, ip, r3, lspush #8
str ip, [r0]
bne 6b
- mov pc, lr
+ ret lr
ENDPROC(__raw_writesl)
diff --git a/arch/arm/lib/io-writesw-armv3.S b/arch/arm/lib/io-writesw-armv3.S
index 49b800419e3..121789eb680 100644
--- a/arch/arm/lib/io-writesw-armv3.S
+++ b/arch/arm/lib/io-writesw-armv3.S
@@ -28,11 +28,11 @@
orr r3, r3, r3, lsl #16
str r3, [r0]
subs r2, r2, #1
- moveq pc, lr
+ reteq lr
ENTRY(__raw_writesw)
teq r2, #0 @ do we have to check for the zero len?
- moveq pc, lr
+ reteq lr
tst r1, #3
bne .Loutsw_align
diff --git a/arch/arm/lib/io-writesw-armv4.S b/arch/arm/lib/io-writesw-armv4.S
index ff4f71b579e..269f90c51ad 100644
--- a/arch/arm/lib/io-writesw-armv4.S
+++ b/arch/arm/lib/io-writesw-armv4.S
@@ -31,7 +31,7 @@
ENTRY(__raw_writesw)
teq r2, #0
- moveq pc, lr
+ reteq lr
ands r3, r1, #3
bne .Loutsw_align
@@ -96,5 +96,5 @@ ENTRY(__raw_writesw)
tst r2, #1
3: movne ip, r3, lsr #8
strneh ip, [r0]
- mov pc, lr
+ ret lr
ENDPROC(__raw_writesw)
diff --git a/arch/arm/lib/lib1funcs.S b/arch/arm/lib/lib1funcs.S
index c562f649734..947567ff67f 100644
--- a/arch/arm/lib/lib1funcs.S
+++ b/arch/arm/lib/lib1funcs.S
@@ -210,7 +210,7 @@ ENTRY(__aeabi_uidiv)
UNWIND(.fnstart)
subs r2, r1, #1
- moveq pc, lr
+ reteq lr
bcc Ldiv0
cmp r0, r1
bls 11f
@@ -220,16 +220,16 @@ UNWIND(.fnstart)
ARM_DIV_BODY r0, r1, r2, r3
mov r0, r2
- mov pc, lr
+ ret lr
11: moveq r0, #1
movne r0, #0
- mov pc, lr
+ ret lr
12: ARM_DIV2_ORDER r1, r2
mov r0, r0, lsr r2
- mov pc, lr
+ ret lr
UNWIND(.fnend)
ENDPROC(__udivsi3)
@@ -244,11 +244,11 @@ UNWIND(.fnstart)
moveq r0, #0
tsthi r1, r2 @ see if divisor is power of 2
andeq r0, r0, r2
- movls pc, lr
+ retls lr
ARM_MOD_BODY r0, r1, r2, r3
- mov pc, lr
+ ret lr
UNWIND(.fnend)
ENDPROC(__umodsi3)
@@ -274,23 +274,23 @@ UNWIND(.fnstart)
cmp ip, #0
rsbmi r0, r0, #0
- mov pc, lr
+ ret lr
10: teq ip, r0 @ same sign ?
rsbmi r0, r0, #0
- mov pc, lr
+ ret lr
11: movlo r0, #0
moveq r0, ip, asr #31
orreq r0, r0, #1
- mov pc, lr
+ ret lr
12: ARM_DIV2_ORDER r1, r2
cmp ip, #0
mov r0, r3, lsr r2
rsbmi r0, r0, #0
- mov pc, lr
+ ret lr
UNWIND(.fnend)
ENDPROC(__divsi3)
@@ -315,7 +315,7 @@ UNWIND(.fnstart)
10: cmp ip, #0
rsbmi r0, r0, #0
- mov pc, lr
+ ret lr
UNWIND(.fnend)
ENDPROC(__modsi3)
@@ -331,7 +331,7 @@ UNWIND(.save {r0, r1, ip, lr} )
ldmfd sp!, {r1, r2, ip, lr}
mul r3, r0, r2
sub r1, r1, r3
- mov pc, lr
+ ret lr
UNWIND(.fnend)
ENDPROC(__aeabi_uidivmod)
@@ -344,7 +344,7 @@ UNWIND(.save {r0, r1, ip, lr} )
ldmfd sp!, {r1, r2, ip, lr}
mul r3, r0, r2
sub r1, r1, r3
- mov pc, lr
+ ret lr
UNWIND(.fnend)
ENDPROC(__aeabi_idivmod)
diff --git a/arch/arm/lib/lshrdi3.S b/arch/arm/lib/lshrdi3.S
index f83d449141f..922dcd88b02 100644
--- a/arch/arm/lib/lshrdi3.S
+++ b/arch/arm/lib/lshrdi3.S
@@ -27,6 +27,7 @@ Boston, MA 02110-1301, USA. */
#include <linux/linkage.h>
+#include <asm/assembler.h>
#ifdef __ARMEB__
#define al r1
@@ -47,7 +48,7 @@ ENTRY(__aeabi_llsr)
THUMB( lslmi r3, ah, ip )
THUMB( orrmi al, al, r3 )
mov ah, ah, lsr r2
- mov pc, lr
+ ret lr
ENDPROC(__lshrdi3)
ENDPROC(__aeabi_llsr)
diff --git a/arch/arm/lib/memchr.S b/arch/arm/lib/memchr.S
index 1da86991d70..74a5bed6d99 100644
--- a/arch/arm/lib/memchr.S
+++ b/arch/arm/lib/memchr.S
@@ -22,5 +22,5 @@ ENTRY(memchr)
bne 1b
sub r0, r0, #1
2: movne r0, #0
- mov pc, lr
+ ret lr
ENDPROC(memchr)
diff --git a/arch/arm/lib/memset.S b/arch/arm/lib/memset.S
index 94b0650ea98..671455c854f 100644
--- a/arch/arm/lib/memset.S
+++ b/arch/arm/lib/memset.S
@@ -110,7 +110,7 @@ ENTRY(memset)
strneb r1, [ip], #1
tst r2, #1
strneb r1, [ip], #1
- mov pc, lr
+ ret lr
6: subs r2, r2, #4 @ 1 do we have enough
blt 5b @ 1 bytes to align with?
diff --git a/arch/arm/lib/memzero.S b/arch/arm/lib/memzero.S
index 3fbdef5f802..385ccb306fa 100644
--- a/arch/arm/lib/memzero.S
+++ b/arch/arm/lib/memzero.S
@@ -121,5 +121,5 @@ ENTRY(__memzero)
strneb r2, [r0], #1 @ 1
tst r1, #1 @ 1 a byte left over
strneb r2, [r0], #1 @ 1
- mov pc, lr @ 1
+ ret lr @ 1
ENDPROC(__memzero)
diff --git a/arch/arm/lib/muldi3.S b/arch/arm/lib/muldi3.S
index 36c91b4957e..20430595692 100644
--- a/arch/arm/lib/muldi3.S
+++ b/arch/arm/lib/muldi3.S
@@ -11,6 +11,7 @@
*/
#include <linux/linkage.h>
+#include <asm/assembler.h>
#ifdef __ARMEB__
#define xh r0
@@ -41,7 +42,7 @@ ENTRY(__aeabi_lmul)
adc xh, xh, yh, lsr #16
adds xl, xl, ip, lsl #16
adc xh, xh, ip, lsr #16
- mov pc, lr
+ ret lr
ENDPROC(__muldi3)
ENDPROC(__aeabi_lmul)
diff --git a/arch/arm/lib/putuser.S b/arch/arm/lib/putuser.S
index 3d73dcb959b..38d660d3705 100644
--- a/arch/arm/lib/putuser.S
+++ b/arch/arm/lib/putuser.S
@@ -36,7 +36,7 @@ ENTRY(__put_user_1)
check_uaccess r0, 1, r1, ip, __put_user_bad
1: TUSER(strb) r2, [r0]
mov r0, #0
- mov pc, lr
+ ret lr
ENDPROC(__put_user_1)
ENTRY(__put_user_2)
@@ -60,14 +60,14 @@ ENTRY(__put_user_2)
#endif
#endif /* CONFIG_THUMB2_KERNEL */
mov r0, #0
- mov pc, lr
+ ret lr
ENDPROC(__put_user_2)
ENTRY(__put_user_4)
check_uaccess r0, 4, r1, ip, __put_user_bad
4: TUSER(str) r2, [r0]
mov r0, #0
- mov pc, lr
+ ret lr
ENDPROC(__put_user_4)
ENTRY(__put_user_8)
@@ -80,12 +80,12 @@ ENTRY(__put_user_8)
6: TUSER(str) r3, [r0]
#endif
mov r0, #0
- mov pc, lr
+ ret lr
ENDPROC(__put_user_8)
__put_user_bad:
mov r0, #-EFAULT
- mov pc, lr
+ ret lr
ENDPROC(__put_user_bad)
.pushsection __ex_table, "a"
diff --git a/arch/arm/lib/strchr.S b/arch/arm/lib/strchr.S
index d8f2a1c1aea..013d64c71e8 100644
--- a/arch/arm/lib/strchr.S
+++ b/arch/arm/lib/strchr.S
@@ -23,5 +23,5 @@ ENTRY(strchr)
teq r2, r1
movne r0, #0
subeq r0, r0, #1
- mov pc, lr
+ ret lr
ENDPROC(strchr)
diff --git a/arch/arm/lib/strrchr.S b/arch/arm/lib/strrchr.S
index 302f20cd242..3cec1c7482c 100644
--- a/arch/arm/lib/strrchr.S
+++ b/arch/arm/lib/strrchr.S
@@ -22,5 +22,5 @@ ENTRY(strrchr)
teq r2, #0
bne 1b
mov r0, r3
- mov pc, lr
+ ret lr
ENDPROC(strrchr)
diff --git a/arch/arm/lib/ucmpdi2.S b/arch/arm/lib/ucmpdi2.S
index f0df6a91db0..ad4a6309141 100644
--- a/arch/arm/lib/ucmpdi2.S
+++ b/arch/arm/lib/ucmpdi2.S
@@ -11,6 +11,7 @@
*/
#include <linux/linkage.h>
+#include <asm/assembler.h>
#ifdef __ARMEB__
#define xh r0
@@ -31,7 +32,7 @@ ENTRY(__ucmpdi2)
movlo r0, #0
moveq r0, #1
movhi r0, #2
- mov pc, lr
+ ret lr
ENDPROC(__ucmpdi2)
@@ -44,7 +45,7 @@ ENTRY(__aeabi_ulcmp)
movlo r0, #-1
moveq r0, #0
movhi r0, #1
- mov pc, lr
+ ret lr
ENDPROC(__aeabi_ulcmp)
diff --git a/arch/arm/mach-davinci/sleep.S b/arch/arm/mach-davinci/sleep.S
index d4e9316ecac..a5336a5e273 100644
--- a/arch/arm/mach-davinci/sleep.S
+++ b/arch/arm/mach-davinci/sleep.S
@@ -213,7 +213,7 @@ ddr2clk_stop_done:
cmp ip, r0
bne ddr2clk_stop_done
- mov pc, lr
+ ret lr
ENDPROC(davinci_ddr_psc_config)
CACHE_FLUSH:
diff --git a/arch/arm/mach-ebsa110/include/mach/memory.h b/arch/arm/mach-ebsa110/include/mach/memory.h
index 8e49066ad85..866f8a1c6ff 100644
--- a/arch/arm/mach-ebsa110/include/mach/memory.h
+++ b/arch/arm/mach-ebsa110/include/mach/memory.h
@@ -17,11 +17,6 @@
#define __ASM_ARCH_MEMORY_H
/*
- * Physical DRAM offset.
- */
-#define PLAT_PHYS_OFFSET UL(0x00000000)
-
-/*
* Cache flushing area - SRAM
*/
#define FLUSH_BASE_PHYS 0x40000000
diff --git a/arch/arm/mach-ep93xx/crunch-bits.S b/arch/arm/mach-ep93xx/crunch-bits.S
index e96923a3017..ee0be2af5c6 100644
--- a/arch/arm/mach-ep93xx/crunch-bits.S
+++ b/arch/arm/mach-ep93xx/crunch-bits.S
@@ -198,7 +198,7 @@ crunch_load:
get_thread_info r10
#endif
2: dec_preempt_count r10, r3
- mov pc, lr
+ ret lr
/*
* Back up crunch regs to save area and disable access to them
@@ -277,7 +277,7 @@ ENTRY(crunch_task_copy)
mov r3, lr @ preserve return address
bl crunch_save
msr cpsr_c, ip @ restore interrupt mode
- mov pc, r3
+ ret r3
/*
* Restore crunch state from given memory address
@@ -310,4 +310,4 @@ ENTRY(crunch_task_restore)
mov r3, lr @ preserve return address
bl crunch_load
msr cpsr_c, ip @ restore interrupt mode
- mov pc, r3
+ ret r3
diff --git a/arch/arm/mach-ep93xx/include/mach/memory.h b/arch/arm/mach-ep93xx/include/mach/memory.h
deleted file mode 100644
index c9400cf0051..00000000000
--- a/arch/arm/mach-ep93xx/include/mach/memory.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * arch/arm/mach-ep93xx/include/mach/memory.h
- */
-
-#ifndef __ASM_ARCH_MEMORY_H
-#define __ASM_ARCH_MEMORY_H
-
-#if defined(CONFIG_EP93XX_SDCE3_SYNC_PHYS_OFFSET)
-#define PLAT_PHYS_OFFSET UL(0x00000000)
-#elif defined(CONFIG_EP93XX_SDCE0_PHYS_OFFSET)
-#define PLAT_PHYS_OFFSET UL(0xc0000000)
-#elif defined(CONFIG_EP93XX_SDCE1_PHYS_OFFSET)
-#define PLAT_PHYS_OFFSET UL(0xd0000000)
-#elif defined(CONFIG_EP93XX_SDCE2_PHYS_OFFSET)
-#define PLAT_PHYS_OFFSET UL(0xe0000000)
-#elif defined(CONFIG_EP93XX_SDCE3_ASYNC_PHYS_OFFSET)
-#define PLAT_PHYS_OFFSET UL(0xf0000000)
-#else
-#error "Kconfig bug: No EP93xx PHYS_OFFSET set"
-#endif
-
-#endif
diff --git a/arch/arm/mach-exynos/Kconfig b/arch/arm/mach-exynos/Kconfig
index 8f9b66c4ac7..5d4ff6571dc 100644
--- a/arch/arm/mach-exynos/Kconfig
+++ b/arch/arm/mach-exynos/Kconfig
@@ -119,6 +119,7 @@ config EXYNOS5420_MCPM
bool "Exynos5420 Multi-Cluster PM support"
depends on MCPM && SOC_EXYNOS5420
select ARM_CCI
+ select ARM_CPU_SUSPEND
help
This is needed to provide CPU and cluster power management
on Exynos5420 implementing big.LITTLE.
diff --git a/arch/arm/mach-exynos/mcpm-exynos.c b/arch/arm/mach-exynos/mcpm-exynos.c
index ace0ed61747..a96b78f93f2 100644
--- a/arch/arm/mach-exynos/mcpm-exynos.c
+++ b/arch/arm/mach-exynos/mcpm-exynos.c
@@ -196,7 +196,7 @@ static void exynos_power_down(void)
if (last_man && __mcpm_outbound_enter_critical(cpu, cluster)) {
arch_spin_unlock(&exynos_mcpm_lock);
- if (read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A15) {
+ if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A15) {
/*
* On the Cortex-A15 we need to disable
* L2 prefetching before flushing the cache.
@@ -289,6 +289,19 @@ static void __naked exynos_pm_power_up_setup(unsigned int affinity_level)
"b cci_enable_port_for_self");
}
+static void __init exynos_cache_off(void)
+{
+ if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A15) {
+ /* disable L2 prefetching on the Cortex-A15 */
+ asm volatile(
+ "mcr p15, 1, %0, c15, c0, 3\n\t"
+ "isb\n\t"
+ "dsb"
+ : : "r" (0x400));
+ }
+ exynos_v7_exit_coherency_flush(all);
+}
+
static const struct of_device_id exynos_dt_mcpm_match[] = {
{ .compatible = "samsung,exynos5420" },
{ .compatible = "samsung,exynos5800" },
@@ -332,6 +345,8 @@ static int __init exynos_mcpm_init(void)
ret = mcpm_platform_register(&exynos_power_ops);
if (!ret)
ret = mcpm_sync_init(exynos_pm_power_up_setup);
+ if (!ret)
+ ret = mcpm_loopback(exynos_cache_off); /* turn on the CCI */
if (ret) {
iounmap(ns_sram_base_addr);
return ret;
diff --git a/arch/arm/mach-exynos/platsmp.c b/arch/arm/mach-exynos/platsmp.c
index 50b9aad5e27..70d1e65a51d 100644
--- a/arch/arm/mach-exynos/platsmp.c
+++ b/arch/arm/mach-exynos/platsmp.c
@@ -190,7 +190,7 @@ static void __init exynos_smp_init_cpus(void)
void __iomem *scu_base = scu_base_addr();
unsigned int i, ncores;
- if (read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A9)
+ if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A9)
ncores = scu_base ? scu_get_core_count(scu_base) : 1;
else
/*
@@ -216,7 +216,7 @@ static void __init exynos_smp_prepare_cpus(unsigned int max_cpus)
exynos_sysram_init();
- if (read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A9)
+ if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A9)
scu_enable(scu_base_addr());
/*
diff --git a/arch/arm/mach-exynos/pm.c b/arch/arm/mach-exynos/pm.c
index 202ca73e49c..67d383de614 100644
--- a/arch/arm/mach-exynos/pm.c
+++ b/arch/arm/mach-exynos/pm.c
@@ -300,7 +300,7 @@ static int exynos_pm_suspend(void)
tmp = (S5P_USE_STANDBY_WFI0 | S5P_USE_STANDBY_WFE0);
__raw_writel(tmp, S5P_CENTRAL_SEQ_OPTION);
- if (read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A9)
+ if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A9)
exynos_cpu_save_register();
return 0;
@@ -334,7 +334,7 @@ static void exynos_pm_resume(void)
if (exynos_pm_central_resume())
goto early_wakeup;
- if (read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A9)
+ if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A9)
exynos_cpu_restore_register();
/* For release retention */
@@ -353,7 +353,7 @@ static void exynos_pm_resume(void)
s3c_pm_do_restore_core(exynos_core_save, ARRAY_SIZE(exynos_core_save));
- if (read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A9)
+ if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A9)
scu_enable(S5P_VA_SCU);
early_wakeup:
@@ -440,15 +440,14 @@ static int exynos_cpu_pm_notifier(struct notifier_block *self,
case CPU_PM_ENTER:
if (cpu == 0) {
exynos_pm_central_suspend();
- if (read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A9)
+ if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A9)
exynos_cpu_save_register();
}
break;
case CPU_PM_EXIT:
if (cpu == 0) {
- if (read_cpuid_part_number() ==
- ARM_CPU_PART_CORTEX_A9) {
+ if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A9) {
scu_enable(S5P_VA_SCU);
exynos_cpu_restore_register();
}
diff --git a/arch/arm/mach-footbridge/include/mach/memory.h b/arch/arm/mach-footbridge/include/mach/memory.h
index 5c6df377f96..6f2ecccdf32 100644
--- a/arch/arm/mach-footbridge/include/mach/memory.h
+++ b/arch/arm/mach-footbridge/include/mach/memory.h
@@ -59,11 +59,6 @@ extern unsigned long __bus_to_pfn(unsigned long);
*/
#define FLUSH_BASE 0xf9000000
-/*
- * Physical DRAM offset.
- */
-#define PLAT_PHYS_OFFSET UL(0x00000000)
-
#define FLUSH_BASE_PHYS 0x50000000
#endif
diff --git a/arch/arm/mach-imx/suspend-imx6.S b/arch/arm/mach-imx/suspend-imx6.S
index fe123b079c0..74b50f1982d 100644
--- a/arch/arm/mach-imx/suspend-imx6.S
+++ b/arch/arm/mach-imx/suspend-imx6.S
@@ -10,6 +10,7 @@
*/
#include <linux/linkage.h>
+#include <asm/assembler.h>
#include <asm/asm-offsets.h>
#include <asm/hardware/cache-l2x0.h>
#include "hardware.h"
@@ -301,7 +302,7 @@ rbc_loop:
resume_mmdc
/* return to suspend finish */
- mov pc, lr
+ ret lr
resume:
/* invalidate L1 I-cache first */
@@ -325,7 +326,7 @@ resume:
mov r5, #0x1
resume_mmdc
- mov pc, lr
+ ret lr
ENDPROC(imx6_suspend)
/*
diff --git a/arch/arm/mach-integrator/include/mach/memory.h b/arch/arm/mach-integrator/include/mach/memory.h
index 334d5e27188..7268cb50ded 100644
--- a/arch/arm/mach-integrator/include/mach/memory.h
+++ b/arch/arm/mach-integrator/include/mach/memory.h
@@ -20,11 +20,6 @@
#ifndef __ASM_ARCH_MEMORY_H
#define __ASM_ARCH_MEMORY_H
-/*
- * Physical DRAM offset.
- */
-#define PLAT_PHYS_OFFSET UL(0x00000000)
-
#define BUS_OFFSET UL(0x80000000)
#define __virt_to_bus(x) ((x) - PAGE_OFFSET + BUS_OFFSET)
#define __bus_to_virt(x) ((x) - BUS_OFFSET + PAGE_OFFSET)
diff --git a/arch/arm/mach-iop13xx/include/mach/iop13xx.h b/arch/arm/mach-iop13xx/include/mach/iop13xx.h
index 17b40279e0a..9311ee2126d 100644
--- a/arch/arm/mach-iop13xx/include/mach/iop13xx.h
+++ b/arch/arm/mach-iop13xx/include/mach/iop13xx.h
@@ -3,7 +3,7 @@
#ifndef __ASSEMBLY__
-#include <linux/reboot.h>
+enum reboot_mode;
/* The ATU offsets can change based on the strapping */
extern u32 iop13xx_atux_pmmr_offset;
diff --git a/arch/arm/mach-iop13xx/include/mach/memory.h b/arch/arm/mach-iop13xx/include/mach/memory.h
index 7c032d0ab24..59307e78758 100644
--- a/arch/arm/mach-iop13xx/include/mach/memory.h
+++ b/arch/arm/mach-iop13xx/include/mach/memory.h
@@ -3,11 +3,6 @@
#include <mach/hardware.h>
-/*
- * Physical DRAM offset.
- */
-#define PLAT_PHYS_OFFSET UL(0x00000000)
-
#ifndef __ASSEMBLY__
#if defined(CONFIG_ARCH_IOP13XX)
diff --git a/arch/arm/mach-iop13xx/setup.c b/arch/arm/mach-iop13xx/setup.c
index bca96f43349..53c316f7301 100644
--- a/arch/arm/mach-iop13xx/setup.c
+++ b/arch/arm/mach-iop13xx/setup.c
@@ -20,6 +20,7 @@
#include <linux/dma-mapping.h>
#include <linux/serial_8250.h>
#include <linux/io.h>
+#include <linux/reboot.h>
#ifdef CONFIG_MTD_PHYSMAP
#include <linux/mtd/physmap.h>
#endif
diff --git a/arch/arm/mach-ks8695/include/mach/memory.h b/arch/arm/mach-ks8695/include/mach/memory.h
index 95e731a7ed6..ab0d27fa896 100644
--- a/arch/arm/mach-ks8695/include/mach/memory.h
+++ b/arch/arm/mach-ks8695/include/mach/memory.h
@@ -15,11 +15,6 @@
#include <mach/hardware.h>
-/*
- * Physical SRAM offset.
- */
-#define PLAT_PHYS_OFFSET KS8695_SDRAM_PA
-
#ifndef __ASSEMBLY__
#ifdef CONFIG_PCI
diff --git a/arch/arm/mach-mvebu/coherency_ll.S b/arch/arm/mach-mvebu/coherency_ll.S
index 510c29e079c..f5d881b5d0f 100644
--- a/arch/arm/mach-mvebu/coherency_ll.S
+++ b/arch/arm/mach-mvebu/coherency_ll.S
@@ -46,7 +46,7 @@ ENTRY(ll_get_coherency_base)
ldr r1, =coherency_base
ldr r1, [r1]
2:
- mov pc, lr
+ ret lr
ENDPROC(ll_get_coherency_base)
/*
@@ -63,7 +63,7 @@ ENTRY(ll_get_coherency_cpumask)
mov r2, #(1 << 24)
lsl r3, r2, r3
ARM_BE8(rev r3, r3)
- mov pc, lr
+ ret lr
ENDPROC(ll_get_coherency_cpumask)
/*
@@ -94,7 +94,7 @@ ENTRY(ll_add_cpu_to_smp_group)
strex r1, r2, [r0]
cmp r1, #0
bne 1b
- mov pc, lr
+ ret lr
ENDPROC(ll_add_cpu_to_smp_group)
ENTRY(ll_enable_coherency)
@@ -118,7 +118,7 @@ ENTRY(ll_enable_coherency)
bne 1b
dsb
mov r0, #0
- mov pc, lr
+ ret lr
ENDPROC(ll_enable_coherency)
ENTRY(ll_disable_coherency)
@@ -141,7 +141,7 @@ ENTRY(ll_disable_coherency)
cmp r1, #0
bne 1b
dsb
- mov pc, lr
+ ret lr
ENDPROC(ll_disable_coherency)
.align 2
diff --git a/arch/arm/mach-mvebu/headsmp-a9.S b/arch/arm/mach-mvebu/headsmp-a9.S
index da5bb292b91..2c3c7fc65e2 100644
--- a/arch/arm/mach-mvebu/headsmp-a9.S
+++ b/arch/arm/mach-mvebu/headsmp-a9.S
@@ -29,7 +29,7 @@ ARM_BE8(setend be)
ldr r0, [r0]
ldr r1, [r0]
ARM_BE8(rev r1, r1)
- mov pc, r1
+ ret r1
1:
.word CPU_RESUME_ADDR_REG
armada_375_smp_cpu1_enable_code_end:
diff --git a/arch/arm/mach-omap1/include/mach/memory.h b/arch/arm/mach-omap1/include/mach/memory.h
index 3c253052311..058a4f7d44c 100644
--- a/arch/arm/mach-omap1/include/mach/memory.h
+++ b/arch/arm/mach-omap1/include/mach/memory.h
@@ -6,11 +6,6 @@
#define __ASM_ARCH_MEMORY_H
/*
- * Physical DRAM offset.
- */
-#define PLAT_PHYS_OFFSET UL(0x10000000)
-
-/*
* Bus address is physical address, except for OMAP-1510 Local Bus.
* OMAP-1510 bus address is translated into a Local Bus address if the
* OMAP bus type is lbus. We do the address translation based on the
diff --git a/arch/arm/mach-omap2/sleep44xx.S b/arch/arm/mach-omap2/sleep44xx.S
index 9086ce03ae1..b84a0122d82 100644
--- a/arch/arm/mach-omap2/sleep44xx.S
+++ b/arch/arm/mach-omap2/sleep44xx.S
@@ -10,6 +10,7 @@
*/
#include <linux/linkage.h>
+#include <asm/assembler.h>
#include <asm/smp_scu.h>
#include <asm/memory.h>
#include <asm/hardware/cache-l2x0.h>
@@ -334,7 +335,7 @@ ENDPROC(omap4_cpu_resume)
#ifndef CONFIG_OMAP4_ERRATA_I688
ENTRY(omap_bus_sync)
- mov pc, lr
+ ret lr
ENDPROC(omap_bus_sync)
#endif
diff --git a/arch/arm/mach-omap2/sram242x.S b/arch/arm/mach-omap2/sram242x.S
index 680a7c56cc3..2c88ff2d023 100644
--- a/arch/arm/mach-omap2/sram242x.S
+++ b/arch/arm/mach-omap2/sram242x.S
@@ -101,7 +101,7 @@ i_dll_wait:
i_dll_delay:
subs r4, r4, #0x1
bne i_dll_delay
- mov pc, lr
+ ret lr
/*
* shift up or down voltage, use R9 as input to tell level.
@@ -125,7 +125,7 @@ volt_delay:
ldr r7, [r3] @ get timer value
cmp r5, r7 @ time up?
bhi volt_delay @ not yet->branch
- mov pc, lr @ back to caller.
+ ret lr @ back to caller.
omap242x_sdi_cm_clksel2_pll:
.word OMAP2420_CM_REGADDR(PLL_MOD, CM_CLKSEL2)
@@ -220,7 +220,7 @@ volt_delay_c:
ldr r7, [r10] @ get timer value
cmp r8, r7 @ time up?
bhi volt_delay_c @ not yet->branch
- mov pc, lr @ back to caller
+ ret lr @ back to caller
omap242x_srs_cm_clksel2_pll:
.word OMAP2420_CM_REGADDR(PLL_MOD, CM_CLKSEL2)
diff --git a/arch/arm/mach-omap2/sram243x.S b/arch/arm/mach-omap2/sram243x.S
index a1e9edd673f..d5deb9761fc 100644
--- a/arch/arm/mach-omap2/sram243x.S
+++ b/arch/arm/mach-omap2/sram243x.S
@@ -101,7 +101,7 @@ i_dll_wait:
i_dll_delay:
subs r4, r4, #0x1
bne i_dll_delay
- mov pc, lr
+ ret lr
/*
* shift up or down voltage, use R9 as input to tell level.
@@ -125,7 +125,7 @@ volt_delay:
ldr r7, [r3] @ get timer value
cmp r5, r7 @ time up?
bhi volt_delay @ not yet->branch
- mov pc, lr @ back to caller.
+ ret lr @ back to caller.
omap243x_sdi_cm_clksel2_pll:
.word OMAP2430_CM_REGADDR(PLL_MOD, CM_CLKSEL2)
@@ -220,7 +220,7 @@ volt_delay_c:
ldr r7, [r10] @ get timer value
cmp r8, r7 @ time up?
bhi volt_delay_c @ not yet->branch
- mov pc, lr @ back to caller
+ ret lr @ back to caller
omap243x_srs_cm_clksel2_pll:
.word OMAP2430_CM_REGADDR(PLL_MOD, CM_CLKSEL2)
diff --git a/arch/arm/mach-pxa/mioa701_bootresume.S b/arch/arm/mach-pxa/mioa701_bootresume.S
index 324d25a48c8..81591491ab9 100644
--- a/arch/arm/mach-pxa/mioa701_bootresume.S
+++ b/arch/arm/mach-pxa/mioa701_bootresume.S
@@ -29,7 +29,7 @@ ENTRY(mioa701_jumpaddr)
str r1, [r0] @ Early disable resume for next boot
ldr r0, mioa701_jumpaddr @ (Murphy's Law)
ldr r0, [r0]
- mov pc, r0
+ ret r0
2:
ENTRY(mioa701_bootstrap_lg)
diff --git a/arch/arm/mach-pxa/standby.S b/arch/arm/mach-pxa/standby.S
index 29f5f5c180b..eab1645bb4a 100644
--- a/arch/arm/mach-pxa/standby.S
+++ b/arch/arm/mach-pxa/standby.S
@@ -29,7 +29,7 @@ ENTRY(pxa_cpu_standby)
.align 5
1: mcr p14, 0, r2, c7, c0, 0 @ put the system into Standby
str r1, [r0] @ make sure PSSR_PH/STS are clear
- mov pc, lr
+ ret lr
#endif
@@ -108,7 +108,7 @@ ENTRY(pm_enter_standby_start)
bic r0, r0, #0x20000000
str r0, [r1, #PXA3_DMCIER]
- mov pc, lr
+ ret lr
ENTRY(pm_enter_standby_end)
#endif
diff --git a/arch/arm/mach-realview/include/mach/memory.h b/arch/arm/mach-realview/include/mach/memory.h
index db09170e383..23e7a313f75 100644
--- a/arch/arm/mach-realview/include/mach/memory.h
+++ b/arch/arm/mach-realview/include/mach/memory.h
@@ -20,15 +20,6 @@
#ifndef __ASM_ARCH_MEMORY_H
#define __ASM_ARCH_MEMORY_H
-/*
- * Physical DRAM offset.
- */
-#ifdef CONFIG_REALVIEW_HIGH_PHYS_OFFSET
-#define PLAT_PHYS_OFFSET UL(0x70000000)
-#else
-#define PLAT_PHYS_OFFSET UL(0x00000000)
-#endif
-
#ifdef CONFIG_SPARSEMEM
/*
diff --git a/arch/arm/mach-rpc/include/mach/memory.h b/arch/arm/mach-rpc/include/mach/memory.h
index 18a221093bf..b7e49571417 100644
--- a/arch/arm/mach-rpc/include/mach/memory.h
+++ b/arch/arm/mach-rpc/include/mach/memory.h
@@ -19,11 +19,6 @@
#define __ASM_ARCH_MEMORY_H
/*
- * Physical DRAM offset.
- */
-#define PLAT_PHYS_OFFSET UL(0x10000000)
-
-/*
* Cache flushing area - ROM
*/
#define FLUSH_BASE_PHYS 0x00000000
diff --git a/arch/arm/mach-s3c24xx/sleep-s3c2410.S b/arch/arm/mach-s3c24xx/sleep-s3c2410.S
index c9b91223697..875ba891112 100644
--- a/arch/arm/mach-s3c24xx/sleep-s3c2410.S
+++ b/arch/arm/mach-s3c24xx/sleep-s3c2410.S
@@ -66,4 +66,4 @@ s3c2410_do_sleep:
streq r8, [r5] @ SDRAM power-down config
streq r9, [r6] @ CPU sleep
1: beq 1b
- mov pc, r14
+ ret lr
diff --git a/arch/arm/mach-s3c24xx/sleep-s3c2412.S b/arch/arm/mach-s3c24xx/sleep-s3c2412.S
index 5adaceb7da1..6bf5b4d8743 100644
--- a/arch/arm/mach-s3c24xx/sleep-s3c2412.S
+++ b/arch/arm/mach-s3c24xx/sleep-s3c2412.S
@@ -65,4 +65,4 @@ s3c2412_sleep_enter1:
strne r9, [r3]
bne s3c2412_sleep_enter1
- mov pc, r14
+ ret lr
diff --git a/arch/arm/mach-s5pv210/include/mach/memory.h b/arch/arm/mach-s5pv210/include/mach/memory.h
index 2d3cfa221d5..d584fac9156 100644
--- a/arch/arm/mach-s5pv210/include/mach/memory.h
+++ b/arch/arm/mach-s5pv210/include/mach/memory.h
@@ -13,8 +13,6 @@
#ifndef __ASM_ARCH_MEMORY_H
#define __ASM_ARCH_MEMORY_H
-#define PLAT_PHYS_OFFSET UL(0x20000000)
-
/*
* Sparsemem support
* Physical memory can be located from 0x20000000 to 0x7fffffff,
diff --git a/arch/arm/mach-sa1100/include/mach/memory.h b/arch/arm/mach-sa1100/include/mach/memory.h
index 12d376795ab..2054051eb79 100644
--- a/arch/arm/mach-sa1100/include/mach/memory.h
+++ b/arch/arm/mach-sa1100/include/mach/memory.h
@@ -10,11 +10,6 @@
#include <asm/sizes.h>
/*
- * Physical DRAM offset is 0xc0000000 on the SA1100
- */
-#define PLAT_PHYS_OFFSET UL(0xc0000000)
-
-/*
* Because of the wide memory address space between physical RAM banks on the
* SA1100, it's much convenient to use Linux's SparseMEM support to implement
* our memory map representation. Assuming all memory nodes have equal access
diff --git a/arch/arm/mach-shmobile/headsmp.S b/arch/arm/mach-shmobile/headsmp.S
index e5be5c88644..293007579b8 100644
--- a/arch/arm/mach-shmobile/headsmp.S
+++ b/arch/arm/mach-shmobile/headsmp.S
@@ -12,6 +12,7 @@
*/
#include <linux/linkage.h>
#include <linux/init.h>
+#include <asm/assembler.h>
#include <asm/memory.h>
ENTRY(shmobile_invalidate_start)
@@ -75,7 +76,7 @@ shmobile_smp_boot_next:
shmobile_smp_boot_found:
ldr r0, [r7, r1, lsl #2]
- mov pc, r9
+ ret r9
ENDPROC(shmobile_smp_boot)
ENTRY(shmobile_smp_sleep)
diff --git a/arch/arm/mach-tegra/sleep-tegra20.S b/arch/arm/mach-tegra/sleep-tegra20.S
index aaaf3abd268..be4bc5f853f 100644
--- a/arch/arm/mach-tegra/sleep-tegra20.S
+++ b/arch/arm/mach-tegra/sleep-tegra20.S
@@ -78,7 +78,7 @@ ENTRY(tegra20_hotplug_shutdown)
/* Put this CPU down */
cpu_id r0
bl tegra20_cpu_shutdown
- mov pc, lr @ should never get here
+ ret lr @ should never get here
ENDPROC(tegra20_hotplug_shutdown)
/*
@@ -96,7 +96,7 @@ ENDPROC(tegra20_hotplug_shutdown)
*/
ENTRY(tegra20_cpu_shutdown)
cmp r0, #0
- moveq pc, lr @ must not be called for CPU 0
+ reteq lr @ must not be called for CPU 0
mov32 r1, TEGRA_PMC_VIRT + PMC_SCRATCH41
mov r12, #CPU_RESETTABLE
str r12, [r1]
@@ -117,7 +117,7 @@ ENTRY(tegra20_cpu_shutdown)
cpu_id r3
cmp r3, r0
beq .
- mov pc, lr
+ ret lr
ENDPROC(tegra20_cpu_shutdown)
#endif
@@ -164,7 +164,7 @@ ENTRY(tegra_pen_lock)
cmpeq r12, r0 @ !turn == cpu?
beq 1b @ while !turn == cpu && flag[!cpu] == 1
- mov pc, lr @ locked
+ ret lr @ locked
ENDPROC(tegra_pen_lock)
ENTRY(tegra_pen_unlock)
@@ -176,7 +176,7 @@ ENTRY(tegra_pen_unlock)
addne r2, r3, #PMC_SCRATCH39
mov r12, #0
str r12, [r2]
- mov pc, lr
+ ret lr
ENDPROC(tegra_pen_unlock)
/*
@@ -189,7 +189,7 @@ ENTRY(tegra20_cpu_clear_resettable)
mov32 r1, TEGRA_PMC_VIRT + PMC_SCRATCH41
mov r12, #CPU_NOT_RESETTABLE
str r12, [r1]
- mov pc, lr
+ ret lr
ENDPROC(tegra20_cpu_clear_resettable)
/*
@@ -202,7 +202,7 @@ ENTRY(tegra20_cpu_set_resettable_soon)
mov32 r1, TEGRA_PMC_VIRT + PMC_SCRATCH41
mov r12, #CPU_RESETTABLE_SOON
str r12, [r1]
- mov pc, lr
+ ret lr
ENDPROC(tegra20_cpu_set_resettable_soon)
/*
@@ -217,7 +217,7 @@ ENTRY(tegra20_cpu_is_resettable_soon)
cmp r12, #CPU_RESETTABLE_SOON
moveq r0, #1
movne r0, #0
- mov pc, lr
+ ret lr
ENDPROC(tegra20_cpu_is_resettable_soon)
/*
@@ -239,7 +239,7 @@ ENTRY(tegra20_sleep_core_finish)
mov32 r1, TEGRA_IRAM_LPx_RESUME_AREA
add r0, r0, r1
- mov pc, r3
+ ret r3
ENDPROC(tegra20_sleep_core_finish)
/*
@@ -402,7 +402,7 @@ exit_selfrefresh_loop:
mov32 r0, TEGRA_PMC_BASE
ldr r0, [r0, #PMC_SCRATCH41]
- mov pc, r0 @ jump to tegra_resume
+ ret r0 @ jump to tegra_resume
ENDPROC(tegra20_lp1_reset)
/*
@@ -455,7 +455,7 @@ tegra20_switch_cpu_to_clk32k:
mov r0, #0 /* brust policy = 32KHz */
str r0, [r5, #CLK_RESET_SCLK_BURST]
- mov pc, lr
+ ret lr
/*
* tegra20_enter_sleep
@@ -535,7 +535,7 @@ padsave_done:
adr r2, tegra20_sclk_save
str r0, [r2]
dsb
- mov pc, lr
+ ret lr
tegra20_sdram_pad_address:
.word TEGRA_APB_MISC_BASE + APB_MISC_XM2CFGCPADCTRL
diff --git a/arch/arm/mach-tegra/sleep-tegra30.S b/arch/arm/mach-tegra/sleep-tegra30.S
index b16d4a57fa5..09cad9b071d 100644
--- a/arch/arm/mach-tegra/sleep-tegra30.S
+++ b/arch/arm/mach-tegra/sleep-tegra30.S
@@ -142,7 +142,7 @@ ENTRY(tegra30_hotplug_shutdown)
/* Powergate this CPU */
mov r0, #TEGRA30_POWER_HOTPLUG_SHUTDOWN
bl tegra30_cpu_shutdown
- mov pc, lr @ should never get here
+ ret lr @ should never get here
ENDPROC(tegra30_hotplug_shutdown)
/*
@@ -161,7 +161,7 @@ ENTRY(tegra30_cpu_shutdown)
bne _no_cpu0_chk @ It's not Tegra30
cmp r3, #0
- moveq pc, lr @ Must never be called for CPU 0
+ reteq lr @ Must never be called for CPU 0
_no_cpu0_chk:
ldr r12, =TEGRA_FLOW_CTRL_VIRT
@@ -266,7 +266,7 @@ ENTRY(tegra30_sleep_core_finish)
mov32 r1, TEGRA_IRAM_LPx_RESUME_AREA
add r0, r0, r1
- mov pc, r3
+ ret r3
ENDPROC(tegra30_sleep_core_finish)
/*
@@ -285,7 +285,7 @@ ENTRY(tegra30_sleep_cpu_secondary_finish)
mov r0, #0 @ power mode flags (!hotplug)
bl tegra30_cpu_shutdown
mov r0, #1 @ never return here
- mov pc, r7
+ ret r7
ENDPROC(tegra30_sleep_cpu_secondary_finish)
/*
@@ -529,7 +529,7 @@ __no_dual_emc_chanl:
mov32 r0, TEGRA_PMC_BASE
ldr r0, [r0, #PMC_SCRATCH41]
- mov pc, r0 @ jump to tegra_resume
+ ret r0 @ jump to tegra_resume
ENDPROC(tegra30_lp1_reset)
.align L1_CACHE_SHIFT
@@ -659,7 +659,7 @@ _no_pll_in_iddq:
mov r0, #0 /* brust policy = 32KHz */
str r0, [r5, #CLK_RESET_SCLK_BURST]
- mov pc, lr
+ ret lr
/*
* tegra30_enter_sleep
@@ -819,7 +819,7 @@ pmc_io_dpd_skip:
dsb
- mov pc, lr
+ ret lr
.ltorg
/* dummy symbol for end of IRAM */
diff --git a/arch/arm/mach-tegra/sleep.S b/arch/arm/mach-tegra/sleep.S
index 8d06213fbc4..f024a5109e8 100644
--- a/arch/arm/mach-tegra/sleep.S
+++ b/arch/arm/mach-tegra/sleep.S
@@ -87,7 +87,7 @@ ENTRY(tegra_init_l2_for_a15)
mcrne p15, 0x1, r0, c9, c0, 2
_exit_init_l2_a15:
- mov pc, lr
+ ret lr
ENDPROC(tegra_init_l2_for_a15)
/*
@@ -111,7 +111,7 @@ ENTRY(tegra_sleep_cpu_finish)
add r3, r3, r0
mov r0, r1
- mov pc, r3
+ ret r3
ENDPROC(tegra_sleep_cpu_finish)
/*
@@ -139,7 +139,7 @@ ENTRY(tegra_shut_off_mmu)
moveq r3, #0
streq r3, [r2, #L2X0_CTRL]
#endif
- mov pc, r0
+ ret r0
ENDPROC(tegra_shut_off_mmu)
.popsection
@@ -156,6 +156,6 @@ ENTRY(tegra_switch_cpu_to_pllp)
str r0, [r5, #CLK_RESET_CCLK_BURST]
mov r0, #0
str r0, [r5, #CLK_RESET_CCLK_DIVIDER]
- mov pc, lr
+ ret lr
ENDPROC(tegra_switch_cpu_to_pllp)
#endif
diff --git a/arch/arm/mach-vexpress/tc2_pm.c b/arch/arm/mach-vexpress/tc2_pm.c
index b743a0ae02c..2fb78b4648c 100644
--- a/arch/arm/mach-vexpress/tc2_pm.c
+++ b/arch/arm/mach-vexpress/tc2_pm.c
@@ -152,7 +152,7 @@ static void tc2_pm_down(u64 residency)
if (last_man && __mcpm_outbound_enter_critical(cpu, cluster)) {
arch_spin_unlock(&tc2_pm_lock);
- if (read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A15) {
+ if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A15) {
/*
* On the Cortex-A15 we need to disable
* L2 prefetching before flushing the cache.
@@ -323,6 +323,21 @@ static void __naked tc2_pm_power_up_setup(unsigned int affinity_level)
" b cci_enable_port_for_self ");
}
+static void __init tc2_cache_off(void)
+{
+ pr_info("TC2: disabling cache during MCPM loopback test\n");
+ if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A15) {
+ /* disable L2 prefetching on the Cortex-A15 */
+ asm volatile(
+ "mcr p15, 1, %0, c15, c0, 3 \n\t"
+ "isb \n\t"
+ "dsb "
+ : : "r" (0x400) );
+ }
+ v7_exit_coherency_flush(all);
+ cci_disable_port_by_cpu(read_cpuid_mpidr());
+}
+
static int __init tc2_pm_init(void)
{
int ret, irq;
@@ -370,6 +385,8 @@ static int __init tc2_pm_init(void)
ret = mcpm_platform_register(&tc2_pm_power_ops);
if (!ret) {
mcpm_sync_init(tc2_pm_power_up_setup);
+ /* test if we can (re)enable the CCI on our own */
+ BUG_ON(mcpm_loopback(tc2_cache_off) != 0);
pr_info("TC2 power management initialized\n");
}
return ret;
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index c348eaee7ee..577039a3f6e 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -669,7 +669,7 @@ config ARM_VIRT_EXT
details.
config SWP_EMULATE
- bool "Emulate SWP/SWPB instructions"
+ bool "Emulate SWP/SWPB instructions" if !SMP
depends on CPU_V7
default y if SMP
select HAVE_PROC_CPU if PROC_FS
@@ -907,8 +907,8 @@ config PL310_ERRATA_588369
They are architecturally defined to behave as the execution of a
clean operation followed immediately by an invalidate operation,
both performing to the same memory location. This functionality
- is not correctly implemented in PL310 as clean lines are not
- invalidated as a result of these operations.
+ is not correctly implemented in PL310 prior to r2p0 (fixed in r2p0)
+ as clean lines are not invalidated as a result of these operations.
config PL310_ERRATA_727915
bool "PL310 errata: Background Clean & Invalidate by Way operation can cause data corruption"
@@ -918,7 +918,8 @@ config PL310_ERRATA_727915
PL310 can handle normal accesses while it is in progress. Under very
rare circumstances, due to this erratum, write data can be lost when
PL310 treats a cacheable write transaction during a Clean &
- Invalidate by Way operation.
+ Invalidate by Way operation. Revisions prior to r3p1 are affected by
+ this errata (fixed in r3p1).
config PL310_ERRATA_753970
bool "PL310 errata: cache sync operation may be faulty"
diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c
index b8cb1a2688a..0c1ab49e5f7 100644
--- a/arch/arm/mm/alignment.c
+++ b/arch/arm/mm/alignment.c
@@ -76,6 +76,7 @@
static unsigned long ai_user;
static unsigned long ai_sys;
+static void *ai_sys_last_pc;
static unsigned long ai_skipped;
static unsigned long ai_half;
static unsigned long ai_word;
@@ -130,7 +131,7 @@ static const char *usermode_action[] = {
static int alignment_proc_show(struct seq_file *m, void *v)
{
seq_printf(m, "User:\t\t%lu\n", ai_user);
- seq_printf(m, "System:\t\t%lu\n", ai_sys);
+ seq_printf(m, "System:\t\t%lu (%pF)\n", ai_sys, ai_sys_last_pc);
seq_printf(m, "Skipped:\t%lu\n", ai_skipped);
seq_printf(m, "Half:\t\t%lu\n", ai_half);
seq_printf(m, "Word:\t\t%lu\n", ai_word);
@@ -794,6 +795,7 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
goto user;
ai_sys += 1;
+ ai_sys_last_pc = (void *)instruction_pointer(regs);
fixup:
diff --git a/arch/arm/mm/cache-fa.S b/arch/arm/mm/cache-fa.S
index e505befe51b..2f0c58836ae 100644
--- a/arch/arm/mm/cache-fa.S
+++ b/arch/arm/mm/cache-fa.S
@@ -15,6 +15,7 @@
*/
#include <linux/linkage.h>
#include <linux/init.h>
+#include <asm/assembler.h>
#include <asm/memory.h>
#include <asm/page.h>
@@ -45,7 +46,7 @@
ENTRY(fa_flush_icache_all)
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
- mov pc, lr
+ ret lr
ENDPROC(fa_flush_icache_all)
/*
@@ -71,7 +72,7 @@ __flush_whole_cache:
mcrne p15, 0, ip, c7, c5, 6 @ invalidate BTB
mcrne p15, 0, ip, c7, c10, 4 @ drain write buffer
mcrne p15, 0, ip, c7, c5, 4 @ prefetch flush
- mov pc, lr
+ ret lr
/*
* flush_user_cache_range(start, end, flags)
@@ -99,7 +100,7 @@ ENTRY(fa_flush_user_cache_range)
mcrne p15, 0, ip, c7, c5, 6 @ invalidate BTB
mcrne p15, 0, ip, c7, c10, 4 @ data write barrier
mcrne p15, 0, ip, c7, c5, 4 @ prefetch flush
- mov pc, lr
+ ret lr
/*
* coherent_kern_range(start, end)
@@ -135,7 +136,7 @@ ENTRY(fa_coherent_user_range)
mcr p15, 0, r0, c7, c5, 6 @ invalidate BTB
mcr p15, 0, r0, c7, c10, 4 @ drain write buffer
mcr p15, 0, r0, c7, c5, 4 @ prefetch flush
- mov pc, lr
+ ret lr
/*
* flush_kern_dcache_area(void *addr, size_t size)
@@ -155,7 +156,7 @@ ENTRY(fa_flush_kern_dcache_area)
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
mcr p15, 0, r0, c7, c10, 4 @ drain write buffer
- mov pc, lr
+ ret lr
/*
* dma_inv_range(start, end)
@@ -181,7 +182,7 @@ fa_dma_inv_range:
blo 1b
mov r0, #0
mcr p15, 0, r0, c7, c10, 4 @ drain write buffer
- mov pc, lr
+ ret lr
/*
* dma_clean_range(start, end)
@@ -199,7 +200,7 @@ fa_dma_clean_range:
blo 1b
mov r0, #0
mcr p15, 0, r0, c7, c10, 4 @ drain write buffer
- mov pc, lr
+ ret lr
/*
* dma_flush_range(start,end)
@@ -214,7 +215,7 @@ ENTRY(fa_dma_flush_range)
blo 1b
mov r0, #0
mcr p15, 0, r0, c7, c10, 4 @ drain write buffer
- mov pc, lr
+ ret lr
/*
* dma_map_area(start, size, dir)
@@ -237,7 +238,7 @@ ENDPROC(fa_dma_map_area)
* - dir - DMA direction
*/
ENTRY(fa_dma_unmap_area)
- mov pc, lr
+ ret lr
ENDPROC(fa_dma_unmap_area)
.globl fa_flush_kern_cache_louis
diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
index 7c3fb41a462..5f2c988a06a 100644
--- a/arch/arm/mm/cache-l2x0.c
+++ b/arch/arm/mm/cache-l2x0.c
@@ -665,7 +665,7 @@ static int l2c310_cpu_enable_flz(struct notifier_block *nb, unsigned long act, v
static void __init l2c310_enable(void __iomem *base, u32 aux, unsigned num_lock)
{
unsigned rev = readl_relaxed(base + L2X0_CACHE_ID) & L2X0_CACHE_ID_RTL_MASK;
- bool cortex_a9 = read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A9;
+ bool cortex_a9 = read_cpuid_part() == ARM_CPU_PART_CORTEX_A9;
if (rev >= L310_CACHE_ID_RTL_R2P0) {
if (cortex_a9) {
diff --git a/arch/arm/mm/cache-nop.S b/arch/arm/mm/cache-nop.S
index 8e12ddca003..f1cc9861031 100644
--- a/arch/arm/mm/cache-nop.S
+++ b/arch/arm/mm/cache-nop.S
@@ -5,11 +5,12 @@
*/
#include <linux/linkage.h>
#include <linux/init.h>
+#include <asm/assembler.h>
#include "proc-macros.S"
ENTRY(nop_flush_icache_all)
- mov pc, lr
+ ret lr
ENDPROC(nop_flush_icache_all)
.globl nop_flush_kern_cache_all
@@ -29,7 +30,7 @@ ENDPROC(nop_flush_icache_all)
ENTRY(nop_coherent_user_range)
mov r0, 0
- mov pc, lr
+ ret lr
ENDPROC(nop_coherent_user_range)
.globl nop_flush_kern_dcache_area
diff --git a/arch/arm/mm/cache-v4.S b/arch/arm/mm/cache-v4.S
index a7ba68f59f0..91e3adf155c 100644
--- a/arch/arm/mm/cache-v4.S
+++ b/arch/arm/mm/cache-v4.S
@@ -9,6 +9,7 @@
*/
#include <linux/linkage.h>
#include <linux/init.h>
+#include <asm/assembler.h>
#include <asm/page.h>
#include "proc-macros.S"
@@ -18,7 +19,7 @@
* Unconditionally clean and invalidate the entire icache.
*/
ENTRY(v4_flush_icache_all)
- mov pc, lr
+ ret lr
ENDPROC(v4_flush_icache_all)
/*
@@ -40,7 +41,7 @@ ENTRY(v4_flush_kern_cache_all)
#ifdef CONFIG_CPU_CP15
mov r0, #0
mcr p15, 0, r0, c7, c7, 0 @ flush ID cache
- mov pc, lr
+ ret lr
#else
/* FALLTHROUGH */
#endif
@@ -59,7 +60,7 @@ ENTRY(v4_flush_user_cache_range)
#ifdef CONFIG_CPU_CP15
mov ip, #0
mcr p15, 0, ip, c7, c7, 0 @ flush ID cache
- mov pc, lr
+ ret lr
#else
/* FALLTHROUGH */
#endif
@@ -89,7 +90,7 @@ ENTRY(v4_coherent_kern_range)
*/
ENTRY(v4_coherent_user_range)
mov r0, #0
- mov pc, lr
+ ret lr
/*
* flush_kern_dcache_area(void *addr, size_t size)
@@ -116,7 +117,7 @@ ENTRY(v4_dma_flush_range)
mov r0, #0
mcr p15, 0, r0, c7, c7, 0 @ flush ID cache
#endif
- mov pc, lr
+ ret lr
/*
* dma_unmap_area(start, size, dir)
@@ -136,7 +137,7 @@ ENTRY(v4_dma_unmap_area)
* - dir - DMA direction
*/
ENTRY(v4_dma_map_area)
- mov pc, lr
+ ret lr
ENDPROC(v4_dma_unmap_area)
ENDPROC(v4_dma_map_area)
diff --git a/arch/arm/mm/cache-v4wb.S b/arch/arm/mm/cache-v4wb.S
index cd494532140..2522f8c8fbb 100644
--- a/arch/arm/mm/cache-v4wb.S
+++ b/arch/arm/mm/cache-v4wb.S
@@ -9,6 +9,7 @@
*/
#include <linux/linkage.h>
#include <linux/init.h>
+#include <asm/assembler.h>
#include <asm/memory.h>
#include <asm/page.h>
#include "proc-macros.S"
@@ -58,7 +59,7 @@ flush_base:
ENTRY(v4wb_flush_icache_all)
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
- mov pc, lr
+ ret lr
ENDPROC(v4wb_flush_icache_all)
/*
@@ -94,7 +95,7 @@ __flush_whole_cache:
blo 1b
#endif
mcr p15, 0, ip, c7, c10, 4 @ drain write buffer
- mov pc, lr
+ ret lr
/*
* flush_user_cache_range(start, end, flags)
@@ -122,7 +123,7 @@ ENTRY(v4wb_flush_user_cache_range)
blo 1b
tst r2, #VM_EXEC
mcrne p15, 0, ip, c7, c10, 4 @ drain write buffer
- mov pc, lr
+ ret lr
/*
* flush_kern_dcache_area(void *addr, size_t size)
@@ -170,7 +171,7 @@ ENTRY(v4wb_coherent_user_range)
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
@@ -195,7 +196,7 @@ v4wb_dma_inv_range:
cmp r0, r1
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ drain write buffer
- mov pc, lr
+ ret lr
/*
* dma_clean_range(start, end)
@@ -212,7 +213,7 @@ v4wb_dma_clean_range:
cmp r0, r1
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ drain write buffer
- mov pc, lr
+ ret lr
/*
* dma_flush_range(start, end)
@@ -248,7 +249,7 @@ ENDPROC(v4wb_dma_map_area)
* - dir - DMA direction
*/
ENTRY(v4wb_dma_unmap_area)
- mov pc, lr
+ ret lr
ENDPROC(v4wb_dma_unmap_area)
.globl v4wb_flush_kern_cache_louis
diff --git a/arch/arm/mm/cache-v4wt.S b/arch/arm/mm/cache-v4wt.S
index 11e5e5838bc..a0982ce4900 100644
--- a/arch/arm/mm/cache-v4wt.S
+++ b/arch/arm/mm/cache-v4wt.S
@@ -13,6 +13,7 @@
*/
#include <linux/linkage.h>
#include <linux/init.h>
+#include <asm/assembler.h>
#include <asm/page.h>
#include "proc-macros.S"
@@ -48,7 +49,7 @@
ENTRY(v4wt_flush_icache_all)
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
- mov pc, lr
+ ret lr
ENDPROC(v4wt_flush_icache_all)
/*
@@ -71,7 +72,7 @@ __flush_whole_cache:
tst r2, #VM_EXEC
mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache
mcr p15, 0, ip, c7, c6, 0 @ invalidate D cache
- mov pc, lr
+ ret lr
/*
* flush_user_cache_range(start, end, flags)
@@ -94,7 +95,7 @@ ENTRY(v4wt_flush_user_cache_range)
add r0, r0, #CACHE_DLINESIZE
cmp r0, r1
blo 1b
- mov pc, lr
+ ret lr
/*
* coherent_kern_range(start, end)
@@ -126,7 +127,7 @@ ENTRY(v4wt_coherent_user_range)
cmp r0, r1
blo 1b
mov r0, #0
- mov pc, lr
+ ret lr
/*
* flush_kern_dcache_area(void *addr, size_t size)
@@ -160,7 +161,7 @@ v4wt_dma_inv_range:
add r0, r0, #CACHE_DLINESIZE
cmp r0, r1
blo 1b
- mov pc, lr
+ ret lr
/*
* dma_flush_range(start, end)
@@ -192,7 +193,7 @@ ENTRY(v4wt_dma_unmap_area)
* - dir - DMA direction
*/
ENTRY(v4wt_dma_map_area)
- mov pc, lr
+ ret lr
ENDPROC(v4wt_dma_unmap_area)
ENDPROC(v4wt_dma_map_area)
diff --git a/arch/arm/mm/cache-v6.S b/arch/arm/mm/cache-v6.S
index d8fd4d4bd3d..24659952c27 100644
--- a/arch/arm/mm/cache-v6.S
+++ b/arch/arm/mm/cache-v6.S
@@ -51,7 +51,7 @@ ENTRY(v6_flush_icache_all)
#else
mcr p15, 0, r0, c7, c5, 0 @ invalidate I-cache
#endif
- mov pc, lr
+ ret lr
ENDPROC(v6_flush_icache_all)
/*
@@ -73,7 +73,7 @@ ENTRY(v6_flush_kern_cache_all)
#else
mcr p15, 0, r0, c7, c15, 0 @ Cache clean+invalidate
#endif
- mov pc, lr
+ ret lr
/*
* v6_flush_cache_all()
@@ -98,7 +98,7 @@ ENTRY(v6_flush_user_cache_all)
* - we have a VIPT cache.
*/
ENTRY(v6_flush_user_cache_range)
- mov pc, lr
+ ret lr
/*
* v6_coherent_kern_range(start,end)
@@ -150,7 +150,7 @@ ENTRY(v6_coherent_user_range)
#else
mcr p15, 0, r0, c7, c5, 6 @ invalidate BTB
#endif
- mov pc, lr
+ ret lr
/*
* Fault handling for the cache operation above. If the virtual address in r0
@@ -158,7 +158,7 @@ ENTRY(v6_coherent_user_range)
*/
9001:
mov r0, #-EFAULT
- mov pc, lr
+ ret lr
UNWIND(.fnend )
ENDPROC(v6_coherent_user_range)
ENDPROC(v6_coherent_kern_range)
@@ -188,7 +188,7 @@ ENTRY(v6_flush_kern_dcache_area)
mov r0, #0
mcr p15, 0, r0, c7, c10, 4
#endif
- mov pc, lr
+ ret lr
/*
@@ -239,7 +239,7 @@ v6_dma_inv_range:
blo 1b
mov r0, #0
mcr p15, 0, r0, c7, c10, 4 @ drain write buffer
- mov pc, lr
+ ret lr
/*
* v6_dma_clean_range(start,end)
@@ -262,7 +262,7 @@ v6_dma_clean_range:
blo 1b
mov r0, #0
mcr p15, 0, r0, c7, c10, 4 @ drain write buffer
- mov pc, lr
+ ret lr
/*
* v6_dma_flush_range(start,end)
@@ -290,7 +290,7 @@ ENTRY(v6_dma_flush_range)
blo 1b
mov r0, #0
mcr p15, 0, r0, c7, c10, 4 @ drain write buffer
- mov pc, lr
+ ret lr
/*
* dma_map_area(start, size, dir)
@@ -323,7 +323,7 @@ ENTRY(v6_dma_unmap_area)
teq r2, #DMA_TO_DEVICE
bne v6_dma_inv_range
#endif
- mov pc, lr
+ ret lr
ENDPROC(v6_dma_unmap_area)
.globl v6_flush_kern_cache_louis
diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S
index 615c99e38ba..b966656d2c2 100644
--- a/arch/arm/mm/cache-v7.S
+++ b/arch/arm/mm/cache-v7.S
@@ -61,7 +61,7 @@ ENTRY(v7_invalidate_l1)
bgt 1b
dsb st
isb
- mov pc, lr
+ ret lr
ENDPROC(v7_invalidate_l1)
/*
@@ -76,7 +76,7 @@ ENTRY(v7_flush_icache_all)
mov r0, #0
ALT_SMP(mcr p15, 0, r0, c7, c1, 0) @ invalidate I-cache inner shareable
ALT_UP(mcr p15, 0, r0, c7, c5, 0) @ I+BTB cache invalidate
- mov pc, lr
+ ret lr
ENDPROC(v7_flush_icache_all)
/*
@@ -94,7 +94,7 @@ ENTRY(v7_flush_dcache_louis)
ALT_UP(ands r3, r0, #(7 << 27)) @ extract LoUU from clidr
#ifdef CONFIG_ARM_ERRATA_643719
ALT_SMP(mrceq p15, 0, r2, c0, c0, 0) @ read main ID register
- ALT_UP(moveq pc, lr) @ LoUU is zero, so nothing to do
+ ALT_UP(reteq lr) @ LoUU is zero, so nothing to do
ldreq r1, =0x410fc090 @ ID of ARM Cortex A9 r0p?
biceq r2, r2, #0x0000000f @ clear minor revision number
teqeq r2, r1 @ test for errata affected core and if so...
@@ -102,7 +102,7 @@ ENTRY(v7_flush_dcache_louis)
#endif
ALT_SMP(mov r3, r3, lsr #20) @ r3 = LoUIS * 2
ALT_UP(mov r3, r3, lsr #26) @ r3 = LoUU * 2
- moveq pc, lr @ return if level == 0
+ reteq lr @ return if level == 0
mov r10, #0 @ r10 (starting level) = 0
b flush_levels @ start flushing cache levels
ENDPROC(v7_flush_dcache_louis)
@@ -168,7 +168,7 @@ finished:
mcr p15, 2, r10, c0, c0, 0 @ select current cache level in cssr
dsb st
isb
- mov pc, lr
+ ret lr
ENDPROC(v7_flush_dcache_all)
/*
@@ -191,7 +191,7 @@ ENTRY(v7_flush_kern_cache_all)
ALT_UP(mcr p15, 0, r0, c7, c5, 0) @ I+BTB cache invalidate
ARM( ldmfd sp!, {r4-r5, r7, r9-r11, lr} )
THUMB( ldmfd sp!, {r4-r7, r9-r11, lr} )
- mov pc, lr
+ ret lr
ENDPROC(v7_flush_kern_cache_all)
/*
@@ -209,7 +209,7 @@ ENTRY(v7_flush_kern_cache_louis)
ALT_UP(mcr p15, 0, r0, c7, c5, 0) @ I+BTB cache invalidate
ARM( ldmfd sp!, {r4-r5, r7, r9-r11, lr} )
THUMB( ldmfd sp!, {r4-r7, r9-r11, lr} )
- mov pc, lr
+ ret lr
ENDPROC(v7_flush_kern_cache_louis)
/*
@@ -235,7 +235,7 @@ ENTRY(v7_flush_user_cache_all)
* - we have a VIPT cache.
*/
ENTRY(v7_flush_user_cache_range)
- mov pc, lr
+ ret lr
ENDPROC(v7_flush_user_cache_all)
ENDPROC(v7_flush_user_cache_range)
@@ -296,7 +296,7 @@ ENTRY(v7_coherent_user_range)
ALT_UP(mcr p15, 0, r0, c7, c5, 6) @ invalidate BTB
dsb ishst
isb
- mov pc, lr
+ ret lr
/*
* Fault handling for the cache operation above. If the virtual address in r0
@@ -307,7 +307,7 @@ ENTRY(v7_coherent_user_range)
dsb
#endif
mov r0, #-EFAULT
- mov pc, lr
+ ret lr
UNWIND(.fnend )
ENDPROC(v7_coherent_kern_range)
ENDPROC(v7_coherent_user_range)
@@ -336,7 +336,7 @@ ENTRY(v7_flush_kern_dcache_area)
cmp r0, r1
blo 1b
dsb st
- mov pc, lr
+ ret lr
ENDPROC(v7_flush_kern_dcache_area)
/*
@@ -369,7 +369,7 @@ v7_dma_inv_range:
cmp r0, r1
blo 1b
dsb st
- mov pc, lr
+ ret lr
ENDPROC(v7_dma_inv_range)
/*
@@ -391,7 +391,7 @@ v7_dma_clean_range:
cmp r0, r1
blo 1b
dsb st
- mov pc, lr
+ ret lr
ENDPROC(v7_dma_clean_range)
/*
@@ -413,7 +413,7 @@ ENTRY(v7_dma_flush_range)
cmp r0, r1
blo 1b
dsb st
- mov pc, lr
+ ret lr
ENDPROC(v7_dma_flush_range)
/*
@@ -439,7 +439,7 @@ ENTRY(v7_dma_unmap_area)
add r1, r1, r0
teq r2, #DMA_TO_DEVICE
bne v7_dma_inv_range
- mov pc, lr
+ ret lr
ENDPROC(v7_dma_unmap_area)
__INITDATA
diff --git a/arch/arm/mm/dump.c b/arch/arm/mm/dump.c
index c508f41a43b..59424937e52 100644
--- a/arch/arm/mm/dump.c
+++ b/arch/arm/mm/dump.c
@@ -126,8 +126,8 @@ static const struct prot_bits section_bits[] = {
.val = PMD_SECT_USER,
.set = "USR",
}, {
- .mask = PMD_SECT_RDONLY,
- .val = PMD_SECT_RDONLY,
+ .mask = L_PMD_SECT_RDONLY,
+ .val = L_PMD_SECT_RDONLY,
.set = "ro",
.clear = "RW",
#elif __LINUX_ARM_ARCH__ >= 6
diff --git a/arch/arm/mm/l2c-l2x0-resume.S b/arch/arm/mm/l2c-l2x0-resume.S
index 99b05f21a59..fda415e4ca8 100644
--- a/arch/arm/mm/l2c-l2x0-resume.S
+++ b/arch/arm/mm/l2c-l2x0-resume.S
@@ -6,6 +6,7 @@
* This code can only be used to if you are running in the secure world.
*/
#include <linux/linkage.h>
+#include <asm/assembler.h>
#include <asm/hardware/cache-l2x0.h>
.text
@@ -27,7 +28,7 @@ ENTRY(l2c310_early_resume)
@ Check that the address has been initialised
teq r1, #0
- moveq pc, lr
+ reteq lr
@ The prefetch and power control registers are revision dependent
@ and can be written whether or not the L2 cache is enabled
@@ -41,7 +42,7 @@ ENTRY(l2c310_early_resume)
@ Don't setup the L2 cache if it is already enabled
ldr r0, [r1, #L2X0_CTRL]
tst r0, #L2X0_CTRL_EN
- movne pc, lr
+ retne lr
str r3, [r1, #L310_TAG_LATENCY_CTRL]
str r4, [r1, #L310_DATA_LATENCY_CTRL]
@@ -51,7 +52,7 @@ ENTRY(l2c310_early_resume)
str r2, [r1, #L2X0_AUX_CTRL]
mov r9, #L2X0_CTRL_EN
str r9, [r1, #L2X0_CTRL]
- mov pc, lr
+ ret lr
ENDPROC(l2c310_early_resume)
.align
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 6e3ba8d112a..8348ed6b2ef 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -1434,23 +1434,64 @@ void __init early_paging_init(const struct machine_desc *mdesc,
dsb(ishst);
isb();
- /* remap level 1 table */
+ /*
+ * FIXME: This code is not architecturally compliant: we modify
+ * the mappings in-place, indeed while they are in use by this
+ * very same code. This may lead to unpredictable behaviour of
+ * the CPU.
+ *
+ * Even modifying the mappings in a separate page table does
+ * not resolve this.
+ *
+ * The architecture strongly recommends that when a mapping is
+ * changed, that it is changed by first going via an invalid
+ * mapping and back to the new mapping. This is to ensure that
+ * no TLB conflicts (caused by the TLB having more than one TLB
+ * entry match a translation) can occur. However, doing that
+ * here will result in unmapping the code we are running.
+ */
+ pr_warn("WARNING: unsafe modification of in-place page tables - tainting kernel\n");
+ add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK);
+
+ /*
+ * Remap level 1 table. This changes the physical addresses
+ * used to refer to the level 2 page tables to the high
+ * physical address alias, leaving everything else the same.
+ */
for (i = 0; i < PTRS_PER_PGD; pud0++, i++) {
set_pud(pud0,
__pud(__pa(pmd0) | PMD_TYPE_TABLE | L_PGD_SWAPPER));
pmd0 += PTRS_PER_PMD;
}
- /* remap pmds for kernel mapping */
+ /*
+ * Remap the level 2 table, pointing the mappings at the high
+ * physical address alias of these pages.
+ */
phys = __pa(map_start);
do {
*pmdk++ = __pmd(phys | pmdprot);
phys += PMD_SIZE;
} while (phys < map_end);
+ /*
+ * Ensure that the above updates are flushed out of the cache.
+ * This is not strictly correct; on a system where the caches
+ * are coherent with each other, but the MMU page table walks
+ * may not be coherent, flush_cache_all() may be a no-op, and
+ * this will fail.
+ */
flush_cache_all();
+
+ /*
+ * Re-write the TTBR values to point them at the high physical
+ * alias of the page tables. We expect __va() will work on
+ * cpu_get_pgd(), which returns the value of TTBR0.
+ */
cpu_switch_mm(pgd0, &init_mm);
cpu_set_ttbr(1, __pa(pgd0) + TTBR1_OFFSET);
+
+ /* Finally flush any stale TLB values. */
local_flush_bp_all();
local_flush_tlb_all();
}
diff --git a/arch/arm/mm/proc-arm1020.S b/arch/arm/mm/proc-arm1020.S
index d1a2d05971e..86ee5d47ce3 100644
--- a/arch/arm/mm/proc-arm1020.S
+++ b/arch/arm/mm/proc-arm1020.S
@@ -73,7 +73,7 @@
* cpu_arm1020_proc_init()
*/
ENTRY(cpu_arm1020_proc_init)
- mov pc, lr
+ ret lr
/*
* cpu_arm1020_proc_fin()
@@ -83,7 +83,7 @@ ENTRY(cpu_arm1020_proc_fin)
bic r0, r0, #0x1000 @ ...i............
bic r0, r0, #0x000e @ ............wca.
mcr p15, 0, r0, c1, c0, 0 @ disable caches
- mov pc, lr
+ ret lr
/*
* cpu_arm1020_reset(loc)
@@ -107,7 +107,7 @@ ENTRY(cpu_arm1020_reset)
bic ip, ip, #0x000f @ ............wcam
bic ip, ip, #0x1100 @ ...i...s........
mcr p15, 0, ip, c1, c0, 0 @ ctrl register
- mov pc, r0
+ ret r0
ENDPROC(cpu_arm1020_reset)
.popsection
@@ -117,7 +117,7 @@ ENDPROC(cpu_arm1020_reset)
.align 5
ENTRY(cpu_arm1020_do_idle)
mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt
- mov pc, lr
+ ret lr
/* ================================= CACHE ================================ */
@@ -133,7 +133,7 @@ ENTRY(arm1020_flush_icache_all)
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
#endif
- mov pc, lr
+ ret lr
ENDPROC(arm1020_flush_icache_all)
/*
@@ -169,7 +169,7 @@ __flush_whole_cache:
mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache
#endif
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* flush_user_cache_range(start, end, flags)
@@ -200,7 +200,7 @@ ENTRY(arm1020_flush_user_cache_range)
mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache
#endif
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* coherent_kern_range(start, end)
@@ -242,7 +242,7 @@ ENTRY(arm1020_coherent_user_range)
blo 1b
mcr p15, 0, ip, c7, c10, 4 @ drain WB
mov r0, #0
- mov pc, lr
+ ret lr
/*
* flush_kern_dcache_area(void *addr, size_t size)
@@ -264,7 +264,7 @@ ENTRY(arm1020_flush_kern_dcache_area)
blo 1b
#endif
mcr p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_inv_range(start, end)
@@ -297,7 +297,7 @@ arm1020_dma_inv_range:
blo 1b
#endif
mcr p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_clean_range(start, end)
@@ -320,7 +320,7 @@ arm1020_dma_clean_range:
blo 1b
#endif
mcr p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_flush_range(start, end)
@@ -342,7 +342,7 @@ ENTRY(arm1020_dma_flush_range)
blo 1b
#endif
mcr p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_map_area(start, size, dir)
@@ -365,7 +365,7 @@ ENDPROC(arm1020_dma_map_area)
* - dir - DMA direction
*/
ENTRY(arm1020_dma_unmap_area)
- mov pc, lr
+ ret lr
ENDPROC(arm1020_dma_unmap_area)
.globl arm1020_flush_kern_cache_louis
@@ -384,7 +384,7 @@ ENTRY(cpu_arm1020_dcache_clean_area)
subs r1, r1, #CACHE_DLINESIZE
bhi 1b
#endif
- mov pc, lr
+ ret lr
/* =============================== PageTable ============================== */
@@ -423,7 +423,7 @@ ENTRY(cpu_arm1020_switch_mm)
mcr p15, 0, r0, c2, c0, 0 @ load page table pointer
mcr p15, 0, r1, c8, c7, 0 @ invalidate I & D TLBs
#endif /* CONFIG_MMU */
- mov pc, lr
+ ret lr
/*
* cpu_arm1020_set_pte(ptep, pte)
@@ -441,7 +441,7 @@ ENTRY(cpu_arm1020_set_pte_ext)
#endif
mcr p15, 0, r0, c7, c10, 4 @ drain WB
#endif /* CONFIG_MMU */
- mov pc, lr
+ ret lr
.type __arm1020_setup, #function
__arm1020_setup:
@@ -460,7 +460,7 @@ __arm1020_setup:
#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN
orr r0, r0, #0x4000 @ .R.. .... .... ....
#endif
- mov pc, lr
+ ret lr
.size __arm1020_setup, . - __arm1020_setup
/*
diff --git a/arch/arm/mm/proc-arm1020e.S b/arch/arm/mm/proc-arm1020e.S
index 9d89405c3d0..a6331d78601 100644
--- a/arch/arm/mm/proc-arm1020e.S
+++ b/arch/arm/mm/proc-arm1020e.S
@@ -73,7 +73,7 @@
* cpu_arm1020e_proc_init()
*/
ENTRY(cpu_arm1020e_proc_init)
- mov pc, lr
+ ret lr
/*
* cpu_arm1020e_proc_fin()
@@ -83,7 +83,7 @@ ENTRY(cpu_arm1020e_proc_fin)
bic r0, r0, #0x1000 @ ...i............
bic r0, r0, #0x000e @ ............wca.
mcr p15, 0, r0, c1, c0, 0 @ disable caches
- mov pc, lr
+ ret lr
/*
* cpu_arm1020e_reset(loc)
@@ -107,7 +107,7 @@ ENTRY(cpu_arm1020e_reset)
bic ip, ip, #0x000f @ ............wcam
bic ip, ip, #0x1100 @ ...i...s........
mcr p15, 0, ip, c1, c0, 0 @ ctrl register
- mov pc, r0
+ ret r0
ENDPROC(cpu_arm1020e_reset)
.popsection
@@ -117,7 +117,7 @@ ENDPROC(cpu_arm1020e_reset)
.align 5
ENTRY(cpu_arm1020e_do_idle)
mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt
- mov pc, lr
+ ret lr
/* ================================= CACHE ================================ */
@@ -133,7 +133,7 @@ ENTRY(arm1020e_flush_icache_all)
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
#endif
- mov pc, lr
+ ret lr
ENDPROC(arm1020e_flush_icache_all)
/*
@@ -168,7 +168,7 @@ __flush_whole_cache:
mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache
#endif
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* flush_user_cache_range(start, end, flags)
@@ -197,7 +197,7 @@ ENTRY(arm1020e_flush_user_cache_range)
mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache
#endif
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* coherent_kern_range(start, end)
@@ -236,7 +236,7 @@ ENTRY(arm1020e_coherent_user_range)
blo 1b
mcr p15, 0, ip, c7, c10, 4 @ drain WB
mov r0, #0
- mov pc, lr
+ ret lr
/*
* flush_kern_dcache_area(void *addr, size_t size)
@@ -257,7 +257,7 @@ ENTRY(arm1020e_flush_kern_dcache_area)
blo 1b
#endif
mcr p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_inv_range(start, end)
@@ -286,7 +286,7 @@ arm1020e_dma_inv_range:
blo 1b
#endif
mcr p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_clean_range(start, end)
@@ -308,7 +308,7 @@ arm1020e_dma_clean_range:
blo 1b
#endif
mcr p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_flush_range(start, end)
@@ -328,7 +328,7 @@ ENTRY(arm1020e_dma_flush_range)
blo 1b
#endif
mcr p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_map_area(start, size, dir)
@@ -351,7 +351,7 @@ ENDPROC(arm1020e_dma_map_area)
* - dir - DMA direction
*/
ENTRY(arm1020e_dma_unmap_area)
- mov pc, lr
+ ret lr
ENDPROC(arm1020e_dma_unmap_area)
.globl arm1020e_flush_kern_cache_louis
@@ -369,7 +369,7 @@ ENTRY(cpu_arm1020e_dcache_clean_area)
subs r1, r1, #CACHE_DLINESIZE
bhi 1b
#endif
- mov pc, lr
+ ret lr
/* =============================== PageTable ============================== */
@@ -407,7 +407,7 @@ ENTRY(cpu_arm1020e_switch_mm)
mcr p15, 0, r0, c2, c0, 0 @ load page table pointer
mcr p15, 0, r1, c8, c7, 0 @ invalidate I & D TLBs
#endif
- mov pc, lr
+ ret lr
/*
* cpu_arm1020e_set_pte(ptep, pte)
@@ -423,7 +423,7 @@ ENTRY(cpu_arm1020e_set_pte_ext)
mcr p15, 0, r0, c7, c10, 1 @ clean D entry
#endif
#endif /* CONFIG_MMU */
- mov pc, lr
+ ret lr
.type __arm1020e_setup, #function
__arm1020e_setup:
@@ -441,7 +441,7 @@ __arm1020e_setup:
#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN
orr r0, r0, #0x4000 @ .R.. .... .... ....
#endif
- mov pc, lr
+ ret lr
.size __arm1020e_setup, . - __arm1020e_setup
/*
diff --git a/arch/arm/mm/proc-arm1022.S b/arch/arm/mm/proc-arm1022.S
index 6f01a0ae3b3..a126b7a5992 100644
--- a/arch/arm/mm/proc-arm1022.S
+++ b/arch/arm/mm/proc-arm1022.S
@@ -62,7 +62,7 @@
* cpu_arm1022_proc_init()
*/
ENTRY(cpu_arm1022_proc_init)
- mov pc, lr
+ ret lr
/*
* cpu_arm1022_proc_fin()
@@ -72,7 +72,7 @@ ENTRY(cpu_arm1022_proc_fin)
bic r0, r0, #0x1000 @ ...i............
bic r0, r0, #0x000e @ ............wca.
mcr p15, 0, r0, c1, c0, 0 @ disable caches
- mov pc, lr
+ ret lr
/*
* cpu_arm1022_reset(loc)
@@ -96,7 +96,7 @@ ENTRY(cpu_arm1022_reset)
bic ip, ip, #0x000f @ ............wcam
bic ip, ip, #0x1100 @ ...i...s........
mcr p15, 0, ip, c1, c0, 0 @ ctrl register
- mov pc, r0
+ ret r0
ENDPROC(cpu_arm1022_reset)
.popsection
@@ -106,7 +106,7 @@ ENDPROC(cpu_arm1022_reset)
.align 5
ENTRY(cpu_arm1022_do_idle)
mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt
- mov pc, lr
+ ret lr
/* ================================= CACHE ================================ */
@@ -122,7 +122,7 @@ ENTRY(arm1022_flush_icache_all)
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
#endif
- mov pc, lr
+ ret lr
ENDPROC(arm1022_flush_icache_all)
/*
@@ -156,7 +156,7 @@ __flush_whole_cache:
mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache
#endif
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* flush_user_cache_range(start, end, flags)
@@ -185,7 +185,7 @@ ENTRY(arm1022_flush_user_cache_range)
mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache
#endif
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* coherent_kern_range(start, end)
@@ -225,7 +225,7 @@ ENTRY(arm1022_coherent_user_range)
blo 1b
mcr p15, 0, ip, c7, c10, 4 @ drain WB
mov r0, #0
- mov pc, lr
+ ret lr
/*
* flush_kern_dcache_area(void *addr, size_t size)
@@ -246,7 +246,7 @@ ENTRY(arm1022_flush_kern_dcache_area)
blo 1b
#endif
mcr p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_inv_range(start, end)
@@ -275,7 +275,7 @@ arm1022_dma_inv_range:
blo 1b
#endif
mcr p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_clean_range(start, end)
@@ -297,7 +297,7 @@ arm1022_dma_clean_range:
blo 1b
#endif
mcr p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_flush_range(start, end)
@@ -317,7 +317,7 @@ ENTRY(arm1022_dma_flush_range)
blo 1b
#endif
mcr p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_map_area(start, size, dir)
@@ -340,7 +340,7 @@ ENDPROC(arm1022_dma_map_area)
* - dir - DMA direction
*/
ENTRY(arm1022_dma_unmap_area)
- mov pc, lr
+ ret lr
ENDPROC(arm1022_dma_unmap_area)
.globl arm1022_flush_kern_cache_louis
@@ -358,7 +358,7 @@ ENTRY(cpu_arm1022_dcache_clean_area)
subs r1, r1, #CACHE_DLINESIZE
bhi 1b
#endif
- mov pc, lr
+ ret lr
/* =============================== PageTable ============================== */
@@ -389,7 +389,7 @@ ENTRY(cpu_arm1022_switch_mm)
mcr p15, 0, r0, c2, c0, 0 @ load page table pointer
mcr p15, 0, r1, c8, c7, 0 @ invalidate I & D TLBs
#endif
- mov pc, lr
+ ret lr
/*
* cpu_arm1022_set_pte_ext(ptep, pte, ext)
@@ -405,7 +405,7 @@ ENTRY(cpu_arm1022_set_pte_ext)
mcr p15, 0, r0, c7, c10, 1 @ clean D entry
#endif
#endif /* CONFIG_MMU */
- mov pc, lr
+ ret lr
.type __arm1022_setup, #function
__arm1022_setup:
@@ -423,7 +423,7 @@ __arm1022_setup:
#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN
orr r0, r0, #0x4000 @ .R..............
#endif
- mov pc, lr
+ ret lr
.size __arm1022_setup, . - __arm1022_setup
/*
diff --git a/arch/arm/mm/proc-arm1026.S b/arch/arm/mm/proc-arm1026.S
index 4799a24b43e..fc294067e97 100644
--- a/arch/arm/mm/proc-arm1026.S
+++ b/arch/arm/mm/proc-arm1026.S
@@ -62,7 +62,7 @@
* cpu_arm1026_proc_init()
*/
ENTRY(cpu_arm1026_proc_init)
- mov pc, lr
+ ret lr
/*
* cpu_arm1026_proc_fin()
@@ -72,7 +72,7 @@ ENTRY(cpu_arm1026_proc_fin)
bic r0, r0, #0x1000 @ ...i............
bic r0, r0, #0x000e @ ............wca.
mcr p15, 0, r0, c1, c0, 0 @ disable caches
- mov pc, lr
+ ret lr
/*
* cpu_arm1026_reset(loc)
@@ -96,7 +96,7 @@ ENTRY(cpu_arm1026_reset)
bic ip, ip, #0x000f @ ............wcam
bic ip, ip, #0x1100 @ ...i...s........
mcr p15, 0, ip, c1, c0, 0 @ ctrl register
- mov pc, r0
+ ret r0
ENDPROC(cpu_arm1026_reset)
.popsection
@@ -106,7 +106,7 @@ ENDPROC(cpu_arm1026_reset)
.align 5
ENTRY(cpu_arm1026_do_idle)
mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt
- mov pc, lr
+ ret lr
/* ================================= CACHE ================================ */
@@ -122,7 +122,7 @@ ENTRY(arm1026_flush_icache_all)
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
#endif
- mov pc, lr
+ ret lr
ENDPROC(arm1026_flush_icache_all)
/*
@@ -151,7 +151,7 @@ __flush_whole_cache:
mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache
#endif
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* flush_user_cache_range(start, end, flags)
@@ -180,7 +180,7 @@ ENTRY(arm1026_flush_user_cache_range)
mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache
#endif
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* coherent_kern_range(start, end)
@@ -219,7 +219,7 @@ ENTRY(arm1026_coherent_user_range)
blo 1b
mcr p15, 0, ip, c7, c10, 4 @ drain WB
mov r0, #0
- mov pc, lr
+ ret lr
/*
* flush_kern_dcache_area(void *addr, size_t size)
@@ -240,7 +240,7 @@ ENTRY(arm1026_flush_kern_dcache_area)
blo 1b
#endif
mcr p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_inv_range(start, end)
@@ -269,7 +269,7 @@ arm1026_dma_inv_range:
blo 1b
#endif
mcr p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_clean_range(start, end)
@@ -291,7 +291,7 @@ arm1026_dma_clean_range:
blo 1b
#endif
mcr p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_flush_range(start, end)
@@ -311,7 +311,7 @@ ENTRY(arm1026_dma_flush_range)
blo 1b
#endif
mcr p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_map_area(start, size, dir)
@@ -334,7 +334,7 @@ ENDPROC(arm1026_dma_map_area)
* - dir - DMA direction
*/
ENTRY(arm1026_dma_unmap_area)
- mov pc, lr
+ ret lr
ENDPROC(arm1026_dma_unmap_area)
.globl arm1026_flush_kern_cache_louis
@@ -352,7 +352,7 @@ ENTRY(cpu_arm1026_dcache_clean_area)
subs r1, r1, #CACHE_DLINESIZE
bhi 1b
#endif
- mov pc, lr
+ ret lr
/* =============================== PageTable ============================== */
@@ -378,7 +378,7 @@ ENTRY(cpu_arm1026_switch_mm)
mcr p15, 0, r0, c2, c0, 0 @ load page table pointer
mcr p15, 0, r1, c8, c7, 0 @ invalidate I & D TLBs
#endif
- mov pc, lr
+ ret lr
/*
* cpu_arm1026_set_pte_ext(ptep, pte, ext)
@@ -394,7 +394,7 @@ ENTRY(cpu_arm1026_set_pte_ext)
mcr p15, 0, r0, c7, c10, 1 @ clean D entry
#endif
#endif /* CONFIG_MMU */
- mov pc, lr
+ ret lr
.type __arm1026_setup, #function
__arm1026_setup:
@@ -417,7 +417,7 @@ __arm1026_setup:
#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN
orr r0, r0, #0x4000 @ .R.. .... .... ....
#endif
- mov pc, lr
+ ret lr
.size __arm1026_setup, . - __arm1026_setup
/*
diff --git a/arch/arm/mm/proc-arm720.S b/arch/arm/mm/proc-arm720.S
index d42c37f9f5b..2baa66b3ac9 100644
--- a/arch/arm/mm/proc-arm720.S
+++ b/arch/arm/mm/proc-arm720.S
@@ -51,14 +51,14 @@
*/
ENTRY(cpu_arm720_dcache_clean_area)
ENTRY(cpu_arm720_proc_init)
- mov pc, lr
+ ret lr
ENTRY(cpu_arm720_proc_fin)
mrc p15, 0, r0, c1, c0, 0
bic r0, r0, #0x1000 @ ...i............
bic r0, r0, #0x000e @ ............wca.
mcr p15, 0, r0, c1, c0, 0 @ disable caches
- mov pc, lr
+ ret lr
/*
* Function: arm720_proc_do_idle(void)
@@ -66,7 +66,7 @@ ENTRY(cpu_arm720_proc_fin)
* Purpose : put the processor in proper idle mode
*/
ENTRY(cpu_arm720_do_idle)
- mov pc, lr
+ ret lr
/*
* Function: arm720_switch_mm(unsigned long pgd_phys)
@@ -81,7 +81,7 @@ ENTRY(cpu_arm720_switch_mm)
mcr p15, 0, r0, c2, c0, 0 @ update page table ptr
mcr p15, 0, r1, c8, c7, 0 @ flush TLB (v4)
#endif
- mov pc, lr
+ ret lr
/*
* Function: arm720_set_pte_ext(pte_t *ptep, pte_t pte, unsigned int ext)
@@ -94,7 +94,7 @@ ENTRY(cpu_arm720_set_pte_ext)
#ifdef CONFIG_MMU
armv3_set_pte_ext wc_disable=0
#endif
- mov pc, lr
+ ret lr
/*
* Function: arm720_reset
@@ -112,7 +112,7 @@ ENTRY(cpu_arm720_reset)
bic ip, ip, #0x000f @ ............wcam
bic ip, ip, #0x2100 @ ..v....s........
mcr p15, 0, ip, c1, c0, 0 @ ctrl register
- mov pc, r0
+ ret r0
ENDPROC(cpu_arm720_reset)
.popsection
@@ -128,7 +128,7 @@ __arm710_setup:
bic r0, r0, r5
ldr r5, arm710_cr1_set
orr r0, r0, r5
- mov pc, lr @ __ret (head.S)
+ ret lr @ __ret (head.S)
.size __arm710_setup, . - __arm710_setup
/*
@@ -156,7 +156,7 @@ __arm720_setup:
mrc p15, 0, r0, c1, c0 @ get control register
bic r0, r0, r5
orr r0, r0, r6
- mov pc, lr @ __ret (head.S)
+ ret lr @ __ret (head.S)
.size __arm720_setup, . - __arm720_setup
/*
diff --git a/arch/arm/mm/proc-arm740.S b/arch/arm/mm/proc-arm740.S
index 9b0ae90cbf1..ac1ea6b3bce 100644
--- a/arch/arm/mm/proc-arm740.S
+++ b/arch/arm/mm/proc-arm740.S
@@ -32,7 +32,7 @@ ENTRY(cpu_arm740_proc_init)
ENTRY(cpu_arm740_do_idle)
ENTRY(cpu_arm740_dcache_clean_area)
ENTRY(cpu_arm740_switch_mm)
- mov pc, lr
+ ret lr
/*
* cpu_arm740_proc_fin()
@@ -42,7 +42,7 @@ ENTRY(cpu_arm740_proc_fin)
bic r0, r0, #0x3f000000 @ bank/f/lock/s
bic r0, r0, #0x0000000c @ w-buffer/cache
mcr p15, 0, r0, c1, c0, 0 @ disable caches
- mov pc, lr
+ ret lr
/*
* cpu_arm740_reset(loc)
@@ -56,7 +56,7 @@ ENTRY(cpu_arm740_reset)
mrc p15, 0, ip, c1, c0, 0 @ get ctrl register
bic ip, ip, #0x0000000c @ ............wc..
mcr p15, 0, ip, c1, c0, 0 @ ctrl register
- mov pc, r0
+ ret r0
ENDPROC(cpu_arm740_reset)
.popsection
@@ -115,7 +115,7 @@ __arm740_setup:
@ need some benchmark
orr r0, r0, #0x0000000d @ MPU/Cache/WB
- mov pc, lr
+ ret lr
.size __arm740_setup, . - __arm740_setup
diff --git a/arch/arm/mm/proc-arm7tdmi.S b/arch/arm/mm/proc-arm7tdmi.S
index f6cc3f63ce3..bf6ba4bc30f 100644
--- a/arch/arm/mm/proc-arm7tdmi.S
+++ b/arch/arm/mm/proc-arm7tdmi.S
@@ -32,13 +32,13 @@ ENTRY(cpu_arm7tdmi_proc_init)
ENTRY(cpu_arm7tdmi_do_idle)
ENTRY(cpu_arm7tdmi_dcache_clean_area)
ENTRY(cpu_arm7tdmi_switch_mm)
- mov pc, lr
+ ret lr
/*
* cpu_arm7tdmi_proc_fin()
*/
ENTRY(cpu_arm7tdmi_proc_fin)
- mov pc, lr
+ ret lr
/*
* Function: cpu_arm7tdmi_reset(loc)
@@ -47,13 +47,13 @@ ENTRY(cpu_arm7tdmi_proc_fin)
*/
.pushsection .idmap.text, "ax"
ENTRY(cpu_arm7tdmi_reset)
- mov pc, r0
+ ret r0
ENDPROC(cpu_arm7tdmi_reset)
.popsection
.type __arm7tdmi_setup, #function
__arm7tdmi_setup:
- mov pc, lr
+ ret lr
.size __arm7tdmi_setup, . - __arm7tdmi_setup
__INITDATA
diff --git a/arch/arm/mm/proc-arm920.S b/arch/arm/mm/proc-arm920.S
index 549557df6d5..22bf8dde4f8 100644
--- a/arch/arm/mm/proc-arm920.S
+++ b/arch/arm/mm/proc-arm920.S
@@ -63,7 +63,7 @@
* cpu_arm920_proc_init()
*/
ENTRY(cpu_arm920_proc_init)
- mov pc, lr
+ ret lr
/*
* cpu_arm920_proc_fin()
@@ -73,7 +73,7 @@ ENTRY(cpu_arm920_proc_fin)
bic r0, r0, #0x1000 @ ...i............
bic r0, r0, #0x000e @ ............wca.
mcr p15, 0, r0, c1, c0, 0 @ disable caches
- mov pc, lr
+ ret lr
/*
* cpu_arm920_reset(loc)
@@ -97,7 +97,7 @@ ENTRY(cpu_arm920_reset)
bic ip, ip, #0x000f @ ............wcam
bic ip, ip, #0x1100 @ ...i...s........
mcr p15, 0, ip, c1, c0, 0 @ ctrl register
- mov pc, r0
+ ret r0
ENDPROC(cpu_arm920_reset)
.popsection
@@ -107,7 +107,7 @@ ENDPROC(cpu_arm920_reset)
.align 5
ENTRY(cpu_arm920_do_idle)
mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt
- mov pc, lr
+ ret lr
#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
@@ -120,7 +120,7 @@ ENTRY(cpu_arm920_do_idle)
ENTRY(arm920_flush_icache_all)
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
- mov pc, lr
+ ret lr
ENDPROC(arm920_flush_icache_all)
/*
@@ -151,7 +151,7 @@ __flush_whole_cache:
tst r2, #VM_EXEC
mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* flush_user_cache_range(start, end, flags)
@@ -177,7 +177,7 @@ ENTRY(arm920_flush_user_cache_range)
blo 1b
tst r2, #VM_EXEC
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* coherent_kern_range(start, end)
@@ -211,7 +211,7 @@ ENTRY(arm920_coherent_user_range)
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ drain WB
mov r0, #0
- mov pc, lr
+ ret lr
/*
* flush_kern_dcache_area(void *addr, size_t size)
@@ -231,7 +231,7 @@ ENTRY(arm920_flush_kern_dcache_area)
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_inv_range(start, end)
@@ -257,7 +257,7 @@ arm920_dma_inv_range:
cmp r0, r1
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_clean_range(start, end)
@@ -276,7 +276,7 @@ arm920_dma_clean_range:
cmp r0, r1
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_flush_range(start, end)
@@ -293,7 +293,7 @@ ENTRY(arm920_dma_flush_range)
cmp r0, r1
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_map_area(start, size, dir)
@@ -316,7 +316,7 @@ ENDPROC(arm920_dma_map_area)
* - dir - DMA direction
*/
ENTRY(arm920_dma_unmap_area)
- mov pc, lr
+ ret lr
ENDPROC(arm920_dma_unmap_area)
.globl arm920_flush_kern_cache_louis
@@ -332,7 +332,7 @@ ENTRY(cpu_arm920_dcache_clean_area)
add r0, r0, #CACHE_DLINESIZE
subs r1, r1, #CACHE_DLINESIZE
bhi 1b
- mov pc, lr
+ ret lr
/* =============================== PageTable ============================== */
@@ -367,7 +367,7 @@ ENTRY(cpu_arm920_switch_mm)
mcr p15, 0, r0, c2, c0, 0 @ load page table pointer
mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs
#endif
- mov pc, lr
+ ret lr
/*
* cpu_arm920_set_pte(ptep, pte, ext)
@@ -382,7 +382,7 @@ ENTRY(cpu_arm920_set_pte_ext)
mcr p15, 0, r0, c7, c10, 1 @ clean D entry
mcr p15, 0, r0, c7, c10, 4 @ drain WB
#endif
- mov pc, lr
+ ret lr
/* Suspend/resume support: taken from arch/arm/plat-s3c24xx/sleep.S */
.globl cpu_arm920_suspend_size
@@ -423,7 +423,7 @@ __arm920_setup:
mrc p15, 0, r0, c1, c0 @ get control register v4
bic r0, r0, r5
orr r0, r0, r6
- mov pc, lr
+ ret lr
.size __arm920_setup, . - __arm920_setup
/*
diff --git a/arch/arm/mm/proc-arm922.S b/arch/arm/mm/proc-arm922.S
index 2a758b06c6f..0c6d5ac5a6d 100644
--- a/arch/arm/mm/proc-arm922.S
+++ b/arch/arm/mm/proc-arm922.S
@@ -65,7 +65,7 @@
* cpu_arm922_proc_init()
*/
ENTRY(cpu_arm922_proc_init)
- mov pc, lr
+ ret lr
/*
* cpu_arm922_proc_fin()
@@ -75,7 +75,7 @@ ENTRY(cpu_arm922_proc_fin)
bic r0, r0, #0x1000 @ ...i............
bic r0, r0, #0x000e @ ............wca.
mcr p15, 0, r0, c1, c0, 0 @ disable caches
- mov pc, lr
+ ret lr
/*
* cpu_arm922_reset(loc)
@@ -99,7 +99,7 @@ ENTRY(cpu_arm922_reset)
bic ip, ip, #0x000f @ ............wcam
bic ip, ip, #0x1100 @ ...i...s........
mcr p15, 0, ip, c1, c0, 0 @ ctrl register
- mov pc, r0
+ ret r0
ENDPROC(cpu_arm922_reset)
.popsection
@@ -109,7 +109,7 @@ ENDPROC(cpu_arm922_reset)
.align 5
ENTRY(cpu_arm922_do_idle)
mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt
- mov pc, lr
+ ret lr
#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
@@ -122,7 +122,7 @@ ENTRY(cpu_arm922_do_idle)
ENTRY(arm922_flush_icache_all)
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
- mov pc, lr
+ ret lr
ENDPROC(arm922_flush_icache_all)
/*
@@ -153,7 +153,7 @@ __flush_whole_cache:
tst r2, #VM_EXEC
mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* flush_user_cache_range(start, end, flags)
@@ -179,7 +179,7 @@ ENTRY(arm922_flush_user_cache_range)
blo 1b
tst r2, #VM_EXEC
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* coherent_kern_range(start, end)
@@ -213,7 +213,7 @@ ENTRY(arm922_coherent_user_range)
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ drain WB
mov r0, #0
- mov pc, lr
+ ret lr
/*
* flush_kern_dcache_area(void *addr, size_t size)
@@ -233,7 +233,7 @@ ENTRY(arm922_flush_kern_dcache_area)
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_inv_range(start, end)
@@ -259,7 +259,7 @@ arm922_dma_inv_range:
cmp r0, r1
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_clean_range(start, end)
@@ -278,7 +278,7 @@ arm922_dma_clean_range:
cmp r0, r1
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_flush_range(start, end)
@@ -295,7 +295,7 @@ ENTRY(arm922_dma_flush_range)
cmp r0, r1
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_map_area(start, size, dir)
@@ -318,7 +318,7 @@ ENDPROC(arm922_dma_map_area)
* - dir - DMA direction
*/
ENTRY(arm922_dma_unmap_area)
- mov pc, lr
+ ret lr
ENDPROC(arm922_dma_unmap_area)
.globl arm922_flush_kern_cache_louis
@@ -336,7 +336,7 @@ ENTRY(cpu_arm922_dcache_clean_area)
subs r1, r1, #CACHE_DLINESIZE
bhi 1b
#endif
- mov pc, lr
+ ret lr
/* =============================== PageTable ============================== */
@@ -371,7 +371,7 @@ ENTRY(cpu_arm922_switch_mm)
mcr p15, 0, r0, c2, c0, 0 @ load page table pointer
mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs
#endif
- mov pc, lr
+ ret lr
/*
* cpu_arm922_set_pte_ext(ptep, pte, ext)
@@ -386,7 +386,7 @@ ENTRY(cpu_arm922_set_pte_ext)
mcr p15, 0, r0, c7, c10, 1 @ clean D entry
mcr p15, 0, r0, c7, c10, 4 @ drain WB
#endif /* CONFIG_MMU */
- mov pc, lr
+ ret lr
.type __arm922_setup, #function
__arm922_setup:
@@ -401,7 +401,7 @@ __arm922_setup:
mrc p15, 0, r0, c1, c0 @ get control register v4
bic r0, r0, r5
orr r0, r0, r6
- mov pc, lr
+ ret lr
.size __arm922_setup, . - __arm922_setup
/*
diff --git a/arch/arm/mm/proc-arm925.S b/arch/arm/mm/proc-arm925.S
index ba0d58e1a2a..c32d073282e 100644
--- a/arch/arm/mm/proc-arm925.S
+++ b/arch/arm/mm/proc-arm925.S
@@ -86,7 +86,7 @@
* cpu_arm925_proc_init()
*/
ENTRY(cpu_arm925_proc_init)
- mov pc, lr
+ ret lr
/*
* cpu_arm925_proc_fin()
@@ -96,7 +96,7 @@ ENTRY(cpu_arm925_proc_fin)
bic r0, r0, #0x1000 @ ...i............
bic r0, r0, #0x000e @ ............wca.
mcr p15, 0, r0, c1, c0, 0 @ disable caches
- mov pc, lr
+ ret lr
/*
* cpu_arm925_reset(loc)
@@ -129,7 +129,7 @@ ENDPROC(cpu_arm925_reset)
bic ip, ip, #0x000f @ ............wcam
bic ip, ip, #0x1100 @ ...i...s........
mcr p15, 0, ip, c1, c0, 0 @ ctrl register
- mov pc, r0
+ ret r0
/*
* cpu_arm925_do_idle()
@@ -145,7 +145,7 @@ ENTRY(cpu_arm925_do_idle)
mcr p15, 0, r2, c1, c0, 0 @ Disable I cache
mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt
mcr p15, 0, r1, c1, c0, 0 @ Restore ICache enable
- mov pc, lr
+ ret lr
/*
* flush_icache_all()
@@ -155,7 +155,7 @@ ENTRY(cpu_arm925_do_idle)
ENTRY(arm925_flush_icache_all)
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
- mov pc, lr
+ ret lr
ENDPROC(arm925_flush_icache_all)
/*
@@ -188,7 +188,7 @@ __flush_whole_cache:
tst r2, #VM_EXEC
mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* flush_user_cache_range(start, end, flags)
@@ -225,7 +225,7 @@ ENTRY(arm925_flush_user_cache_range)
blo 1b
tst r2, #VM_EXEC
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* coherent_kern_range(start, end)
@@ -259,7 +259,7 @@ ENTRY(arm925_coherent_user_range)
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ drain WB
mov r0, #0
- mov pc, lr
+ ret lr
/*
* flush_kern_dcache_area(void *addr, size_t size)
@@ -279,7 +279,7 @@ ENTRY(arm925_flush_kern_dcache_area)
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_inv_range(start, end)
@@ -307,7 +307,7 @@ arm925_dma_inv_range:
cmp r0, r1
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_clean_range(start, end)
@@ -328,7 +328,7 @@ arm925_dma_clean_range:
blo 1b
#endif
mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_flush_range(start, end)
@@ -350,7 +350,7 @@ ENTRY(arm925_dma_flush_range)
cmp r0, r1
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_map_area(start, size, dir)
@@ -373,7 +373,7 @@ ENDPROC(arm925_dma_map_area)
* - dir - DMA direction
*/
ENTRY(arm925_dma_unmap_area)
- mov pc, lr
+ ret lr
ENDPROC(arm925_dma_unmap_area)
.globl arm925_flush_kern_cache_louis
@@ -390,7 +390,7 @@ ENTRY(cpu_arm925_dcache_clean_area)
bhi 1b
#endif
mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/* =============================== PageTable ============================== */
@@ -419,7 +419,7 @@ ENTRY(cpu_arm925_switch_mm)
mcr p15, 0, r0, c2, c0, 0 @ load page table pointer
mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs
#endif
- mov pc, lr
+ ret lr
/*
* cpu_arm925_set_pte_ext(ptep, pte, ext)
@@ -436,7 +436,7 @@ ENTRY(cpu_arm925_set_pte_ext)
#endif
mcr p15, 0, r0, c7, c10, 4 @ drain WB
#endif /* CONFIG_MMU */
- mov pc, lr
+ ret lr
.type __arm925_setup, #function
__arm925_setup:
@@ -469,7 +469,7 @@ __arm925_setup:
#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN
orr r0, r0, #0x4000 @ .1.. .... .... ....
#endif
- mov pc, lr
+ ret lr
.size __arm925_setup, . - __arm925_setup
/*
diff --git a/arch/arm/mm/proc-arm926.S b/arch/arm/mm/proc-arm926.S
index 0f098f407c9..252b2503038 100644
--- a/arch/arm/mm/proc-arm926.S
+++ b/arch/arm/mm/proc-arm926.S
@@ -55,7 +55,7 @@
* cpu_arm926_proc_init()
*/
ENTRY(cpu_arm926_proc_init)
- mov pc, lr
+ ret lr
/*
* cpu_arm926_proc_fin()
@@ -65,7 +65,7 @@ ENTRY(cpu_arm926_proc_fin)
bic r0, r0, #0x1000 @ ...i............
bic r0, r0, #0x000e @ ............wca.
mcr p15, 0, r0, c1, c0, 0 @ disable caches
- mov pc, lr
+ ret lr
/*
* cpu_arm926_reset(loc)
@@ -89,7 +89,7 @@ ENTRY(cpu_arm926_reset)
bic ip, ip, #0x000f @ ............wcam
bic ip, ip, #0x1100 @ ...i...s........
mcr p15, 0, ip, c1, c0, 0 @ ctrl register
- mov pc, r0
+ ret r0
ENDPROC(cpu_arm926_reset)
.popsection
@@ -111,7 +111,7 @@ ENTRY(cpu_arm926_do_idle)
mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt
mcr p15, 0, r1, c1, c0, 0 @ Restore ICache enable
msr cpsr_c, r3 @ Restore FIQ state
- mov pc, lr
+ ret lr
/*
* flush_icache_all()
@@ -121,7 +121,7 @@ ENTRY(cpu_arm926_do_idle)
ENTRY(arm926_flush_icache_all)
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
- mov pc, lr
+ ret lr
ENDPROC(arm926_flush_icache_all)
/*
@@ -151,7 +151,7 @@ __flush_whole_cache:
tst r2, #VM_EXEC
mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* flush_user_cache_range(start, end, flags)
@@ -188,7 +188,7 @@ ENTRY(arm926_flush_user_cache_range)
blo 1b
tst r2, #VM_EXEC
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* coherent_kern_range(start, end)
@@ -222,7 +222,7 @@ ENTRY(arm926_coherent_user_range)
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ drain WB
mov r0, #0
- mov pc, lr
+ ret lr
/*
* flush_kern_dcache_area(void *addr, size_t size)
@@ -242,7 +242,7 @@ ENTRY(arm926_flush_kern_dcache_area)
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_inv_range(start, end)
@@ -270,7 +270,7 @@ arm926_dma_inv_range:
cmp r0, r1
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_clean_range(start, end)
@@ -291,7 +291,7 @@ arm926_dma_clean_range:
blo 1b
#endif
mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_flush_range(start, end)
@@ -313,7 +313,7 @@ ENTRY(arm926_dma_flush_range)
cmp r0, r1
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_map_area(start, size, dir)
@@ -336,7 +336,7 @@ ENDPROC(arm926_dma_map_area)
* - dir - DMA direction
*/
ENTRY(arm926_dma_unmap_area)
- mov pc, lr
+ ret lr
ENDPROC(arm926_dma_unmap_area)
.globl arm926_flush_kern_cache_louis
@@ -353,7 +353,7 @@ ENTRY(cpu_arm926_dcache_clean_area)
bhi 1b
#endif
mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/* =============================== PageTable ============================== */
@@ -380,7 +380,7 @@ ENTRY(cpu_arm926_switch_mm)
mcr p15, 0, r0, c2, c0, 0 @ load page table pointer
mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs
#endif
- mov pc, lr
+ ret lr
/*
* cpu_arm926_set_pte_ext(ptep, pte, ext)
@@ -397,7 +397,7 @@ ENTRY(cpu_arm926_set_pte_ext)
#endif
mcr p15, 0, r0, c7, c10, 4 @ drain WB
#endif
- mov pc, lr
+ ret lr
/* Suspend/resume support: taken from arch/arm/plat-s3c24xx/sleep.S */
.globl cpu_arm926_suspend_size
@@ -448,7 +448,7 @@ __arm926_setup:
#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN
orr r0, r0, #0x4000 @ .1.. .... .... ....
#endif
- mov pc, lr
+ ret lr
.size __arm926_setup, . - __arm926_setup
/*
diff --git a/arch/arm/mm/proc-arm940.S b/arch/arm/mm/proc-arm940.S
index 1c39a704ff6..e5212d48937 100644
--- a/arch/arm/mm/proc-arm940.S
+++ b/arch/arm/mm/proc-arm940.S
@@ -31,7 +31,7 @@
*/
ENTRY(cpu_arm940_proc_init)
ENTRY(cpu_arm940_switch_mm)
- mov pc, lr
+ ret lr
/*
* cpu_arm940_proc_fin()
@@ -41,7 +41,7 @@ ENTRY(cpu_arm940_proc_fin)
bic r0, r0, #0x00001000 @ i-cache
bic r0, r0, #0x00000004 @ d-cache
mcr p15, 0, r0, c1, c0, 0 @ disable caches
- mov pc, lr
+ ret lr
/*
* cpu_arm940_reset(loc)
@@ -58,7 +58,7 @@ ENTRY(cpu_arm940_reset)
bic ip, ip, #0x00000005 @ .............c.p
bic ip, ip, #0x00001000 @ i-cache
mcr p15, 0, ip, c1, c0, 0 @ ctrl register
- mov pc, r0
+ ret r0
ENDPROC(cpu_arm940_reset)
.popsection
@@ -68,7 +68,7 @@ ENDPROC(cpu_arm940_reset)
.align 5
ENTRY(cpu_arm940_do_idle)
mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt
- mov pc, lr
+ ret lr
/*
* flush_icache_all()
@@ -78,7 +78,7 @@ ENTRY(cpu_arm940_do_idle)
ENTRY(arm940_flush_icache_all)
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
- mov pc, lr
+ ret lr
ENDPROC(arm940_flush_icache_all)
/*
@@ -122,7 +122,7 @@ ENTRY(arm940_flush_user_cache_range)
tst r2, #VM_EXEC
mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* coherent_kern_range(start, end)
@@ -170,7 +170,7 @@ ENTRY(arm940_flush_kern_dcache_area)
bcs 1b @ segments 7 to 0
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_inv_range(start, end)
@@ -191,7 +191,7 @@ arm940_dma_inv_range:
subs r1, r1, #1 << 4
bcs 1b @ segments 7 to 0
mcr p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_clean_range(start, end)
@@ -215,7 +215,7 @@ ENTRY(cpu_arm940_dcache_clean_area)
bcs 1b @ segments 7 to 0
#endif
mcr p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_flush_range(start, end)
@@ -241,7 +241,7 @@ ENTRY(arm940_dma_flush_range)
subs r1, r1, #1 << 4
bcs 1b @ segments 7 to 0
mcr p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_map_area(start, size, dir)
@@ -264,7 +264,7 @@ ENDPROC(arm940_dma_map_area)
* - dir - DMA direction
*/
ENTRY(arm940_dma_unmap_area)
- mov pc, lr
+ ret lr
ENDPROC(arm940_dma_unmap_area)
.globl arm940_flush_kern_cache_louis
@@ -337,7 +337,7 @@ __arm940_setup:
orr r0, r0, #0x00001000 @ I-cache
orr r0, r0, #0x00000005 @ MPU/D-cache
- mov pc, lr
+ ret lr
.size __arm940_setup, . - __arm940_setup
diff --git a/arch/arm/mm/proc-arm946.S b/arch/arm/mm/proc-arm946.S
index 0289cd905e7..b3dd9b2d0b8 100644
--- a/arch/arm/mm/proc-arm946.S
+++ b/arch/arm/mm/proc-arm946.S
@@ -38,7 +38,7 @@
*/
ENTRY(cpu_arm946_proc_init)
ENTRY(cpu_arm946_switch_mm)
- mov pc, lr
+ ret lr
/*
* cpu_arm946_proc_fin()
@@ -48,7 +48,7 @@ ENTRY(cpu_arm946_proc_fin)
bic r0, r0, #0x00001000 @ i-cache
bic r0, r0, #0x00000004 @ d-cache
mcr p15, 0, r0, c1, c0, 0 @ disable caches
- mov pc, lr
+ ret lr
/*
* cpu_arm946_reset(loc)
@@ -65,7 +65,7 @@ ENTRY(cpu_arm946_reset)
bic ip, ip, #0x00000005 @ .............c.p
bic ip, ip, #0x00001000 @ i-cache
mcr p15, 0, ip, c1, c0, 0 @ ctrl register
- mov pc, r0
+ ret r0
ENDPROC(cpu_arm946_reset)
.popsection
@@ -75,7 +75,7 @@ ENDPROC(cpu_arm946_reset)
.align 5
ENTRY(cpu_arm946_do_idle)
mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt
- mov pc, lr
+ ret lr
/*
* flush_icache_all()
@@ -85,7 +85,7 @@ ENTRY(cpu_arm946_do_idle)
ENTRY(arm946_flush_icache_all)
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
- mov pc, lr
+ ret lr
ENDPROC(arm946_flush_icache_all)
/*
@@ -117,7 +117,7 @@ __flush_whole_cache:
tst r2, #VM_EXEC
mcrne p15, 0, ip, c7, c5, 0 @ flush I cache
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* flush_user_cache_range(start, end, flags)
@@ -156,7 +156,7 @@ ENTRY(arm946_flush_user_cache_range)
blo 1b
tst r2, #VM_EXEC
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* coherent_kern_range(start, end)
@@ -191,7 +191,7 @@ ENTRY(arm946_coherent_user_range)
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ drain WB
mov r0, #0
- mov pc, lr
+ ret lr
/*
* flush_kern_dcache_area(void *addr, size_t size)
@@ -212,7 +212,7 @@ ENTRY(arm946_flush_kern_dcache_area)
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_inv_range(start, end)
@@ -239,7 +239,7 @@ arm946_dma_inv_range:
cmp r0, r1
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_clean_range(start, end)
@@ -260,7 +260,7 @@ arm946_dma_clean_range:
blo 1b
#endif
mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_flush_range(start, end)
@@ -284,7 +284,7 @@ ENTRY(arm946_dma_flush_range)
cmp r0, r1
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_map_area(start, size, dir)
@@ -307,7 +307,7 @@ ENDPROC(arm946_dma_map_area)
* - dir - DMA direction
*/
ENTRY(arm946_dma_unmap_area)
- mov pc, lr
+ ret lr
ENDPROC(arm946_dma_unmap_area)
.globl arm946_flush_kern_cache_louis
@@ -324,7 +324,7 @@ ENTRY(cpu_arm946_dcache_clean_area)
bhi 1b
#endif
mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
.type __arm946_setup, #function
__arm946_setup:
@@ -392,7 +392,7 @@ __arm946_setup:
#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN
orr r0, r0, #0x00004000 @ .1.. .... .... ....
#endif
- mov pc, lr
+ ret lr
.size __arm946_setup, . - __arm946_setup
diff --git a/arch/arm/mm/proc-arm9tdmi.S b/arch/arm/mm/proc-arm9tdmi.S
index f51197ba754..8227322bbb8 100644
--- a/arch/arm/mm/proc-arm9tdmi.S
+++ b/arch/arm/mm/proc-arm9tdmi.S
@@ -32,13 +32,13 @@ ENTRY(cpu_arm9tdmi_proc_init)
ENTRY(cpu_arm9tdmi_do_idle)
ENTRY(cpu_arm9tdmi_dcache_clean_area)
ENTRY(cpu_arm9tdmi_switch_mm)
- mov pc, lr
+ ret lr
/*
* cpu_arm9tdmi_proc_fin()
*/
ENTRY(cpu_arm9tdmi_proc_fin)
- mov pc, lr
+ ret lr
/*
* Function: cpu_arm9tdmi_reset(loc)
@@ -47,13 +47,13 @@ ENTRY(cpu_arm9tdmi_proc_fin)
*/
.pushsection .idmap.text, "ax"
ENTRY(cpu_arm9tdmi_reset)
- mov pc, r0
+ ret r0
ENDPROC(cpu_arm9tdmi_reset)
.popsection
.type __arm9tdmi_setup, #function
__arm9tdmi_setup:
- mov pc, lr
+ ret lr
.size __arm9tdmi_setup, . - __arm9tdmi_setup
__INITDATA
diff --git a/arch/arm/mm/proc-fa526.S b/arch/arm/mm/proc-fa526.S
index 2dfc0f1d3bf..c494886892b 100644
--- a/arch/arm/mm/proc-fa526.S
+++ b/arch/arm/mm/proc-fa526.S
@@ -32,7 +32,7 @@
* cpu_fa526_proc_init()
*/
ENTRY(cpu_fa526_proc_init)
- mov pc, lr
+ ret lr
/*
* cpu_fa526_proc_fin()
@@ -44,7 +44,7 @@ ENTRY(cpu_fa526_proc_fin)
mcr p15, 0, r0, c1, c0, 0 @ disable caches
nop
nop
- mov pc, lr
+ ret lr
/*
* cpu_fa526_reset(loc)
@@ -72,7 +72,7 @@ ENTRY(cpu_fa526_reset)
mcr p15, 0, ip, c1, c0, 0 @ ctrl register
nop
nop
- mov pc, r0
+ ret r0
ENDPROC(cpu_fa526_reset)
.popsection
@@ -81,7 +81,7 @@ ENDPROC(cpu_fa526_reset)
*/
.align 4
ENTRY(cpu_fa526_do_idle)
- mov pc, lr
+ ret lr
ENTRY(cpu_fa526_dcache_clean_area)
@@ -90,7 +90,7 @@ ENTRY(cpu_fa526_dcache_clean_area)
subs r1, r1, #CACHE_DLINESIZE
bhi 1b
mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/* =============================== PageTable ============================== */
@@ -117,7 +117,7 @@ ENTRY(cpu_fa526_switch_mm)
mcr p15, 0, r0, c2, c0, 0 @ load page table pointer
mcr p15, 0, ip, c8, c7, 0 @ invalidate UTLB
#endif
- mov pc, lr
+ ret lr
/*
* cpu_fa526_set_pte_ext(ptep, pte, ext)
@@ -133,7 +133,7 @@ ENTRY(cpu_fa526_set_pte_ext)
mov r0, #0
mcr p15, 0, r0, c7, c10, 4 @ drain WB
#endif
- mov pc, lr
+ ret lr
.type __fa526_setup, #function
__fa526_setup:
@@ -162,7 +162,7 @@ __fa526_setup:
bic r0, r0, r5
ldr r5, fa526_cr1_set
orr r0, r0, r5
- mov pc, lr
+ ret lr
.size __fa526_setup, . - __fa526_setup
/*
diff --git a/arch/arm/mm/proc-feroceon.S b/arch/arm/mm/proc-feroceon.S
index db79b62c92f..03a1b75f2e1 100644
--- a/arch/arm/mm/proc-feroceon.S
+++ b/arch/arm/mm/proc-feroceon.S
@@ -69,7 +69,7 @@ ENTRY(cpu_feroceon_proc_init)
movne r2, r2, lsr #2 @ turned into # of sets
sub r2, r2, #(1 << 5)
stmia r1, {r2, r3}
- mov pc, lr
+ ret lr
/*
* cpu_feroceon_proc_fin()
@@ -86,7 +86,7 @@ ENTRY(cpu_feroceon_proc_fin)
bic r0, r0, #0x1000 @ ...i............
bic r0, r0, #0x000e @ ............wca.
mcr p15, 0, r0, c1, c0, 0 @ disable caches
- mov pc, lr
+ ret lr
/*
* cpu_feroceon_reset(loc)
@@ -110,7 +110,7 @@ ENTRY(cpu_feroceon_reset)
bic ip, ip, #0x000f @ ............wcam
bic ip, ip, #0x1100 @ ...i...s........
mcr p15, 0, ip, c1, c0, 0 @ ctrl register
- mov pc, r0
+ ret r0
ENDPROC(cpu_feroceon_reset)
.popsection
@@ -124,7 +124,7 @@ ENTRY(cpu_feroceon_do_idle)
mov r0, #0
mcr p15, 0, r0, c7, c10, 4 @ Drain write buffer
mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt
- mov pc, lr
+ ret lr
/*
* flush_icache_all()
@@ -134,7 +134,7 @@ ENTRY(cpu_feroceon_do_idle)
ENTRY(feroceon_flush_icache_all)
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
- mov pc, lr
+ ret lr
ENDPROC(feroceon_flush_icache_all)
/*
@@ -169,7 +169,7 @@ __flush_whole_cache:
mov ip, #0
mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* flush_user_cache_range(start, end, flags)
@@ -198,7 +198,7 @@ ENTRY(feroceon_flush_user_cache_range)
tst r2, #VM_EXEC
mov ip, #0
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* coherent_kern_range(start, end)
@@ -233,7 +233,7 @@ ENTRY(feroceon_coherent_user_range)
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ drain WB
mov r0, #0
- mov pc, lr
+ ret lr
/*
* flush_kern_dcache_area(void *addr, size_t size)
@@ -254,7 +254,7 @@ ENTRY(feroceon_flush_kern_dcache_area)
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
.align 5
ENTRY(feroceon_range_flush_kern_dcache_area)
@@ -268,7 +268,7 @@ ENTRY(feroceon_range_flush_kern_dcache_area)
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_inv_range(start, end)
@@ -295,7 +295,7 @@ feroceon_dma_inv_range:
cmp r0, r1
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
.align 5
feroceon_range_dma_inv_range:
@@ -311,7 +311,7 @@ feroceon_range_dma_inv_range:
mcr p15, 5, r0, c15, c14, 0 @ D inv range start
mcr p15, 5, r1, c15, c14, 1 @ D inv range top
msr cpsr_c, r2 @ restore interrupts
- mov pc, lr
+ ret lr
/*
* dma_clean_range(start, end)
@@ -331,7 +331,7 @@ feroceon_dma_clean_range:
cmp r0, r1
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
.align 5
feroceon_range_dma_clean_range:
@@ -344,7 +344,7 @@ feroceon_range_dma_clean_range:
mcr p15, 5, r1, c15, c13, 1 @ D clean range top
msr cpsr_c, r2 @ restore interrupts
mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_flush_range(start, end)
@@ -362,7 +362,7 @@ ENTRY(feroceon_dma_flush_range)
cmp r0, r1
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
.align 5
ENTRY(feroceon_range_dma_flush_range)
@@ -375,7 +375,7 @@ ENTRY(feroceon_range_dma_flush_range)
mcr p15, 5, r1, c15, c15, 1 @ D clean/inv range top
msr cpsr_c, r2 @ restore interrupts
mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_map_area(start, size, dir)
@@ -412,7 +412,7 @@ ENDPROC(feroceon_range_dma_map_area)
* - dir - DMA direction
*/
ENTRY(feroceon_dma_unmap_area)
- mov pc, lr
+ ret lr
ENDPROC(feroceon_dma_unmap_area)
.globl feroceon_flush_kern_cache_louis
@@ -461,7 +461,7 @@ ENTRY(cpu_feroceon_dcache_clean_area)
bhi 1b
#endif
mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/* =============================== PageTable ============================== */
@@ -490,9 +490,9 @@ ENTRY(cpu_feroceon_switch_mm)
mcr p15, 0, r0, c2, c0, 0 @ load page table pointer
mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs
- mov pc, r2
+ ret r2
#else
- mov pc, lr
+ ret lr
#endif
/*
@@ -512,7 +512,7 @@ ENTRY(cpu_feroceon_set_pte_ext)
#endif
mcr p15, 0, r0, c7, c10, 4 @ drain WB
#endif
- mov pc, lr
+ ret lr
/* Suspend/resume support: taken from arch/arm/mm/proc-arm926.S */
.globl cpu_feroceon_suspend_size
@@ -554,7 +554,7 @@ __feroceon_setup:
mrc p15, 0, r0, c1, c0 @ get control register v4
bic r0, r0, r5
orr r0, r0, r6
- mov pc, lr
+ ret lr
.size __feroceon_setup, . - __feroceon_setup
/*
diff --git a/arch/arm/mm/proc-mohawk.S b/arch/arm/mm/proc-mohawk.S
index 40acba59573..53d393455f1 100644
--- a/arch/arm/mm/proc-mohawk.S
+++ b/arch/arm/mm/proc-mohawk.S
@@ -45,7 +45,7 @@
* cpu_mohawk_proc_init()
*/
ENTRY(cpu_mohawk_proc_init)
- mov pc, lr
+ ret lr
/*
* cpu_mohawk_proc_fin()
@@ -55,7 +55,7 @@ ENTRY(cpu_mohawk_proc_fin)
bic r0, r0, #0x1800 @ ...iz...........
bic r0, r0, #0x0006 @ .............ca.
mcr p15, 0, r0, c1, c0, 0 @ disable caches
- mov pc, lr
+ ret lr
/*
* cpu_mohawk_reset(loc)
@@ -79,7 +79,7 @@ ENTRY(cpu_mohawk_reset)
bic ip, ip, #0x0007 @ .............cam
bic ip, ip, #0x1100 @ ...i...s........
mcr p15, 0, ip, c1, c0, 0 @ ctrl register
- mov pc, r0
+ ret r0
ENDPROC(cpu_mohawk_reset)
.popsection
@@ -93,7 +93,7 @@ ENTRY(cpu_mohawk_do_idle)
mov r0, #0
mcr p15, 0, r0, c7, c10, 4 @ drain write buffer
mcr p15, 0, r0, c7, c0, 4 @ wait for interrupt
- mov pc, lr
+ ret lr
/*
* flush_icache_all()
@@ -103,7 +103,7 @@ ENTRY(cpu_mohawk_do_idle)
ENTRY(mohawk_flush_icache_all)
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
- mov pc, lr
+ ret lr
ENDPROC(mohawk_flush_icache_all)
/*
@@ -128,7 +128,7 @@ __flush_whole_cache:
tst r2, #VM_EXEC
mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache
mcrne p15, 0, ip, c7, c10, 0 @ drain write buffer
- mov pc, lr
+ ret lr
/*
* flush_user_cache_range(start, end, flags)
@@ -158,7 +158,7 @@ ENTRY(mohawk_flush_user_cache_range)
blo 1b
tst r2, #VM_EXEC
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* coherent_kern_range(start, end)
@@ -194,7 +194,7 @@ ENTRY(mohawk_coherent_user_range)
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ drain WB
mov r0, #0
- mov pc, lr
+ ret lr
/*
* flush_kern_dcache_area(void *addr, size_t size)
@@ -214,7 +214,7 @@ ENTRY(mohawk_flush_kern_dcache_area)
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_inv_range(start, end)
@@ -240,7 +240,7 @@ mohawk_dma_inv_range:
cmp r0, r1
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_clean_range(start, end)
@@ -259,7 +259,7 @@ mohawk_dma_clean_range:
cmp r0, r1
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_flush_range(start, end)
@@ -277,7 +277,7 @@ ENTRY(mohawk_dma_flush_range)
cmp r0, r1
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_map_area(start, size, dir)
@@ -300,7 +300,7 @@ ENDPROC(mohawk_dma_map_area)
* - dir - DMA direction
*/
ENTRY(mohawk_dma_unmap_area)
- mov pc, lr
+ ret lr
ENDPROC(mohawk_dma_unmap_area)
.globl mohawk_flush_kern_cache_louis
@@ -315,7 +315,7 @@ ENTRY(cpu_mohawk_dcache_clean_area)
subs r1, r1, #CACHE_DLINESIZE
bhi 1b
mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* cpu_mohawk_switch_mm(pgd)
@@ -333,7 +333,7 @@ ENTRY(cpu_mohawk_switch_mm)
orr r0, r0, #0x18 @ cache the page table in L2
mcr p15, 0, r0, c2, c0, 0 @ load page table pointer
mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs
- mov pc, lr
+ ret lr
/*
* cpu_mohawk_set_pte_ext(ptep, pte, ext)
@@ -346,7 +346,7 @@ ENTRY(cpu_mohawk_set_pte_ext)
mov r0, r0
mcr p15, 0, r0, c7, c10, 1 @ clean D entry
mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
.globl cpu_mohawk_suspend_size
.equ cpu_mohawk_suspend_size, 4 * 6
@@ -400,7 +400,7 @@ __mohawk_setup:
mrc p15, 0, r0, c1, c0 @ get control register
bic r0, r0, r5
orr r0, r0, r6
- mov pc, lr
+ ret lr
.size __mohawk_setup, . - __mohawk_setup
diff --git a/arch/arm/mm/proc-sa110.S b/arch/arm/mm/proc-sa110.S
index c45319c8f1d..8008a0461cf 100644
--- a/arch/arm/mm/proc-sa110.S
+++ b/arch/arm/mm/proc-sa110.S
@@ -38,7 +38,7 @@
ENTRY(cpu_sa110_proc_init)
mov r0, #0
mcr p15, 0, r0, c15, c1, 2 @ Enable clock switching
- mov pc, lr
+ ret lr
/*
* cpu_sa110_proc_fin()
@@ -50,7 +50,7 @@ ENTRY(cpu_sa110_proc_fin)
bic r0, r0, #0x1000 @ ...i............
bic r0, r0, #0x000e @ ............wca.
mcr p15, 0, r0, c1, c0, 0 @ disable caches
- mov pc, lr
+ ret lr
/*
* cpu_sa110_reset(loc)
@@ -74,7 +74,7 @@ ENTRY(cpu_sa110_reset)
bic ip, ip, #0x000f @ ............wcam
bic ip, ip, #0x1100 @ ...i...s........
mcr p15, 0, ip, c1, c0, 0 @ ctrl register
- mov pc, r0
+ ret r0
ENDPROC(cpu_sa110_reset)
.popsection
@@ -103,7 +103,7 @@ ENTRY(cpu_sa110_do_idle)
mov r0, r0 @ safety
mov r0, r0 @ safety
mcr p15, 0, r0, c15, c1, 2 @ enable clock switching
- mov pc, lr
+ ret lr
/* ================================= CACHE ================================ */
@@ -121,7 +121,7 @@ ENTRY(cpu_sa110_dcache_clean_area)
add r0, r0, #DCACHELINESIZE
subs r1, r1, #DCACHELINESIZE
bhi 1b
- mov pc, lr
+ ret lr
/* =============================== PageTable ============================== */
@@ -141,7 +141,7 @@ ENTRY(cpu_sa110_switch_mm)
mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs
ldr pc, [sp], #4
#else
- mov pc, lr
+ ret lr
#endif
/*
@@ -157,7 +157,7 @@ ENTRY(cpu_sa110_set_pte_ext)
mcr p15, 0, r0, c7, c10, 1 @ clean D entry
mcr p15, 0, r0, c7, c10, 4 @ drain WB
#endif
- mov pc, lr
+ ret lr
.type __sa110_setup, #function
__sa110_setup:
@@ -173,7 +173,7 @@ __sa110_setup:
mrc p15, 0, r0, c1, c0 @ get control register v4
bic r0, r0, r5
orr r0, r0, r6
- mov pc, lr
+ ret lr
.size __sa110_setup, . - __sa110_setup
/*
diff --git a/arch/arm/mm/proc-sa1100.S b/arch/arm/mm/proc-sa1100.S
index 09d241ae2db..89f97ac648a 100644
--- a/arch/arm/mm/proc-sa1100.S
+++ b/arch/arm/mm/proc-sa1100.S
@@ -43,7 +43,7 @@ ENTRY(cpu_sa1100_proc_init)
mov r0, #0
mcr p15, 0, r0, c15, c1, 2 @ Enable clock switching
mcr p15, 0, r0, c9, c0, 5 @ Allow read-buffer operations from userland
- mov pc, lr
+ ret lr
/*
* cpu_sa1100_proc_fin()
@@ -58,7 +58,7 @@ ENTRY(cpu_sa1100_proc_fin)
bic r0, r0, #0x1000 @ ...i............
bic r0, r0, #0x000e @ ............wca.
mcr p15, 0, r0, c1, c0, 0 @ disable caches
- mov pc, lr
+ ret lr
/*
* cpu_sa1100_reset(loc)
@@ -82,7 +82,7 @@ ENTRY(cpu_sa1100_reset)
bic ip, ip, #0x000f @ ............wcam
bic ip, ip, #0x1100 @ ...i...s........
mcr p15, 0, ip, c1, c0, 0 @ ctrl register
- mov pc, r0
+ ret r0
ENDPROC(cpu_sa1100_reset)
.popsection
@@ -113,7 +113,7 @@ ENTRY(cpu_sa1100_do_idle)
mcr p15, 0, r0, c15, c8, 2 @ wait for interrupt
mov r0, r0 @ safety
mcr p15, 0, r0, c15, c1, 2 @ enable clock switching
- mov pc, lr
+ ret lr
/* ================================= CACHE ================================ */
@@ -131,7 +131,7 @@ ENTRY(cpu_sa1100_dcache_clean_area)
add r0, r0, #DCACHELINESIZE
subs r1, r1, #DCACHELINESIZE
bhi 1b
- mov pc, lr
+ ret lr
/* =============================== PageTable ============================== */
@@ -152,7 +152,7 @@ ENTRY(cpu_sa1100_switch_mm)
mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs
ldr pc, [sp], #4
#else
- mov pc, lr
+ ret lr
#endif
/*
@@ -168,7 +168,7 @@ ENTRY(cpu_sa1100_set_pte_ext)
mcr p15, 0, r0, c7, c10, 1 @ clean D entry
mcr p15, 0, r0, c7, c10, 4 @ drain WB
#endif
- mov pc, lr
+ ret lr
.globl cpu_sa1100_suspend_size
.equ cpu_sa1100_suspend_size, 4 * 3
@@ -211,7 +211,7 @@ __sa1100_setup:
mrc p15, 0, r0, c1, c0 @ get control register v4
bic r0, r0, r5
orr r0, r0, r6
- mov pc, lr
+ ret lr
.size __sa1100_setup, . - __sa1100_setup
/*
diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S
index 32b3558321c..d0390f4b3f1 100644
--- a/arch/arm/mm/proc-v6.S
+++ b/arch/arm/mm/proc-v6.S
@@ -36,14 +36,14 @@
#define PMD_FLAGS_SMP PMD_SECT_WBWA|PMD_SECT_S
ENTRY(cpu_v6_proc_init)
- mov pc, lr
+ ret lr
ENTRY(cpu_v6_proc_fin)
mrc p15, 0, r0, c1, c0, 0 @ ctrl register
bic r0, r0, #0x1000 @ ...i............
bic r0, r0, #0x0006 @ .............ca.
mcr p15, 0, r0, c1, c0, 0 @ disable caches
- mov pc, lr
+ ret lr
/*
* cpu_v6_reset(loc)
@@ -62,7 +62,7 @@ ENTRY(cpu_v6_reset)
mcr p15, 0, r1, c1, c0, 0 @ disable MMU
mov r1, #0
mcr p15, 0, r1, c7, c5, 4 @ ISB
- mov pc, r0
+ ret r0
ENDPROC(cpu_v6_reset)
.popsection
@@ -77,14 +77,14 @@ ENTRY(cpu_v6_do_idle)
mov r1, #0
mcr p15, 0, r1, c7, c10, 4 @ DWB - WFI may enter a low-power mode
mcr p15, 0, r1, c7, c0, 4 @ wait for interrupt
- mov pc, lr
+ ret lr
ENTRY(cpu_v6_dcache_clean_area)
1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
add r0, r0, #D_CACHE_LINE_SIZE
subs r1, r1, #D_CACHE_LINE_SIZE
bhi 1b
- mov pc, lr
+ ret lr
/*
* cpu_v6_switch_mm(pgd_phys, tsk)
@@ -113,7 +113,7 @@ ENTRY(cpu_v6_switch_mm)
#endif
mcr p15, 0, r1, c13, c0, 1 @ set context ID
#endif
- mov pc, lr
+ ret lr
/*
* cpu_v6_set_pte_ext(ptep, pte, ext)
@@ -131,7 +131,7 @@ ENTRY(cpu_v6_set_pte_ext)
#ifdef CONFIG_MMU
armv6_set_pte_ext cpu_v6
#endif
- mov pc, lr
+ ret lr
/* Suspend/resume support: taken from arch/arm/mach-s3c64xx/sleep.S */
.globl cpu_v6_suspend_size
@@ -241,7 +241,7 @@ __v6_setup:
mcreq p15, 0, r5, c1, c0, 1 @ write aux control reg
orreq r0, r0, #(1 << 21) @ low interrupt latency configuration
#endif
- mov pc, lr @ return to head.S:__ret
+ ret lr @ return to head.S:__ret
/*
* V X F I D LR
diff --git a/arch/arm/mm/proc-v7-2level.S b/arch/arm/mm/proc-v7-2level.S
index 1f52915f2b2..ed448d8a596 100644
--- a/arch/arm/mm/proc-v7-2level.S
+++ b/arch/arm/mm/proc-v7-2level.S
@@ -59,7 +59,7 @@ ENTRY(cpu_v7_switch_mm)
mcr p15, 0, r0, c2, c0, 0 @ set TTB 0
isb
#endif
- mov pc, lr
+ bx lr
ENDPROC(cpu_v7_switch_mm)
/*
@@ -106,7 +106,7 @@ ENTRY(cpu_v7_set_pte_ext)
ALT_SMP(W(nop))
ALT_UP (mcr p15, 0, r0, c7, c10, 1) @ flush_pte
#endif
- mov pc, lr
+ bx lr
ENDPROC(cpu_v7_set_pte_ext)
/*
diff --git a/arch/arm/mm/proc-v7-3level.S b/arch/arm/mm/proc-v7-3level.S
index 22e3ad63500..e4c8acfc132 100644
--- a/arch/arm/mm/proc-v7-3level.S
+++ b/arch/arm/mm/proc-v7-3level.S
@@ -19,6 +19,7 @@
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
+#include <asm/assembler.h>
#define TTB_IRGN_NC (0 << 8)
#define TTB_IRGN_WBWA (1 << 8)
@@ -61,7 +62,7 @@ ENTRY(cpu_v7_switch_mm)
mcrr p15, 0, rpgdl, rpgdh, c2 @ set TTB 0
isb
#endif
- mov pc, lr
+ ret lr
ENDPROC(cpu_v7_switch_mm)
#ifdef __ARMEB__
@@ -86,13 +87,18 @@ ENTRY(cpu_v7_set_pte_ext)
tst rh, #1 << (57 - 32) @ L_PTE_NONE
bicne rl, #L_PTE_VALID
bne 1f
- tst rh, #1 << (55 - 32) @ L_PTE_DIRTY
- orreq rl, #L_PTE_RDONLY
+
+ eor ip, rh, #1 << (55 - 32) @ toggle L_PTE_DIRTY in temp reg to
+ @ test for !L_PTE_DIRTY || L_PTE_RDONLY
+ tst ip, #1 << (55 - 32) | 1 << (58 - 32)
+ orrne rl, #PTE_AP2
+ biceq rl, #PTE_AP2
+
1: strd r2, r3, [r0]
ALT_SMP(W(nop))
ALT_UP (mcr p15, 0, r0, c7, c10, 1) @ flush_pte
#endif
- mov pc, lr
+ ret lr
ENDPROC(cpu_v7_set_pte_ext)
/*
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index 3db2c2f04a3..b5d67db2089 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -26,7 +26,7 @@
#endif
ENTRY(cpu_v7_proc_init)
- mov pc, lr
+ ret lr
ENDPROC(cpu_v7_proc_init)
ENTRY(cpu_v7_proc_fin)
@@ -34,7 +34,7 @@ ENTRY(cpu_v7_proc_fin)
bic r0, r0, #0x1000 @ ...i............
bic r0, r0, #0x0006 @ .............ca.
mcr p15, 0, r0, c1, c0, 0 @ disable caches
- mov pc, lr
+ ret lr
ENDPROC(cpu_v7_proc_fin)
/*
@@ -71,20 +71,20 @@ ENDPROC(cpu_v7_reset)
ENTRY(cpu_v7_do_idle)
dsb @ WFI may enter a low-power mode
wfi
- mov pc, lr
+ ret lr
ENDPROC(cpu_v7_do_idle)
ENTRY(cpu_v7_dcache_clean_area)
ALT_SMP(W(nop)) @ MP extensions imply L1 PTW
ALT_UP_B(1f)
- mov pc, lr
+ ret lr
1: dcache_line_size r2, r3
2: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
add r0, r0, r2
subs r1, r1, r2
bhi 2b
dsb ishst
- mov pc, lr
+ ret lr
ENDPROC(cpu_v7_dcache_clean_area)
string cpu_v7_name, "ARMv7 Processor"
@@ -152,6 +152,40 @@ ENTRY(cpu_v7_do_resume)
ENDPROC(cpu_v7_do_resume)
#endif
+/*
+ * Cortex-A9 processor functions
+ */
+ globl_equ cpu_ca9mp_proc_init, cpu_v7_proc_init
+ globl_equ cpu_ca9mp_proc_fin, cpu_v7_proc_fin
+ globl_equ cpu_ca9mp_reset, cpu_v7_reset
+ globl_equ cpu_ca9mp_do_idle, cpu_v7_do_idle
+ globl_equ cpu_ca9mp_dcache_clean_area, cpu_v7_dcache_clean_area
+ globl_equ cpu_ca9mp_switch_mm, cpu_v7_switch_mm
+ globl_equ cpu_ca9mp_set_pte_ext, cpu_v7_set_pte_ext
+.globl cpu_ca9mp_suspend_size
+.equ cpu_ca9mp_suspend_size, cpu_v7_suspend_size + 4 * 2
+#ifdef CONFIG_ARM_CPU_SUSPEND
+ENTRY(cpu_ca9mp_do_suspend)
+ stmfd sp!, {r4 - r5}
+ mrc p15, 0, r4, c15, c0, 1 @ Diagnostic register
+ mrc p15, 0, r5, c15, c0, 0 @ Power register
+ stmia r0!, {r4 - r5}
+ ldmfd sp!, {r4 - r5}
+ b cpu_v7_do_suspend
+ENDPROC(cpu_ca9mp_do_suspend)
+
+ENTRY(cpu_ca9mp_do_resume)
+ ldmia r0!, {r4 - r5}
+ mrc p15, 0, r10, c15, c0, 1 @ Read Diagnostic register
+ teq r4, r10 @ Already restored?
+ mcrne p15, 0, r4, c15, c0, 1 @ No, so restore it
+ mrc p15, 0, r10, c15, c0, 0 @ Read Power register
+ teq r5, r10 @ Already restored?
+ mcrne p15, 0, r5, c15, c0, 0 @ No, so restore it
+ b cpu_v7_do_resume
+ENDPROC(cpu_ca9mp_do_resume)
+#endif
+
#ifdef CONFIG_CPU_PJ4B
globl_equ cpu_pj4b_switch_mm, cpu_v7_switch_mm
globl_equ cpu_pj4b_set_pte_ext, cpu_v7_set_pte_ext
@@ -163,7 +197,7 @@ ENTRY(cpu_pj4b_do_idle)
dsb @ WFI may enter a low-power mode
wfi
dsb @barrier
- mov pc, lr
+ ret lr
ENDPROC(cpu_pj4b_do_idle)
#else
globl_equ cpu_pj4b_do_idle, cpu_v7_do_idle
@@ -184,16 +218,16 @@ ENDPROC(cpu_pj4b_do_suspend)
ENTRY(cpu_pj4b_do_resume)
ldmia r0!, {r6 - r10}
- mcr p15, 1, r6, c15, c1, 0 @ save CP15 - extra features
- mcr p15, 1, r7, c15, c2, 0 @ save CP15 - Aux Func Modes Ctrl 0
- mcr p15, 1, r8, c15, c1, 2 @ save CP15 - Aux Debug Modes Ctrl 2
- mcr p15, 1, r9, c15, c1, 1 @ save CP15 - Aux Debug Modes Ctrl 1
- mcr p15, 0, r10, c9, c14, 0 @ save CP15 - PMC
+ mcr p15, 1, r6, c15, c1, 0 @ restore CP15 - extra features
+ mcr p15, 1, r7, c15, c2, 0 @ restore CP15 - Aux Func Modes Ctrl 0
+ mcr p15, 1, r8, c15, c1, 2 @ restore CP15 - Aux Debug Modes Ctrl 2
+ mcr p15, 1, r9, c15, c1, 1 @ restore CP15 - Aux Debug Modes Ctrl 1
+ mcr p15, 0, r10, c9, c14, 0 @ restore CP15 - PMC
b cpu_v7_do_resume
ENDPROC(cpu_pj4b_do_resume)
#endif
.globl cpu_pj4b_suspend_size
-.equ cpu_pj4b_suspend_size, 4 * 14
+.equ cpu_pj4b_suspend_size, cpu_v7_suspend_size + 4 * 5
#endif
@@ -216,6 +250,7 @@ __v7_cr7mp_setup:
__v7_ca7mp_setup:
__v7_ca12mp_setup:
__v7_ca15mp_setup:
+__v7_b15mp_setup:
__v7_ca17mp_setup:
mov r10, #0
1:
@@ -407,7 +442,7 @@ __v7_setup:
bic r0, r0, r5 @ clear bits them
orr r0, r0, r6 @ set them
THUMB( orr r0, r0, #1 << 30 ) @ Thumb exceptions
- mov pc, lr @ return to head.S:__ret
+ ret lr @ return to head.S:__ret
ENDPROC(__v7_setup)
.align 2
@@ -418,6 +453,7 @@ __v7_setup_stack:
@ define struct processor (see <asm/proc-fns.h> and proc-macros.S)
define_processor_functions v7, dabort=v7_early_abort, pabort=v7_pabort, suspend=1
+ define_processor_functions ca9mp, dabort=v7_early_abort, pabort=v7_pabort, suspend=1
#ifdef CONFIG_CPU_PJ4B
define_processor_functions pj4b, dabort=v7_early_abort, pabort=v7_pabort, suspend=1
#endif
@@ -470,7 +506,7 @@ __v7_ca5mp_proc_info:
__v7_ca9mp_proc_info:
.long 0x410fc090
.long 0xff0ffff0
- __v7_proc __v7_ca9mp_setup
+ __v7_proc __v7_ca9mp_setup, proc_fns = ca9mp_processor_functions
.size __v7_ca9mp_proc_info, . - __v7_ca9mp_proc_info
#endif /* CONFIG_ARM_LPAE */
@@ -528,6 +564,16 @@ __v7_ca15mp_proc_info:
.size __v7_ca15mp_proc_info, . - __v7_ca15mp_proc_info
/*
+ * Broadcom Corporation Brahma-B15 processor.
+ */
+ .type __v7_b15mp_proc_info, #object
+__v7_b15mp_proc_info:
+ .long 0x420f00f0
+ .long 0xff0ffff0
+ __v7_proc __v7_b15mp_setup, hwcaps = HWCAP_IDIV
+ .size __v7_b15mp_proc_info, . - __v7_b15mp_proc_info
+
+ /*
* ARM Ltd. Cortex A17 processor.
*/
.type __v7_ca17mp_proc_info, #object
diff --git a/arch/arm/mm/proc-v7m.S b/arch/arm/mm/proc-v7m.S
index 1ca37c72f12..d1e68b553d3 100644
--- a/arch/arm/mm/proc-v7m.S
+++ b/arch/arm/mm/proc-v7m.S
@@ -16,11 +16,11 @@
#include "proc-macros.S"
ENTRY(cpu_v7m_proc_init)
- mov pc, lr
+ ret lr
ENDPROC(cpu_v7m_proc_init)
ENTRY(cpu_v7m_proc_fin)
- mov pc, lr
+ ret lr
ENDPROC(cpu_v7m_proc_fin)
/*
@@ -34,7 +34,7 @@ ENDPROC(cpu_v7m_proc_fin)
*/
.align 5
ENTRY(cpu_v7m_reset)
- mov pc, r0
+ ret r0
ENDPROC(cpu_v7m_reset)
/*
@@ -46,18 +46,18 @@ ENDPROC(cpu_v7m_reset)
*/
ENTRY(cpu_v7m_do_idle)
wfi
- mov pc, lr
+ ret lr
ENDPROC(cpu_v7m_do_idle)
ENTRY(cpu_v7m_dcache_clean_area)
- mov pc, lr
+ ret lr
ENDPROC(cpu_v7m_dcache_clean_area)
/*
* There is no MMU, so here is nothing to do.
*/
ENTRY(cpu_v7m_switch_mm)
- mov pc, lr
+ ret lr
ENDPROC(cpu_v7m_switch_mm)
.globl cpu_v7m_suspend_size
@@ -65,11 +65,11 @@ ENDPROC(cpu_v7m_switch_mm)
#ifdef CONFIG_ARM_CPU_SUSPEND
ENTRY(cpu_v7m_do_suspend)
- mov pc, lr
+ ret lr
ENDPROC(cpu_v7m_do_suspend)
ENTRY(cpu_v7m_do_resume)
- mov pc, lr
+ ret lr
ENDPROC(cpu_v7m_do_resume)
#endif
@@ -120,7 +120,7 @@ __v7m_setup:
ldr r12, [r0, V7M_SCB_CCR] @ system control register
orr r12, #V7M_SCB_CCR_STKALIGN
str r12, [r0, V7M_SCB_CCR]
- mov pc, lr
+ ret lr
ENDPROC(__v7m_setup)
.align 2
diff --git a/arch/arm/mm/proc-xsc3.S b/arch/arm/mm/proc-xsc3.S
index dc164589004..f8acdfece03 100644
--- a/arch/arm/mm/proc-xsc3.S
+++ b/arch/arm/mm/proc-xsc3.S
@@ -83,7 +83,7 @@
* Nothing too exciting at the moment
*/
ENTRY(cpu_xsc3_proc_init)
- mov pc, lr
+ ret lr
/*
* cpu_xsc3_proc_fin()
@@ -93,7 +93,7 @@ ENTRY(cpu_xsc3_proc_fin)
bic r0, r0, #0x1800 @ ...IZ...........
bic r0, r0, #0x0006 @ .............CA.
mcr p15, 0, r0, c1, c0, 0 @ disable caches
- mov pc, lr
+ ret lr
/*
* cpu_xsc3_reset(loc)
@@ -119,7 +119,7 @@ ENTRY(cpu_xsc3_reset)
@ CAUTION: MMU turned off from this point. We count on the pipeline
@ already containing those two last instructions to survive.
mcr p15, 0, ip, c8, c7, 0 @ invalidate I and D TLBs
- mov pc, r0
+ ret r0
ENDPROC(cpu_xsc3_reset)
.popsection
@@ -138,7 +138,7 @@ ENDPROC(cpu_xsc3_reset)
ENTRY(cpu_xsc3_do_idle)
mov r0, #1
mcr p14, 0, r0, c7, c0, 0 @ go to idle
- mov pc, lr
+ ret lr
/* ================================= CACHE ================================ */
@@ -150,7 +150,7 @@ ENTRY(cpu_xsc3_do_idle)
ENTRY(xsc3_flush_icache_all)
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
- mov pc, lr
+ ret lr
ENDPROC(xsc3_flush_icache_all)
/*
@@ -176,7 +176,7 @@ __flush_whole_cache:
mcrne p15, 0, ip, c7, c5, 0 @ invalidate L1 I cache and BTB
mcrne p15, 0, ip, c7, c10, 4 @ data write barrier
mcrne p15, 0, ip, c7, c5, 4 @ prefetch flush
- mov pc, lr
+ ret lr
/*
* flush_user_cache_range(start, end, vm_flags)
@@ -205,7 +205,7 @@ ENTRY(xsc3_flush_user_cache_range)
mcrne p15, 0, ip, c7, c5, 6 @ invalidate BTB
mcrne p15, 0, ip, c7, c10, 4 @ data write barrier
mcrne p15, 0, ip, c7, c5, 4 @ prefetch flush
- mov pc, lr
+ ret lr
/*
* coherent_kern_range(start, end)
@@ -232,7 +232,7 @@ ENTRY(xsc3_coherent_user_range)
mcr p15, 0, r0, c7, c5, 0 @ invalidate L1 I cache and BTB
mcr p15, 0, r0, c7, c10, 4 @ data write barrier
mcr p15, 0, r0, c7, c5, 4 @ prefetch flush
- mov pc, lr
+ ret lr
/*
* flush_kern_dcache_area(void *addr, size_t size)
@@ -253,7 +253,7 @@ ENTRY(xsc3_flush_kern_dcache_area)
mcr p15, 0, r0, c7, c5, 0 @ invalidate L1 I cache and BTB
mcr p15, 0, r0, c7, c10, 4 @ data write barrier
mcr p15, 0, r0, c7, c5, 4 @ prefetch flush
- mov pc, lr
+ ret lr
/*
* dma_inv_range(start, end)
@@ -277,7 +277,7 @@ xsc3_dma_inv_range:
cmp r0, r1
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ data write barrier
- mov pc, lr
+ ret lr
/*
* dma_clean_range(start, end)
@@ -294,7 +294,7 @@ xsc3_dma_clean_range:
cmp r0, r1
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ data write barrier
- mov pc, lr
+ ret lr
/*
* dma_flush_range(start, end)
@@ -311,7 +311,7 @@ ENTRY(xsc3_dma_flush_range)
cmp r0, r1
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ data write barrier
- mov pc, lr
+ ret lr
/*
* dma_map_area(start, size, dir)
@@ -334,7 +334,7 @@ ENDPROC(xsc3_dma_map_area)
* - dir - DMA direction
*/
ENTRY(xsc3_dma_unmap_area)
- mov pc, lr
+ ret lr
ENDPROC(xsc3_dma_unmap_area)
.globl xsc3_flush_kern_cache_louis
@@ -348,7 +348,7 @@ ENTRY(cpu_xsc3_dcache_clean_area)
add r0, r0, #CACHELINESIZE
subs r1, r1, #CACHELINESIZE
bhi 1b
- mov pc, lr
+ ret lr
/* =============================== PageTable ============================== */
@@ -406,7 +406,7 @@ ENTRY(cpu_xsc3_set_pte_ext)
orr r2, r2, ip
xscale_set_pte_ext_epilogue
- mov pc, lr
+ ret lr
.ltorg
.align
@@ -478,7 +478,7 @@ __xsc3_setup:
bic r0, r0, r5 @ ..V. ..R. .... ..A.
orr r0, r0, r6 @ ..VI Z..S .... .C.M (mmu)
@ ...I Z..S .... .... (uc)
- mov pc, lr
+ ret lr
.size __xsc3_setup, . - __xsc3_setup
diff --git a/arch/arm/mm/proc-xscale.S b/arch/arm/mm/proc-xscale.S
index d19b1cfcad9..23259f104c6 100644
--- a/arch/arm/mm/proc-xscale.S
+++ b/arch/arm/mm/proc-xscale.S
@@ -118,7 +118,7 @@ ENTRY(cpu_xscale_proc_init)
mrc p15, 0, r1, c1, c0, 1
bic r1, r1, #1
mcr p15, 0, r1, c1, c0, 1
- mov pc, lr
+ ret lr
/*
* cpu_xscale_proc_fin()
@@ -128,7 +128,7 @@ ENTRY(cpu_xscale_proc_fin)
bic r0, r0, #0x1800 @ ...IZ...........
bic r0, r0, #0x0006 @ .............CA.
mcr p15, 0, r0, c1, c0, 0 @ disable caches
- mov pc, lr
+ ret lr
/*
* cpu_xscale_reset(loc)
@@ -160,7 +160,7 @@ ENTRY(cpu_xscale_reset)
@ CAUTION: MMU turned off from this point. We count on the pipeline
@ already containing those two last instructions to survive.
mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs
- mov pc, r0
+ ret r0
ENDPROC(cpu_xscale_reset)
.popsection
@@ -179,7 +179,7 @@ ENDPROC(cpu_xscale_reset)
ENTRY(cpu_xscale_do_idle)
mov r0, #1
mcr p14, 0, r0, c7, c0, 0 @ Go to IDLE
- mov pc, lr
+ ret lr
/* ================================= CACHE ================================ */
@@ -191,7 +191,7 @@ ENTRY(cpu_xscale_do_idle)
ENTRY(xscale_flush_icache_all)
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
- mov pc, lr
+ ret lr
ENDPROC(xscale_flush_icache_all)
/*
@@ -216,7 +216,7 @@ __flush_whole_cache:
tst r2, #VM_EXEC
mcrne p15, 0, ip, c7, c5, 0 @ Invalidate I cache & BTB
mcrne p15, 0, ip, c7, c10, 4 @ Drain Write (& Fill) Buffer
- mov pc, lr
+ ret lr
/*
* flush_user_cache_range(start, end, vm_flags)
@@ -245,7 +245,7 @@ ENTRY(xscale_flush_user_cache_range)
tst r2, #VM_EXEC
mcrne p15, 0, ip, c7, c5, 6 @ Invalidate BTB
mcrne p15, 0, ip, c7, c10, 4 @ Drain Write (& Fill) Buffer
- mov pc, lr
+ ret lr
/*
* coherent_kern_range(start, end)
@@ -269,7 +269,7 @@ ENTRY(xscale_coherent_kern_range)
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ Invalidate I cache & BTB
mcr p15, 0, r0, c7, c10, 4 @ Drain Write (& Fill) Buffer
- mov pc, lr
+ ret lr
/*
* coherent_user_range(start, end)
@@ -291,7 +291,7 @@ ENTRY(xscale_coherent_user_range)
mov r0, #0
mcr p15, 0, r0, c7, c5, 6 @ Invalidate BTB
mcr p15, 0, r0, c7, c10, 4 @ Drain Write (& Fill) Buffer
- mov pc, lr
+ ret lr
/*
* flush_kern_dcache_area(void *addr, size_t size)
@@ -312,7 +312,7 @@ ENTRY(xscale_flush_kern_dcache_area)
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ Invalidate I cache & BTB
mcr p15, 0, r0, c7, c10, 4 @ Drain Write (& Fill) Buffer
- mov pc, lr
+ ret lr
/*
* dma_inv_range(start, end)
@@ -336,7 +336,7 @@ xscale_dma_inv_range:
cmp r0, r1
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ Drain Write (& Fill) Buffer
- mov pc, lr
+ ret lr
/*
* dma_clean_range(start, end)
@@ -353,7 +353,7 @@ xscale_dma_clean_range:
cmp r0, r1
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ Drain Write (& Fill) Buffer
- mov pc, lr
+ ret lr
/*
* dma_flush_range(start, end)
@@ -371,7 +371,7 @@ ENTRY(xscale_dma_flush_range)
cmp r0, r1
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ Drain Write (& Fill) Buffer
- mov pc, lr
+ ret lr
/*
* dma_map_area(start, size, dir)
@@ -407,7 +407,7 @@ ENDPROC(xscale_80200_A0_A1_dma_map_area)
* - dir - DMA direction
*/
ENTRY(xscale_dma_unmap_area)
- mov pc, lr
+ ret lr
ENDPROC(xscale_dma_unmap_area)
.globl xscale_flush_kern_cache_louis
@@ -458,7 +458,7 @@ ENTRY(cpu_xscale_dcache_clean_area)
add r0, r0, #CACHELINESIZE
subs r1, r1, #CACHELINESIZE
bhi 1b
- mov pc, lr
+ ret lr
/* =============================== PageTable ============================== */
@@ -521,7 +521,7 @@ ENTRY(cpu_xscale_set_pte_ext)
orr r2, r2, ip
xscale_set_pte_ext_epilogue
- mov pc, lr
+ ret lr
.ltorg
.align
@@ -572,7 +572,7 @@ __xscale_setup:
mrc p15, 0, r0, c1, c0, 0 @ get control register
bic r0, r0, r5
orr r0, r0, r6
- mov pc, lr
+ ret lr
.size __xscale_setup, . - __xscale_setup
/*
diff --git a/arch/arm/mm/tlb-fa.S b/arch/arm/mm/tlb-fa.S
index d3ddcf9a76c..d2d9ecbe0aa 100644
--- a/arch/arm/mm/tlb-fa.S
+++ b/arch/arm/mm/tlb-fa.S
@@ -18,6 +18,7 @@
*/
#include <linux/linkage.h>
#include <linux/init.h>
+#include <asm/assembler.h>
#include <asm/asm-offsets.h>
#include <asm/tlbflush.h>
#include "proc-macros.S"
@@ -37,7 +38,7 @@ ENTRY(fa_flush_user_tlb_range)
vma_vm_mm ip, r2
act_mm r3 @ get current->active_mm
eors r3, ip, r3 @ == mm ?
- movne pc, lr @ no, we dont do anything
+ retne lr @ no, we dont do anything
mov r3, #0
mcr p15, 0, r3, c7, c10, 4 @ drain WB
bic r0, r0, #0x0ff
@@ -47,7 +48,7 @@ ENTRY(fa_flush_user_tlb_range)
cmp r0, r1
blo 1b
mcr p15, 0, r3, c7, c10, 4 @ data write barrier
- mov pc, lr
+ ret lr
ENTRY(fa_flush_kern_tlb_range)
@@ -61,7 +62,7 @@ ENTRY(fa_flush_kern_tlb_range)
blo 1b
mcr p15, 0, r3, c7, c10, 4 @ data write barrier
mcr p15, 0, r3, c7, c5, 4 @ prefetch flush (isb)
- mov pc, lr
+ ret lr
__INITDATA
diff --git a/arch/arm/mm/tlb-v4.S b/arch/arm/mm/tlb-v4.S
index 17a025ade57..a2b5dca4204 100644
--- a/arch/arm/mm/tlb-v4.S
+++ b/arch/arm/mm/tlb-v4.S
@@ -14,6 +14,7 @@
*/
#include <linux/linkage.h>
#include <linux/init.h>
+#include <asm/assembler.h>
#include <asm/asm-offsets.h>
#include <asm/tlbflush.h>
#include "proc-macros.S"
@@ -33,7 +34,7 @@ ENTRY(v4_flush_user_tlb_range)
vma_vm_mm ip, r2
act_mm r3 @ get current->active_mm
eors r3, ip, r3 @ == mm ?
- movne pc, lr @ no, we dont do anything
+ retne lr @ no, we dont do anything
.v4_flush_kern_tlb_range:
bic r0, r0, #0x0ff
bic r0, r0, #0xf00
@@ -41,7 +42,7 @@ ENTRY(v4_flush_user_tlb_range)
add r0, r0, #PAGE_SZ
cmp r0, r1
blo 1b
- mov pc, lr
+ ret lr
/*
* v4_flush_kern_tlb_range(start, end)
diff --git a/arch/arm/mm/tlb-v4wb.S b/arch/arm/mm/tlb-v4wb.S
index c04598fa4d4..5a093b458db 100644
--- a/arch/arm/mm/tlb-v4wb.S
+++ b/arch/arm/mm/tlb-v4wb.S
@@ -14,6 +14,7 @@
*/
#include <linux/linkage.h>
#include <linux/init.h>
+#include <asm/assembler.h>
#include <asm/asm-offsets.h>
#include <asm/tlbflush.h>
#include "proc-macros.S"
@@ -33,7 +34,7 @@ ENTRY(v4wb_flush_user_tlb_range)
vma_vm_mm ip, r2
act_mm r3 @ get current->active_mm
eors r3, ip, r3 @ == mm ?
- movne pc, lr @ no, we dont do anything
+ retne lr @ no, we dont do anything
vma_vm_flags r2, r2
mcr p15, 0, r3, c7, c10, 4 @ drain WB
tst r2, #VM_EXEC
@@ -44,7 +45,7 @@ ENTRY(v4wb_flush_user_tlb_range)
add r0, r0, #PAGE_SZ
cmp r0, r1
blo 1b
- mov pc, lr
+ ret lr
/*
* v4_flush_kern_tlb_range(start, end)
@@ -65,7 +66,7 @@ ENTRY(v4wb_flush_kern_tlb_range)
add r0, r0, #PAGE_SZ
cmp r0, r1
blo 1b
- mov pc, lr
+ ret lr
__INITDATA
diff --git a/arch/arm/mm/tlb-v4wbi.S b/arch/arm/mm/tlb-v4wbi.S
index 1f6062b6c1c..058861548f6 100644
--- a/arch/arm/mm/tlb-v4wbi.S
+++ b/arch/arm/mm/tlb-v4wbi.S
@@ -14,6 +14,7 @@
*/
#include <linux/linkage.h>
#include <linux/init.h>
+#include <asm/assembler.h>
#include <asm/asm-offsets.h>
#include <asm/tlbflush.h>
#include "proc-macros.S"
@@ -32,7 +33,7 @@ ENTRY(v4wbi_flush_user_tlb_range)
vma_vm_mm ip, r2
act_mm r3 @ get current->active_mm
eors r3, ip, r3 @ == mm ?
- movne pc, lr @ no, we dont do anything
+ retne lr @ no, we dont do anything
mov r3, #0
mcr p15, 0, r3, c7, c10, 4 @ drain WB
vma_vm_flags r2, r2
@@ -44,7 +45,7 @@ ENTRY(v4wbi_flush_user_tlb_range)
add r0, r0, #PAGE_SZ
cmp r0, r1
blo 1b
- mov pc, lr
+ ret lr
ENTRY(v4wbi_flush_kern_tlb_range)
mov r3, #0
@@ -56,7 +57,7 @@ ENTRY(v4wbi_flush_kern_tlb_range)
add r0, r0, #PAGE_SZ
cmp r0, r1
blo 1b
- mov pc, lr
+ ret lr
__INITDATA
diff --git a/arch/arm/mm/tlb-v6.S b/arch/arm/mm/tlb-v6.S
index eca07f550a0..6f689be638b 100644
--- a/arch/arm/mm/tlb-v6.S
+++ b/arch/arm/mm/tlb-v6.S
@@ -13,6 +13,7 @@
#include <linux/init.h>
#include <linux/linkage.h>
#include <asm/asm-offsets.h>
+#include <asm/assembler.h>
#include <asm/page.h>
#include <asm/tlbflush.h>
#include "proc-macros.S"
@@ -55,7 +56,7 @@ ENTRY(v6wbi_flush_user_tlb_range)
cmp r0, r1
blo 1b
mcr p15, 0, ip, c7, c10, 4 @ data synchronization barrier
- mov pc, lr
+ ret lr
/*
* v6wbi_flush_kern_tlb_range(start,end)
@@ -84,7 +85,7 @@ ENTRY(v6wbi_flush_kern_tlb_range)
blo 1b
mcr p15, 0, r2, c7, c10, 4 @ data synchronization barrier
mcr p15, 0, r2, c7, c5, 4 @ prefetch flush (isb)
- mov pc, lr
+ ret lr
__INIT
diff --git a/arch/arm/mm/tlb-v7.S b/arch/arm/mm/tlb-v7.S
index 355308767ba..e5101a3bc57 100644
--- a/arch/arm/mm/tlb-v7.S
+++ b/arch/arm/mm/tlb-v7.S
@@ -57,7 +57,7 @@ ENTRY(v7wbi_flush_user_tlb_range)
cmp r0, r1
blo 1b
dsb ish
- mov pc, lr
+ ret lr
ENDPROC(v7wbi_flush_user_tlb_range)
/*
@@ -86,7 +86,7 @@ ENTRY(v7wbi_flush_kern_tlb_range)
blo 1b
dsb ish
isb
- mov pc, lr
+ ret lr
ENDPROC(v7wbi_flush_kern_tlb_range)
__INIT
diff --git a/arch/arm/nwfpe/entry.S b/arch/arm/nwfpe/entry.S
index d18dde95b8a..5d65be1f1e8 100644
--- a/arch/arm/nwfpe/entry.S
+++ b/arch/arm/nwfpe/entry.S
@@ -19,7 +19,7 @@
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
-
+#include <asm/assembler.h>
#include <asm/opcodes.h>
/* This is the kernel's entry point into the floating point emulator.
@@ -92,7 +92,7 @@ emulate:
mov r0, r6 @ prepare for EmulateAll()
bl EmulateAll @ emulate the instruction
cmp r0, #0 @ was emulation successful
- moveq pc, r4 @ no, return failure
+ reteq r4 @ no, return failure
next:
.Lx1: ldrt r6, [r5], #4 @ get the next instruction and
@@ -102,7 +102,7 @@ next:
teq r2, #0x0C000000
teqne r2, #0x0D000000
teqne r2, #0x0E000000
- movne pc, r9 @ return ok if not a fp insn
+ retne r9 @ return ok if not a fp insn
str r5, [sp, #S_PC] @ update PC copy in regs
@@ -115,7 +115,7 @@ next:
@ plain LDR instruction. Weird, but it seems harmless.
.pushsection .fixup,"ax"
.align 2
-.Lfix: mov pc, r9 @ let the user eat segfaults
+.Lfix: ret r9 @ let the user eat segfaults
.popsection
.pushsection __ex_table,"a"
diff --git a/arch/arm/oprofile/common.c b/arch/arm/oprofile/common.c
index 99c63d4b6af..cc649a1e46d 100644
--- a/arch/arm/oprofile/common.c
+++ b/arch/arm/oprofile/common.c
@@ -33,12 +33,14 @@ static struct op_perf_name {
char *perf_name;
char *op_name;
} op_perf_name_map[] = {
- { "xscale1", "arm/xscale1" },
- { "xscale1", "arm/xscale2" },
- { "v6", "arm/armv6" },
- { "v6mpcore", "arm/mpcore" },
- { "ARMv7 Cortex-A8", "arm/armv7" },
- { "ARMv7 Cortex-A9", "arm/armv7-ca9" },
+ { "armv5_xscale1", "arm/xscale1" },
+ { "armv5_xscale2", "arm/xscale2" },
+ { "armv6_1136", "arm/armv6" },
+ { "armv6_1156", "arm/armv6" },
+ { "armv6_1176", "arm/armv6" },
+ { "armv6_11mpcore", "arm/mpcore" },
+ { "armv7_cortex_a8", "arm/armv7" },
+ { "armv7_cortex_a9", "arm/armv7-ca9" },
};
char *op_name_from_perf_id(void)
@@ -107,10 +109,7 @@ static void arm_backtrace(struct pt_regs * const regs, unsigned int depth)
if (!user_mode(regs)) {
struct stackframe frame;
- frame.fp = regs->ARM_fp;
- frame.sp = regs->ARM_sp;
- frame.lr = regs->ARM_lr;
- frame.pc = regs->ARM_pc;
+ arm_get_current_stackframe(regs, &frame);
walk_stackframe(&frame, report_trace, &depth);
return;
}
diff --git a/arch/arm/plat-omap/dma.c b/arch/arm/plat-omap/dma.c
index b5608b1f9fb..1c98659bbf8 100644
--- a/arch/arm/plat-omap/dma.c
+++ b/arch/arm/plat-omap/dma.c
@@ -698,6 +698,8 @@ int omap_request_dma(int dev_id, const char *dev_name,
unsigned long flags;
struct omap_dma_lch *chan;
+ WARN(strcmp(dev_name, "DMA engine"), "Using deprecated platform DMA API - please update to DMA engine");
+
spin_lock_irqsave(&dma_chan_lock, flags);
for (ch = 0; ch < dma_chan_count; ch++) {
if (free_ch == -1 && dma_chan[ch].dev_id == -1) {
diff --git a/arch/arm/vfp/entry.S b/arch/arm/vfp/entry.S
index fe6ca574d09..2e78760f349 100644
--- a/arch/arm/vfp/entry.S
+++ b/arch/arm/vfp/entry.S
@@ -34,7 +34,7 @@ ENDPROC(do_vfp)
ENTRY(vfp_null_entry)
dec_preempt_count_ti r10, r4
- mov pc, lr
+ ret lr
ENDPROC(vfp_null_entry)
.align 2
@@ -49,7 +49,7 @@ ENTRY(vfp_testing_entry)
dec_preempt_count_ti r10, r4
ldr r0, VFP_arch_address
str r0, [r0] @ set to non-zero value
- mov pc, r9 @ we have handled the fault
+ ret r9 @ we have handled the fault
ENDPROC(vfp_testing_entry)
.align 2
diff --git a/arch/arm/vfp/vfphw.S b/arch/arm/vfp/vfphw.S
index be807625ed8..cda654cbf2c 100644
--- a/arch/arm/vfp/vfphw.S
+++ b/arch/arm/vfp/vfphw.S
@@ -183,7 +183,7 @@ vfp_hw_state_valid:
@ always subtract 4 from the following
@ instruction address.
dec_preempt_count_ti r10, r4
- mov pc, r9 @ we think we have handled things
+ ret r9 @ we think we have handled things
look_for_VFP_exceptions:
@@ -202,7 +202,7 @@ look_for_VFP_exceptions:
DBGSTR "not VFP"
dec_preempt_count_ti r10, r4
- mov pc, lr
+ ret lr
process_exception:
DBGSTR "bounce"
@@ -234,7 +234,7 @@ ENTRY(vfp_save_state)
VFPFMRX r12, FPINST2 @ FPINST2 if needed (and present)
1:
stmia r0, {r1, r2, r3, r12} @ save FPEXC, FPSCR, FPINST, FPINST2
- mov pc, lr
+ ret lr
ENDPROC(vfp_save_state)
.align
@@ -245,7 +245,7 @@ vfp_current_hw_state_address:
#ifdef CONFIG_THUMB2_KERNEL
adr \tmp, 1f
add \tmp, \tmp, \base, lsl \shift
- mov pc, \tmp
+ ret \tmp
#else
add pc, pc, \base, lsl \shift
mov r0, r0
@@ -257,10 +257,10 @@ ENTRY(vfp_get_float)
tbl_branch r0, r3, #3
.irp dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
1: mrc p10, 0, r0, c\dr, c0, 0 @ fmrs r0, s0
- mov pc, lr
+ ret lr
.org 1b + 8
1: mrc p10, 0, r0, c\dr, c0, 4 @ fmrs r0, s1
- mov pc, lr
+ ret lr
.org 1b + 8
.endr
ENDPROC(vfp_get_float)
@@ -269,10 +269,10 @@ ENTRY(vfp_put_float)
tbl_branch r1, r3, #3
.irp dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
1: mcr p10, 0, r0, c\dr, c0, 0 @ fmsr r0, s0
- mov pc, lr
+ ret lr
.org 1b + 8
1: mcr p10, 0, r0, c\dr, c0, 4 @ fmsr r0, s1
- mov pc, lr
+ ret lr
.org 1b + 8
.endr
ENDPROC(vfp_put_float)
@@ -281,14 +281,14 @@ ENTRY(vfp_get_double)
tbl_branch r0, r3, #3
.irp dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
1: fmrrd r0, r1, d\dr
- mov pc, lr
+ ret lr
.org 1b + 8
.endr
#ifdef CONFIG_VFPv3
@ d16 - d31 registers
.irp dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
1: mrrc p11, 3, r0, r1, c\dr @ fmrrd r0, r1, d\dr
- mov pc, lr
+ ret lr
.org 1b + 8
.endr
#endif
@@ -296,21 +296,21 @@ ENTRY(vfp_get_double)
@ virtual register 16 (or 32 if VFPv3) for compare with zero
mov r0, #0
mov r1, #0
- mov pc, lr
+ ret lr
ENDPROC(vfp_get_double)
ENTRY(vfp_put_double)
tbl_branch r2, r3, #3
.irp dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
1: fmdrr d\dr, r0, r1
- mov pc, lr
+ ret lr
.org 1b + 8
.endr
#ifdef CONFIG_VFPv3
@ d16 - d31 registers
.irp dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
1: mcrr p11, 3, r0, r1, c\dr @ fmdrr r0, r1, d\dr
- mov pc, lr
+ ret lr
.org 1b + 8
.endr
#endif
diff --git a/arch/arm/xen/hypercall.S b/arch/arm/xen/hypercall.S
index 44e3a5f10c4..f00e0807593 100644
--- a/arch/arm/xen/hypercall.S
+++ b/arch/arm/xen/hypercall.S
@@ -58,7 +58,7 @@
ENTRY(HYPERVISOR_##hypercall) \
mov r12, #__HYPERVISOR_##hypercall; \
__HVC(XEN_IMM); \
- mov pc, lr; \
+ ret lr; \
ENDPROC(HYPERVISOR_##hypercall)
#define HYPERCALL0 HYPERCALL_SIMPLE
@@ -74,7 +74,7 @@ ENTRY(HYPERVISOR_##hypercall) \
mov r12, #__HYPERVISOR_##hypercall; \
__HVC(XEN_IMM); \
ldm sp!, {r4} \
- mov pc, lr \
+ ret lr \
ENDPROC(HYPERVISOR_##hypercall)
.text
@@ -101,5 +101,5 @@ ENTRY(privcmd_call)
ldr r4, [sp, #4]
__HVC(XEN_IMM)
ldm sp!, {r4}
- mov pc, lr
+ ret lr
ENDPROC(privcmd_call);
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 6345c470650..00b5906f57b 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -541,6 +541,17 @@ config CRYPTO_SHA1_ARM
SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented
using optimized ARM assembler.
+config CRYPTO_SHA1_ARM_NEON
+ tristate "SHA1 digest algorithm (ARM NEON)"
+ depends on ARM && KERNEL_MODE_NEON && !CPU_BIG_ENDIAN
+ select CRYPTO_SHA1_ARM
+ select CRYPTO_SHA1
+ select CRYPTO_HASH
+ help
+ SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented
+ using optimized ARM NEON assembly, when NEON instructions are
+ available.
+
config CRYPTO_SHA1_PPC
tristate "SHA1 digest algorithm (powerpc)"
depends on PPC
@@ -590,6 +601,21 @@ config CRYPTO_SHA512_SPARC64
SHA-512 secure hash standard (DFIPS 180-2) implemented
using sparc64 crypto instructions, when available.
+config CRYPTO_SHA512_ARM_NEON
+ tristate "SHA384 and SHA512 digest algorithm (ARM NEON)"
+ depends on ARM && KERNEL_MODE_NEON && !CPU_BIG_ENDIAN
+ select CRYPTO_SHA512
+ select CRYPTO_HASH
+ help
+ SHA-512 secure hash standard (DFIPS 180-2) implemented
+ using ARM NEON instructions, when available.
+
+ This version of SHA implements a 512 bit hash with 256 bits of
+ security against collision attacks.
+
+ This code also includes SHA-384, a 384 bit hash with 192 bits
+ of security against collision attacks.
+
config CRYPTO_TGR192
tristate "Tiger digest algorithms"
select CRYPTO_HASH
diff --git a/drivers/clocksource/arm_global_timer.c b/drivers/clocksource/arm_global_timer.c
index 60e5a170c4d..e6833771a71 100644
--- a/drivers/clocksource/arm_global_timer.c
+++ b/drivers/clocksource/arm_global_timer.c
@@ -250,7 +250,7 @@ static void __init global_timer_of_register(struct device_node *np)
* fire when the timer value is greater than or equal to. In previous
* revisions the comparators fired when the timer value was equal to.
*/
- if (read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A9
+ if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A9
&& (read_cpuid_id() & 0xf0000f) < 0x200000) {
pr_warn("global-timer: non support for this cpu version.\n");
return;