Pull branch 'for-rmk' of git://git.linaro.org/people/ardbiesheuvel/linux-arm into devel-stable

Comments from Ard Biesheuvel: I have included two use cases that I have been using, XOR and RAID-6 checksumming. The former gets a 60% performance boost on the NEON, the latter over 400%. ARM: add support for kernel mode NEON Adds kernel_neon_begin/end (renamed from kernel_vfp_begin/end in the previous version to de-emphasize the VFP part as VFP code that needs software assistance is not supported currently.) Introduces <asm/neon.h> and the Kconfig symbol KERNEL_MODE_NEON. This has been aligned with Catalin for arm64, so any NEON code that does not use assembly but intrinsics or the GCC vectorizer (such as my examples) can potentially be shared between arm and arm64 archs. ARM: move VFP init to an earlier boot stage This is needed so the NEON is enabled when the XOR and RAID-6 algo boot time benchmarks are run. ARM: be strict about FP exceptions in kernel mode This adds a check to vfp_support_entry() to flag unsupported uses of the NEON/VFP in kernel mode. FP exceptions (bounces) are flagged as a bug, this is because of their potentially intermittent nature. Exceptions caused by the fact that kernel_neon_begin has not been called are just routed through the undef handler. ARM: crypto: add NEON accelerated XOR implementation This is the xor_blocks() implementation built with -ftree-vectorize, 60% faster than optimized ARM code. It calls in_interrupt() to check whether the NEON flavor can be used: this should really not be necessary, but due to xor_blocks'squite generic nature, there is no telling how exactly people may be using it in the real world. lib/raid6: add ARM-NEON accelerated syndrome calculation This is a port of the RAID-6 checksumming code in altivec.uc ported to use NEON intrinsics. It is about 4x faster than the sequential code.
author: Russell King <rmk+kernel@arm.linux.org.uk> 2013-07-22 17:26:27 +0100
committer: Russell King <rmk+kernel@arm.linux.org.uk> 2013-07-22 17:46:40 +0100
commit: b4f656eea63376da79b0b5a17660c4ce14b71b74 (patch)
tree: 36731cf326eca19cd2164f886b3eaa31449fe8fa /lib/raid6/test/Makefile
parent: 3b2f64d00c46e1e4e9bd0bb9bb12619adac27a4b (diff)
parent: 7d11965ddb9b9b1e0a5d13c58345ada1ccbc663b (diff)
1 files changed, 25 insertions, 1 deletions
diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile
index 087332dbf8a..28afa1a06e0 100644
--- a/lib/raid6/test/Makefile
+++ b/lib/raid6/test/Makefile
@@ -22,11 +22,23 @@ ifeq ($(ARCH),x86_64)
         IS_X86 = yes
 endif
 
+ifeq ($(ARCH),arm)
+        CFLAGS += -I../../../arch/arm/include -mfpu=neon
+        HAS_NEON = yes
+endif
+ifeq ($(ARCH),arm64)
+        CFLAGS += -I../../../arch/arm64/include
+        HAS_NEON = yes
+endif
+
 ifeq ($(IS_X86),yes)
         OBJS   += mmx.o sse1.o sse2.o avx2.o recov_ssse3.o recov_avx2.o
         CFLAGS += $(shell echo "vpbroadcastb %xmm0, %ymm1" |	\
                     gcc -c -x assembler - >&/dev/null &&	\
                     rm ./-.o && echo -DCONFIG_AS_AVX2=1)
+else ifeq ($(HAS_NEON),yes)
+        OBJS   += neon.o neon1.o neon2.o neon4.o neon8.o
+        CFLAGS += -DCONFIG_KERNEL_MODE_NEON=1
 else
         HAS_ALTIVEC := $(shell echo -e '\#include <altivec.h>\nvector int a;' |\
                          gcc -c -x c - >&/dev/null && \
@@ -55,6 +67,18 @@ raid6.a: $(OBJS)
 raid6test: test.c raid6.a
 	$(CC) $(CFLAGS) -o raid6test $^
 
+neon1.c: neon.uc ../unroll.awk
+	$(AWK) ../unroll.awk -vN=1 < neon.uc > $@
+
+neon2.c: neon.uc ../unroll.awk
+	$(AWK) ../unroll.awk -vN=2 < neon.uc > $@
+
+neon4.c: neon.uc ../unroll.awk
+	$(AWK) ../unroll.awk -vN=4 < neon.uc > $@
+
+neon8.c: neon.uc ../unroll.awk
+	$(AWK) ../unroll.awk -vN=8 < neon.uc > $@
+
 altivec1.c: altivec.uc ../unroll.awk
 	$(AWK) ../unroll.awk -vN=1 < altivec.uc > $@
 
@@ -89,7 +113,7 @@ tables.c: mktables
 	./mktables > tables.c
 
 clean:
-	rm -f *.o *.a mktables mktables.c *.uc int*.c altivec*.c tables.c raid6test
+	rm -f *.o *.a mktables mktables.c *.uc int*.c altivec*.c neon*.c tables.c raid6test
 
 spotless: clean
 	rm -f *~
author	Russell King <rmk+kernel@arm.linux.org.uk>	2013-07-22 17:26:27 +0100
committer	Russell King <rmk+kernel@arm.linux.org.uk>	2013-07-22 17:46:40 +0100
commit	b4f656eea63376da79b0b5a17660c4ce14b71b74 (patch)
tree	36731cf326eca19cd2164f886b3eaa31449fe8fa /lib/raid6/test/Makefile
parent	3b2f64d00c46e1e4e9bd0bb9bb12619adac27a4b (diff)
parent	7d11965ddb9b9b1e0a5d13c58345ada1ccbc663b (diff)