From dd78bc11fb2050b6a3990d0421feca4c68ca4335 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Tue, 23 Jul 2013 17:32:04 -0400 Subject: tile: convert uses of "inv" to "finv" The "inv" (invalidate) instruction is generally less safe than "finv" (flush and invalidate), as it will drop dirty data from the cache. It turns out we have almost no need for "inv" (other than for the older 32-bit architecture in some limited cases), so convert to "finv" where possible and delete the extra "inv" infrastructure. Signed-off-by: Chris Metcalf --- arch/tile/lib/cacheflush.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'arch/tile/lib/cacheflush.c') diff --git a/arch/tile/lib/cacheflush.c b/arch/tile/lib/cacheflush.c index 8f8ad814b13..2238b40abf3 100644 --- a/arch/tile/lib/cacheflush.c +++ b/arch/tile/lib/cacheflush.c @@ -147,18 +147,21 @@ void finv_buffer_remote(void *buffer, size_t size, int hfh) force_load(p); /* - * Repeat, but with inv's instead of loads, to get rid of the + * Repeat, but with finv's instead of loads, to get rid of the * data we just loaded into our own cache and the old home L3. - * No need to unroll since inv's don't target a register. + * No need to unroll since finv's don't target a register. + * The finv's are guaranteed not to actually flush the data in + * the buffer back to their home, since we just read it, so the + * lines are clean in cache; we will only invalidate those lines. */ p = (char *)buffer + size - 1; - __insn_inv(p); + __insn_finv(p); p -= step_size; p = (char *)((unsigned long)p | (step_size - 1)); for (; p >= base; p -= step_size) - __insn_inv(p); + __insn_finv(p); - /* Wait for the load+inv's (and thus finvs) to have completed. */ + /* Wait for these finv's (and thus the first finvs) to be done. */ __insn_mf(); #ifdef __tilegx__ -- cgit v1.2.3-70-g09d2 From 49cf78ef7bb34833496d59b6dfe84ae51b1ab097 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Mon, 12 Aug 2013 15:00:51 -0400 Subject: tile: support FRAME_POINTER Allow enabling frame pointer support; this makes it easier to hook into the various kernel features that claim they require it without having to add Kconfig conditionals everywhere (a la mips, ppc, s390, and microblaze). When enabled, it basically eliminates leaf functions as such, and stops optimizing tail and sibling calls. It adds around 3% to the size of the kernel when enabled. Signed-off-by: Chris Metcalf --- arch/tile/Kconfig | 1 + arch/tile/lib/Makefile | 14 +++++++------- arch/tile/lib/cacheflush.c | 3 ++- arch/tile/lib/memcpy_64.c | 3 ++- 4 files changed, 12 insertions(+), 9 deletions(-) (limited to 'arch/tile/lib/cacheflush.c') diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig index de599c14c4f..b2be4252448 100644 --- a/arch/tile/Kconfig +++ b/arch/tile/Kconfig @@ -26,6 +26,7 @@ config TILE select HAVE_SYSCALL_TRACEPOINTS select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE select HAVE_DEBUG_STACKOVERFLOW + select ARCH_WANT_FRAME_POINTERS # FIXME: investigate whether we need/want these options. # select HAVE_IOREMAP_PROT diff --git a/arch/tile/lib/Makefile b/arch/tile/lib/Makefile index 5d844374b2b..9adfd76fbdd 100644 --- a/arch/tile/lib/Makefile +++ b/arch/tile/lib/Makefile @@ -6,13 +6,13 @@ lib-y = cacheflush.o checksum.o cpumask.o delay.o uaccess.o \ memmove.o memcpy_$(BITS).o memchr_$(BITS).o memset_$(BITS).o \ strchr_$(BITS).o strlen_$(BITS).o strnlen_$(BITS).o -ifeq ($(CONFIG_TILEGX),y) -CFLAGS_REMOVE_memcpy_user_64.o = -fno-omit-frame-pointer -lib-y += memcpy_user_64.o -else -lib-y += atomic_32.o atomic_asm_32.o memcpy_tile64.o -endif - +lib-$(CONFIG_TILEGX) += memcpy_user_64.o +lib-$(CONFIG_TILEPRO) += atomic_32.o atomic_asm_32.o memcpy_tile64.o lib-$(CONFIG_SMP) += spinlock_$(BITS).o usercopy_$(BITS).o obj-$(CONFIG_MODULES) += exports.o + +# The finv_buffer_remote() and copy_{to,from}_user() routines can't +# have -pg added, since they both rely on being leaf functions. +CFLAGS_REMOVE_cacheflush.o = -pg +CFLAGS_REMOVE_memcpy_user_64.o = -pg diff --git a/arch/tile/lib/cacheflush.c b/arch/tile/lib/cacheflush.c index 2238b40abf3..9c0ec22009a 100644 --- a/arch/tile/lib/cacheflush.c +++ b/arch/tile/lib/cacheflush.c @@ -36,7 +36,8 @@ static inline void force_load(char *p) * core (if "!hfh") or homed via hash-for-home (if "hfh"), waiting * until the memory controller holds the flushed values. */ -void finv_buffer_remote(void *buffer, size_t size, int hfh) +void __attribute__((optimize("omit-frame-pointer"))) +finv_buffer_remote(void *buffer, size_t size, int hfh) { char *p, *base; size_t step_size, load_count; diff --git a/arch/tile/lib/memcpy_64.c b/arch/tile/lib/memcpy_64.c index 46fc1600c17..4815354b8cd 100644 --- a/arch/tile/lib/memcpy_64.c +++ b/arch/tile/lib/memcpy_64.c @@ -54,7 +54,8 @@ void *memcpy(void *__restrict dstv, const void *__restrict srcv, size_t n) * macros to return a count of uncopied bytes due to mm fault. */ #define RETVAL 0 -int USERCOPY_FUNC(void *__restrict dstv, const void *__restrict srcv, size_t n) +int __attribute__((optimize("omit-frame-pointer"))) +USERCOPY_FUNC(void *__restrict dstv, const void *__restrict srcv, size_t n) #endif { char *__restrict dst1 = (char *)dstv; -- cgit v1.2.3-70-g09d2