From 8c49d9a74bac5ea3f18480307057241b808fcc0c Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 23 May 2011 09:31:24 -0400 Subject: x86-64: Clean up vdso/kernel shared variables Variables that are shared between the vdso and the kernel are currently a bit of a mess. They are each defined with their own magic, they are accessed differently in the kernel, the vsyscall page, and the vdso, and one of them (vsyscall_clock) doesn't even really exist. This changes them all to use a common mechanism. All of them are delcared in vvar.h with a fixed address (validated by the linker script). In the kernel (as before), they look like ordinary read-write variables. In the vsyscall page and the vdso, they are accessed through a new macro VVAR, which gives read-only access. The vdso is now loaded verbatim into memory without any fixups. As a side bonus, access from the vdso is faster because a level of indirection is removed. While we're at it, pack jiffies and vgetcpu_mode into the same cacheline. Signed-off-by: Andy Lutomirski Cc: Andi Kleen Cc: Linus Torvalds Cc: "David S. Miller" Cc: Eric Dumazet Cc: Peter Zijlstra Cc: Borislav Petkov Link: http://lkml.kernel.org/r/%3C7357882fbb51fa30491636a7b6528747301b7ee9.1306156808.git.luto%40mit.edu%3E Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/vdso.h | 14 ----------- arch/x86/include/asm/vgtod.h | 2 -- arch/x86/include/asm/vsyscall.h | 12 ++-------- arch/x86/include/asm/vvar.h | 52 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 54 insertions(+), 26 deletions(-) create mode 100644 arch/x86/include/asm/vvar.h (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h index 9064052b73d..bb0522850b7 100644 --- a/arch/x86/include/asm/vdso.h +++ b/arch/x86/include/asm/vdso.h @@ -1,20 +1,6 @@ #ifndef _ASM_X86_VDSO_H #define _ASM_X86_VDSO_H -#ifdef CONFIG_X86_64 -extern const char VDSO64_PRELINK[]; - -/* - * Given a pointer to the vDSO image, find the pointer to VDSO64_name - * as that symbol is defined in the vDSO sources or linker script. - */ -#define VDSO64_SYMBOL(base, name) \ -({ \ - extern const char VDSO64_##name[]; \ - (void *)(VDSO64_##name - VDSO64_PRELINK + (unsigned long)(base)); \ -}) -#endif - #if defined CONFIG_X86_32 || defined CONFIG_COMPAT extern const char VDSO32_PRELINK[]; diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h index 3d61e204826..646b4c1ca69 100644 --- a/arch/x86/include/asm/vgtod.h +++ b/arch/x86/include/asm/vgtod.h @@ -23,8 +23,6 @@ struct vsyscall_gtod_data { struct timespec wall_to_monotonic; struct timespec wall_time_coarse; }; -extern struct vsyscall_gtod_data __vsyscall_gtod_data -__section_vsyscall_gtod_data; extern struct vsyscall_gtod_data vsyscall_gtod_data; #endif /* _ASM_X86_VGTOD_H */ diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h index d0983d255fb..d55597351f6 100644 --- a/arch/x86/include/asm/vsyscall.h +++ b/arch/x86/include/asm/vsyscall.h @@ -16,27 +16,19 @@ enum vsyscall_num { #ifdef __KERNEL__ #include -#define __section_vgetcpu_mode __attribute__ ((unused, __section__ (".vgetcpu_mode"), aligned(16))) -#define __section_jiffies __attribute__ ((unused, __section__ (".jiffies"), aligned(16))) - /* Definitions for CONFIG_GENERIC_TIME definitions */ -#define __section_vsyscall_gtod_data __attribute__ \ - ((unused, __section__ (".vsyscall_gtod_data"),aligned(16))) -#define __section_vsyscall_clock __attribute__ \ - ((unused, __section__ (".vsyscall_clock"),aligned(16))) #define __vsyscall_fn \ __attribute__ ((unused, __section__(".vsyscall_fn"))) notrace #define VGETCPU_RDTSCP 1 #define VGETCPU_LSL 2 -extern int __vgetcpu_mode; -extern volatile unsigned long __jiffies; - /* kernel space (writeable) */ extern int vgetcpu_mode; extern struct timezone sys_tz; +#include + extern void map_vsyscall(void); #endif /* __KERNEL__ */ diff --git a/arch/x86/include/asm/vvar.h b/arch/x86/include/asm/vvar.h new file mode 100644 index 00000000000..341b3559452 --- /dev/null +++ b/arch/x86/include/asm/vvar.h @@ -0,0 +1,52 @@ +/* + * vvar.h: Shared vDSO/kernel variable declarations + * Copyright (c) 2011 Andy Lutomirski + * Subject to the GNU General Public License, version 2 + * + * A handful of variables are accessible (read-only) from userspace + * code in the vsyscall page and the vdso. They are declared here. + * Some other file must define them with DEFINE_VVAR. + * + * In normal kernel code, they are used like any other variable. + * In user code, they are accessed through the VVAR macro. + * + * Each of these variables lives in the vsyscall page, and each + * one needs a unique offset within the little piece of the page + * reserved for vvars. Specify that offset in DECLARE_VVAR. + * (There are 896 bytes available. If you mess up, the linker will + * catch it.) + */ + +/* Offset of vars within vsyscall page */ +#define VSYSCALL_VARS_OFFSET (3072 + 128) + +#if defined(__VVAR_KERNEL_LDS) + +/* The kernel linker script defines its own magic to put vvars in the + * right place. + */ +#define DECLARE_VVAR(offset, type, name) \ + EMIT_VVAR(name, VSYSCALL_VARS_OFFSET + offset) + +#else + +#define DECLARE_VVAR(offset, type, name) \ + static type const * const vvaraddr_ ## name = \ + (void *)(VSYSCALL_START + VSYSCALL_VARS_OFFSET + (offset)); + +#define DEFINE_VVAR(type, name) \ + type __vvar_ ## name \ + __attribute__((section(".vsyscall_var_" #name), aligned(16))) + +#define VVAR(name) (*vvaraddr_ ## name) + +#endif + +/* DECLARE_VVAR(offset, type, name) */ + +DECLARE_VVAR(0, volatile unsigned long, jiffies) +DECLARE_VVAR(8, int, vgetcpu_mode) +DECLARE_VVAR(128, struct vsyscall_gtod_data, vsyscall_gtod_data) + +#undef DECLARE_VVAR +#undef VSYSCALL_VARS_OFFSET -- cgit v1.2.3-70-g09d2 From 44259b1abfaa8bb819d25d41d71e8e33e25dd36a Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 23 May 2011 09:31:28 -0400 Subject: x86-64: Move vread_tsc into a new file with sensible options vread_tsc is short and hot, and it's userspace code so the usual reasons to enable -pg and turn off sibling calls don't apply. (OK, turning off sibling calls has no effect. But it might someday...) As an added benefit, tsc.c is profilable now. Signed-off-by: Andy Lutomirski Cc: Andi Kleen Cc: Linus Torvalds Cc: "David S. Miller" Cc: Eric Dumazet Cc: Peter Zijlstra Cc: Borislav Petkov Link: http://lkml.kernel.org/r/%3C99c6d7f5efa3ccb65b4ac6eb443e1ab7bad47d7b.1306156808.git.luto%40mit.edu%3E Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/tsc.h | 4 ++++ arch/x86/kernel/Makefile | 8 +++++--- arch/x86/kernel/tsc.c | 34 ---------------------------------- arch/x86/kernel/vread_tsc_64.c | 36 ++++++++++++++++++++++++++++++++++++ 4 files changed, 45 insertions(+), 37 deletions(-) create mode 100644 arch/x86/kernel/vread_tsc_64.c (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h index 83e2efd181e..9db5583b6d3 100644 --- a/arch/x86/include/asm/tsc.h +++ b/arch/x86/include/asm/tsc.h @@ -51,6 +51,10 @@ extern int unsynchronized_tsc(void); extern int check_tsc_unstable(void); extern unsigned long native_calibrate_tsc(void); +#ifdef CONFIG_X86_64 +extern cycles_t vread_tsc(void); +#endif + /* * Boot-time check whether the TSCs are synchronized across * all CPUs/cores: diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 250806472a7..f5abe3a245b 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -8,7 +8,6 @@ CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE) ifdef CONFIG_FUNCTION_TRACER # Do not profile debug and lowlevel utilities -CFLAGS_REMOVE_tsc.o = -pg CFLAGS_REMOVE_rtc.o = -pg CFLAGS_REMOVE_paravirt-spinlocks.o = -pg CFLAGS_REMOVE_pvclock.o = -pg @@ -24,13 +23,16 @@ endif nostackp := $(call cc-option, -fno-stack-protector) CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp) CFLAGS_hpet.o := $(nostackp) -CFLAGS_tsc.o := $(nostackp) +CFLAGS_vread_tsc_64.o := $(nostackp) CFLAGS_paravirt.o := $(nostackp) GCOV_PROFILE_vsyscall_64.o := n GCOV_PROFILE_hpet.o := n GCOV_PROFILE_tsc.o := n GCOV_PROFILE_paravirt.o := n +# vread_tsc_64 is hot and should be fully optimized: +CFLAGS_REMOVE_vread_tsc_64.o = -pg -fno-optimize-sibling-calls + obj-y := process_$(BITS).o signal.o entry_$(BITS).o obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o obj-y += time.o ioport.o ldt.o dumpstack.o @@ -39,7 +41,7 @@ obj-$(CONFIG_IRQ_WORK) += irq_work.o obj-y += probe_roms.o obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o -obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o +obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o vread_tsc_64.o obj-y += bootflag.o e820.o obj-y += pci-dma.o quirks.o topology.o kdebugfs.o obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 24249a5360b..6cc6922262a 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -763,40 +763,6 @@ static cycle_t read_tsc(struct clocksource *cs) ret : clocksource_tsc.cycle_last; } -#ifdef CONFIG_X86_64 -static cycle_t __vsyscall_fn vread_tsc(void) -{ - cycle_t ret; - u64 last; - - /* - * Empirically, a fence (of type that depends on the CPU) - * before rdtsc is enough to ensure that rdtsc is ordered - * with respect to loads. The various CPU manuals are unclear - * as to whether rdtsc can be reordered with later loads, - * but no one has ever seen it happen. - */ - rdtsc_barrier(); - ret = (cycle_t)vget_cycles(); - - last = VVAR(vsyscall_gtod_data).clock.cycle_last; - - if (likely(ret >= last)) - return ret; - - /* - * GCC likes to generate cmov here, but this branch is extremely - * predictable (it's just a funciton of time and the likely is - * very likely) and there's a data dependence, so force GCC - * to generate a branch instead. I don't barrier() because - * we don't actually need a barrier, and if this function - * ever gets inlined it will generate worse code. - */ - asm volatile (""); - return last; -} -#endif - static void resume_tsc(struct clocksource *cs) { clocksource_tsc.cycle_last = 0; diff --git a/arch/x86/kernel/vread_tsc_64.c b/arch/x86/kernel/vread_tsc_64.c new file mode 100644 index 00000000000..a81aa9e9894 --- /dev/null +++ b/arch/x86/kernel/vread_tsc_64.c @@ -0,0 +1,36 @@ +/* This code runs in userspace. */ + +#define DISABLE_BRANCH_PROFILING +#include + +notrace cycle_t __vsyscall_fn vread_tsc(void) +{ + cycle_t ret; + u64 last; + + /* + * Empirically, a fence (of type that depends on the CPU) + * before rdtsc is enough to ensure that rdtsc is ordered + * with respect to loads. The various CPU manuals are unclear + * as to whether rdtsc can be reordered with later loads, + * but no one has ever seen it happen. + */ + rdtsc_barrier(); + ret = (cycle_t)vget_cycles(); + + last = VVAR(vsyscall_gtod_data).clock.cycle_last; + + if (likely(ret >= last)) + return ret; + + /* + * GCC likes to generate cmov here, but this branch is extremely + * predictable (it's just a funciton of time and the likely is + * very likely) and there's a data dependence, so force GCC + * to generate a branch instead. I don't barrier() because + * we don't actually need a barrier, and if this function + * ever gets inlined it will generate worse code. + */ + asm volatile (""); + return last; +} -- cgit v1.2.3-70-g09d2