diff options
Diffstat (limited to 'arch/x86/vdso')
-rw-r--r-- | arch/x86/vdso/.gitignore | 2 | ||||
-rw-r--r-- | arch/x86/vdso/Makefile | 46 | ||||
-rw-r--r-- | arch/x86/vdso/vclock_gettime.c | 135 | ||||
-rw-r--r-- | arch/x86/vdso/vdso32-setup.c | 5 | ||||
-rw-r--r-- | arch/x86/vdso/vdsox32.S | 22 | ||||
-rw-r--r-- | arch/x86/vdso/vdsox32.lds.S | 28 | ||||
-rw-r--r-- | arch/x86/vdso/vma.c | 78 |
7 files changed, 239 insertions, 77 deletions
diff --git a/arch/x86/vdso/.gitignore b/arch/x86/vdso/.gitignore index 60274d5746e..3282874bc61 100644 --- a/arch/x86/vdso/.gitignore +++ b/arch/x86/vdso/.gitignore @@ -1,5 +1,7 @@ vdso.lds vdso-syms.lds +vdsox32.lds +vdsox32-syms.lds vdso32-syms.lds vdso32-syscall-syms.lds vdso32-sysenter-syms.lds diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile index 5d179502a52..fd14be1d147 100644 --- a/arch/x86/vdso/Makefile +++ b/arch/x86/vdso/Makefile @@ -3,21 +3,29 @@ # VDSO64-$(CONFIG_X86_64) := y +VDSOX32-$(CONFIG_X86_X32_ABI) := y VDSO32-$(CONFIG_X86_32) := y VDSO32-$(CONFIG_COMPAT) := y vdso-install-$(VDSO64-y) += vdso.so +vdso-install-$(VDSOX32-y) += vdsox32.so vdso-install-$(VDSO32-y) += $(vdso32-images) # files to link into the vdso vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o +vobjs-$(VDSOX32-y) += $(vobjx32s-compat) + +# Filter out x32 objects. +vobj64s := $(filter-out $(vobjx32s-compat),$(vobjs-y)) + # files to link into kernel obj-$(VDSO64-y) += vma.o vdso.o +obj-$(VDSOX32-y) += vdsox32.o obj-$(VDSO32-y) += vdso32.o vdso32-setup.o -vobjs := $(foreach F,$(vobjs-y),$(obj)/$F) +vobjs := $(foreach F,$(vobj64s),$(obj)/$F) $(obj)/vdso.o: $(obj)/vdso.so @@ -73,6 +81,42 @@ $(obj)/%-syms.lds: $(obj)/%.so.dbg FORCE $(call if_changed,vdsosym) # +# X32 processes use x32 vDSO to access 64bit kernel data. +# +# Build x32 vDSO image: +# 1. Compile x32 vDSO as 64bit. +# 2. Convert object files to x32. +# 3. Build x32 VDSO image with x32 objects, which contains 64bit codes +# so that it can reach 64bit address space with 64bit pointers. +# + +targets += vdsox32-syms.lds +obj-$(VDSOX32-y) += vdsox32-syms.lds + +CPPFLAGS_vdsox32.lds = $(CPPFLAGS_vdso.lds) +VDSO_LDFLAGS_vdsox32.lds = -Wl,-m,elf32_x86_64 \ + -Wl,-soname=linux-vdso.so.1 \ + -Wl,-z,max-page-size=4096 \ + -Wl,-z,common-page-size=4096 + +vobjx32s-y := $(vobj64s:.o=-x32.o) +vobjx32s := $(foreach F,$(vobjx32s-y),$(obj)/$F) + +# Convert 64bit object file to x32 for x32 vDSO. 
+quiet_cmd_x32 = X32 $@ + cmd_x32 = $(OBJCOPY) -O elf32-x86-64 $< $@ + +$(obj)/%-x32.o: $(obj)/%.o FORCE + $(call if_changed,x32) + +targets += vdsox32.so vdsox32.so.dbg vdsox32.lds $(vobjx32s-y) + +$(obj)/vdsox32.o: $(src)/vdsox32.S $(obj)/vdsox32.so + +$(obj)/vdsox32.so.dbg: $(src)/vdsox32.lds $(vobjx32s) FORCE + $(call if_changed,vdso) + +# # Build multiple 32-bit vDSO images to choose from at boot time. # obj-$(VDSO32-y) += vdso32-syms.lds diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c index 6bc0e723b6e..885eff49d6a 100644 --- a/arch/x86/vdso/vclock_gettime.c +++ b/arch/x86/vdso/vclock_gettime.c @@ -70,100 +70,98 @@ notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) return ret; } +notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz) +{ + long ret; + + asm("syscall" : "=a" (ret) : + "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory"); + return ret; +} + + notrace static inline long vgetns(void) { long v; cycles_t cycles; if (gtod->clock.vclock_mode == VCLOCK_TSC) cycles = vread_tsc(); - else + else if (gtod->clock.vclock_mode == VCLOCK_HPET) cycles = vread_hpet(); + else + return 0; v = (cycles - gtod->clock.cycle_last) & gtod->clock.mask; return (v * gtod->clock.mult) >> gtod->clock.shift; } -notrace static noinline int do_realtime(struct timespec *ts) +/* Code size doesn't matter (vdso is 4k anyway) and this is faster. 
*/ +notrace static int __always_inline do_realtime(struct timespec *ts) { unsigned long seq, ns; + int mode; + do { - seq = read_seqbegin(&gtod->lock); + seq = read_seqcount_begin(&gtod->seq); + mode = gtod->clock.vclock_mode; ts->tv_sec = gtod->wall_time_sec; ts->tv_nsec = gtod->wall_time_nsec; ns = vgetns(); - } while (unlikely(read_seqretry(&gtod->lock, seq))); + } while (unlikely(read_seqcount_retry(&gtod->seq, seq))); + timespec_add_ns(ts, ns); - return 0; + return mode; } -notrace static noinline int do_monotonic(struct timespec *ts) +notrace static int do_monotonic(struct timespec *ts) { - unsigned long seq, ns, secs; + unsigned long seq, ns; + int mode; + do { - seq = read_seqbegin(&gtod->lock); - secs = gtod->wall_time_sec; - ns = gtod->wall_time_nsec + vgetns(); - secs += gtod->wall_to_monotonic.tv_sec; - ns += gtod->wall_to_monotonic.tv_nsec; - } while (unlikely(read_seqretry(&gtod->lock, seq))); - - /* wall_time_nsec, vgetns(), and wall_to_monotonic.tv_nsec - * are all guaranteed to be nonnegative.
- */ - while (ns >= NSEC_PER_SEC) { - ns -= NSEC_PER_SEC; - ++secs; - } - ts->tv_sec = secs; - ts->tv_nsec = ns; + seq = read_seqcount_begin(&gtod->seq); + mode = gtod->clock.vclock_mode; + ts->tv_sec = gtod->monotonic_time_sec; + ts->tv_nsec = gtod->monotonic_time_nsec; + ns = vgetns(); + } while (unlikely(read_seqcount_retry(&gtod->seq, seq))); + timespec_add_ns(ts, ns); - return 0; + return mode; } -notrace static noinline int do_realtime_coarse(struct timespec *ts) +notrace static int do_realtime_coarse(struct timespec *ts) { unsigned long seq; do { - seq = read_seqbegin(&gtod->lock); + seq = read_seqcount_begin(&gtod->seq); ts->tv_sec = gtod->wall_time_coarse.tv_sec; ts->tv_nsec = gtod->wall_time_coarse.tv_nsec; - } while (unlikely(read_seqretry(&gtod->lock, seq))); + } while (unlikely(read_seqcount_retry(&gtod->seq, seq))); return 0; } -notrace static noinline int do_monotonic_coarse(struct timespec *ts) +notrace static int do_monotonic_coarse(struct timespec *ts) { - unsigned long seq, ns, secs; + unsigned long seq; do { - seq = read_seqbegin(&gtod->lock); - secs = gtod->wall_time_coarse.tv_sec; - ns = gtod->wall_time_coarse.tv_nsec; - secs += gtod->wall_to_monotonic.tv_sec; - ns += gtod->wall_to_monotonic.tv_nsec; - } while (unlikely(read_seqretry(&gtod->lock, seq))); - - /* wall_time_nsec and wall_to_monotonic.tv_nsec are - * guaranteed to be between 0 and NSEC_PER_SEC.
- */ - if (ns >= NSEC_PER_SEC) { - ns -= NSEC_PER_SEC; - ++secs; - } - ts->tv_sec = secs; - ts->tv_nsec = ns; + seq = read_seqcount_begin(&gtod->seq); + ts->tv_sec = gtod->monotonic_time_coarse.tv_sec; + ts->tv_nsec = gtod->monotonic_time_coarse.tv_nsec; + } while (unlikely(read_seqcount_retry(&gtod->seq, seq))); return 0; } notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts) { + int ret = VCLOCK_NONE; + switch (clock) { case CLOCK_REALTIME: - if (likely(gtod->clock.vclock_mode != VCLOCK_NONE)) - return do_realtime(ts); + ret = do_realtime(ts); break; case CLOCK_MONOTONIC: - if (likely(gtod->clock.vclock_mode != VCLOCK_NONE)) - return do_monotonic(ts); + ret = do_monotonic(ts); break; case CLOCK_REALTIME_COARSE: return do_realtime_coarse(ts); @@ -171,32 +169,33 @@ notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts) return do_monotonic_coarse(ts); } - return vdso_fallback_gettime(clock, ts); + if (ret == VCLOCK_NONE) + return vdso_fallback_gettime(clock, ts); + return 0; } int clock_gettime(clockid_t, struct timespec *) __attribute__((weak, alias("__vdso_clock_gettime"))); notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz) { - long ret; - if (likely(gtod->clock.vclock_mode != VCLOCK_NONE)) { - if (likely(tv != NULL)) { - BUILD_BUG_ON(offsetof(struct timeval, tv_usec) != - offsetof(struct timespec, tv_nsec) || - sizeof(*tv) != sizeof(struct timespec)); - do_realtime((struct timespec *)tv); - tv->tv_usec /= 1000; - } - if (unlikely(tz != NULL)) { - /* Avoid memcpy.
Some old compilers fail to inline it */ - tz->tz_minuteswest = gtod->sys_tz.tz_minuteswest; - tz->tz_dsttime = gtod->sys_tz.tz_dsttime; - } - return 0; + long ret = VCLOCK_NONE; + + if (likely(tv != NULL)) { + BUILD_BUG_ON(offsetof(struct timeval, tv_usec) != + offsetof(struct timespec, tv_nsec) || + sizeof(*tv) != sizeof(struct timespec)); + ret = do_realtime((struct timespec *)tv); + tv->tv_usec /= 1000; } - asm("syscall" : "=a" (ret) : - "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory"); - return ret; + if (unlikely(tz != NULL)) { + /* Avoid memcpy. Some old compilers fail to inline it */ + tz->tz_minuteswest = gtod->sys_tz.tz_minuteswest; + tz->tz_dsttime = gtod->sys_tz.tz_dsttime; + } + + if (ret == VCLOCK_NONE) + return vdso_fallback_gtod(tv, tz); + return 0; } int gettimeofday(struct timeval *, struct timezone *) __attribute__((weak, alias("__vdso_gettimeofday"))); diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c index a944020fa85..66e6d935982 100644 --- a/arch/x86/vdso/vdso32-setup.c +++ b/arch/x86/vdso/vdso32-setup.c @@ -311,6 +311,11 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) int ret = 0; bool compat; +#ifdef CONFIG_X86_X32_ABI + if (test_thread_flag(TIF_X32)) + return x32_setup_additional_pages(bprm, uses_interp); +#endif + if (vdso_enabled == VDSO_DISABLED) return 0; diff --git a/arch/x86/vdso/vdsox32.S b/arch/x86/vdso/vdsox32.S new file mode 100644 index 00000000000..d6b9a7f42a8 --- /dev/null +++ b/arch/x86/vdso/vdsox32.S @@ -0,0 +1,22 @@ +#include <asm/page_types.h> +#include <linux/linkage.h> +#include <linux/init.h> + +__PAGE_ALIGNED_DATA + + .globl vdsox32_start, vdsox32_end + .align PAGE_SIZE +vdsox32_start: + .incbin "arch/x86/vdso/vdsox32.so" +vdsox32_end: + .align PAGE_SIZE /* extra data here leaks to userspace. 
*/ + +.previous + + .globl vdsox32_pages + .bss + .align 8 + .type vdsox32_pages, @object +vdsox32_pages: + .zero (vdsox32_end - vdsox32_start + PAGE_SIZE - 1) / PAGE_SIZE * 8 + .size vdsox32_pages, .-vdsox32_pages diff --git a/arch/x86/vdso/vdsox32.lds.S b/arch/x86/vdso/vdsox32.lds.S new file mode 100644 index 00000000000..62272aa2ae0 --- /dev/null +++ b/arch/x86/vdso/vdsox32.lds.S @@ -0,0 +1,28 @@ +/* + * Linker script for x32 vDSO. + * We #include the file to define the layout details. + * Here we only choose the prelinked virtual address. + * + * This file defines the version script giving the user-exported symbols in + * the DSO. We can define local symbols here called VDSO* to make their + * values visible using the asm-x86/vdso.h macros from the kernel proper. + */ + +#define VDSO_PRELINK 0 +#include "vdso-layout.lds.S" + +/* + * This controls what userland symbols we export from the vDSO. + */ +VERSION { + LINUX_2.6 { + global: + __vdso_clock_gettime; + __vdso_gettimeofday; + __vdso_getcpu; + __vdso_time; + local: *; + }; +} + +VDSOX32_PRELINK = VDSO_PRELINK; diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c index 17e18279649..00aaf047b39 100644 --- a/arch/x86/vdso/vma.c +++ b/arch/x86/vdso/vma.c @@ -24,7 +24,44 @@ extern unsigned short vdso_sync_cpuid; extern struct page *vdso_pages[]; static unsigned vdso_size; -static void __init patch_vdso(void *vdso, size_t len) +#ifdef CONFIG_X86_X32_ABI +extern char vdsox32_start[], vdsox32_end[]; +extern struct page *vdsox32_pages[]; +static unsigned vdsox32_size; + +static void __init patch_vdsox32(void *vdso, size_t len) +{ + Elf32_Ehdr *hdr = vdso; + Elf32_Shdr *sechdrs, *alt_sec = 0; + char *secstrings; + void *alt_data; + int i; + + BUG_ON(len < sizeof(Elf32_Ehdr)); + BUG_ON(memcmp(hdr->e_ident, ELFMAG, SELFMAG) != 0); + + sechdrs = (void *)hdr + hdr->e_shoff; + secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; + + for (i = 1; i < hdr->e_shnum; i++) { + Elf32_Shdr *shdr = &sechdrs[i]; + if 
(!strcmp(secstrings + shdr->sh_name, ".altinstructions")) { + alt_sec = shdr; + goto found; + } + } + + /* If we get here, it's probably a bug. */ + pr_warning("patch_vdsox32: .altinstructions not found\n"); + return; /* nothing to patch */ + +found: + alt_data = (void *)hdr + alt_sec->sh_offset; + apply_alternatives(alt_data, alt_data + alt_sec->sh_size); +} +#endif + +static void __init patch_vdso64(void *vdso, size_t len) { Elf64_Ehdr *hdr = vdso; Elf64_Shdr *sechdrs, *alt_sec = 0; @@ -47,7 +84,7 @@ static void __init patch_vdso(void *vdso, size_t len) } /* If we get here, it's probably a bug. */ - pr_warning("patch_vdso: .altinstructions not found\n"); + pr_warning("patch_vdso64: .altinstructions not found\n"); return; /* nothing to patch */ found: @@ -60,12 +97,20 @@ static int __init init_vdso(void) int npages = (vdso_end - vdso_start + PAGE_SIZE - 1) / PAGE_SIZE; int i; - patch_vdso(vdso_start, vdso_end - vdso_start); + patch_vdso64(vdso_start, vdso_end - vdso_start); vdso_size = npages << PAGE_SHIFT; for (i = 0; i < npages; i++) vdso_pages[i] = virt_to_page(vdso_start + i*PAGE_SIZE); +#ifdef CONFIG_X86_X32_ABI + patch_vdsox32(vdsox32_start, vdsox32_end - vdsox32_start); + npages = (vdsox32_end - vdsox32_start + PAGE_SIZE - 1) / PAGE_SIZE; + vdsox32_size = npages << PAGE_SHIFT; + for (i = 0; i < npages; i++) + vdsox32_pages[i] = virt_to_page(vdsox32_start + i*PAGE_SIZE); +#endif + return 0; } subsys_initcall(init_vdso); @@ -103,7 +148,10 @@ static unsigned long vdso_addr(unsigned long start, unsigned len) /* Setup a VMA at program startup for the vsyscall page. 
Not called for compat tasks */ -int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) +static int setup_additional_pages(struct linux_binprm *bprm, + int uses_interp, + struct page **pages, + unsigned size) { struct mm_struct *mm = current->mm; unsigned long addr; @@ -113,8 +161,8 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) return 0; down_write(&mm->mmap_sem); - addr = vdso_addr(mm->start_stack, vdso_size); - addr = get_unmapped_area(NULL, addr, vdso_size, 0, 0); + addr = vdso_addr(mm->start_stack, size); + addr = get_unmapped_area(NULL, addr, size, 0, 0); if (IS_ERR_VALUE(addr)) { ret = addr; goto up_fail; @@ -122,10 +170,10 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) current->mm->context.vdso = (void *)addr; - ret = install_special_mapping(mm, addr, vdso_size, + ret = install_special_mapping(mm, addr, size, VM_READ|VM_EXEC| VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, - vdso_pages); + pages); if (ret) { current->mm->context.vdso = NULL; goto up_fail; @@ -136,6 +184,20 @@ up_fail: return ret; } +int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) +{ + return setup_additional_pages(bprm, uses_interp, vdso_pages, + vdso_size); +} + +#ifdef CONFIG_X86_X32_ABI +int x32_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) +{ + return setup_additional_pages(bprm, uses_interp, vdsox32_pages, + vdsox32_size); +} +#endif + static __init int vdso_setup(char *s) { vdso_enabled = simple_strtoul(s, NULL, 0); |